% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/armed_bandit_helpers.R
\name{LinUCBDisjointPolicyEpsilon}
\alias{LinUCBDisjointPolicyEpsilon}
\title{LinUCB Disjoint Policy with Epsilon-Greedy Exploration}
\description{
Contextual bandit policy that selects arms with the disjoint LinUCB upper confidence bound rule, combined with epsilon-greedy exploration.
}
\details{
Implements the disjoint LinUCB algorithm with upper confidence bounds and epsilon-greedy exploration.
}
\section{Methods}{

\itemize{
\item \code{initialize(alpha = 1.0, epsilon = 0.1)}: Creates a new LinUCBDisjointPolicyEpsilon object.
\item \code{set_parameters(context_params)}: Initializes arm-level parameters.
\item \code{get_action(t, context)}: Selects an arm using epsilon-greedy UCB.
\item \code{set_reward(t, context, action, reward)}: Updates internal statistics based on the observed reward.
}
}

\section{Super class}{
\code{cramR::NA} -> \code{LinUCBDisjointPolicyEpsilon}
}
\section{Public fields}{
\if{html}{\out{<div class="r6-fields">}}
\describe{
\item{\code{alpha}}{Numeric, exploration parameter controlling the width of the confidence bound.}

\item{\code{epsilon}}{Numeric, probability of selecting a random action (exploration).}

\item{\code{class_name}}{Internal class name.}
}
\if{html}{\out{</div>}}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-LinUCBDisjointPolicyEpsilon-new}{\code{LinUCBDisjointPolicyEpsilon$new()}}
\item \href{#method-LinUCBDisjointPolicyEpsilon-set_parameters}{\code{LinUCBDisjointPolicyEpsilon$set_parameters()}}
\item \href{#method-LinUCBDisjointPolicyEpsilon-get_action}{\code{LinUCBDisjointPolicyEpsilon$get_action()}}
\item \href{#method-LinUCBDisjointPolicyEpsilon-set_reward}{\code{LinUCBDisjointPolicyEpsilon$set_reward()}}
\item \href{#method-LinUCBDisjointPolicyEpsilon-clone}{\code{LinUCBDisjointPolicyEpsilon$clone()}}
}
}
\if{html}{\out{
<details open><summary>Inherited methods</summary>
<ul>
<li><span class="pkg-link" data-pkg="cramR" data-topic="NA" data-id="initialize_theta"><a href='../../cramR/html/NA.html#method-NA-initialize_theta'><code>cramR::NA$initialize_theta()</code></a></span></li>
<li><span class="pkg-link" data-pkg="cramR" data-topic="NA" data-id="post_initialization"><a href='../../cramR/html/NA.html#method-NA-post_initialization'><code>cramR::NA$post_initialization()</code></a></span></li>
</ul>
</details>
}}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-LinUCBDisjointPolicyEpsilon-new"></a>}}
\if{latex}{\out{\hypertarget{method-LinUCBDisjointPolicyEpsilon-new}{}}}
\subsection{Method \code{new()}}{
Initializes the policy with UCB parameter \code{alpha} and exploration rate \code{epsilon}.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LinUCBDisjointPolicyEpsilon$new(alpha = 1, epsilon = 0.1)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{alpha}}{Numeric. Controls width of the UCB bonus.}

\item{\code{epsilon}}{Numeric between 0 and 1. Probability of random action selection.}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-LinUCBDisjointPolicyEpsilon-set_parameters"></a>}}
\if{latex}{\out{\hypertarget{method-LinUCBDisjointPolicyEpsilon-set_parameters}{}}}
\subsection{Method \code{set_parameters()}}{
Set arm-specific parameter structures.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LinUCBDisjointPolicyEpsilon$set_parameters(context_params)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{context_params}}{A list with context information, typically including the number of unique features.}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-LinUCBDisjointPolicyEpsilon-get_action"></a>}}
\if{latex}{\out{\hypertarget{method-LinUCBDisjointPolicyEpsilon-get_action}{}}}
\subsection{Method \code{get_action()}}{
Selects an arm using epsilon-greedy Upper Confidence Bound (UCB).
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LinUCBDisjointPolicyEpsilon$get_action(t, context)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{t}}{Integer time step.}

\item{\code{context}}{A list with contextual features and number of arms.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
A list containing the selected action.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-LinUCBDisjointPolicyEpsilon-set_reward"></a>}}
\if{latex}{\out{\hypertarget{method-LinUCBDisjointPolicyEpsilon-set_reward}{}}}
\subsection{Method \code{set_reward()}}{
Updates internal statistics using the observed reward for the selected arm.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LinUCBDisjointPolicyEpsilon$set_reward(t, context, action, reward)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{t}}{Integer time step.}

\item{\code{context}}{Contextual features for all arms at time \code{t}.}

\item{\code{action}}{A list containing the chosen arm.}

\item{\code{reward}}{A list containing the observed reward for the selected arm.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
Updated internal parameters.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-LinUCBDisjointPolicyEpsilon-clone"></a>}}
\if{latex}{\out{\hypertarget{method-LinUCBDisjointPolicyEpsilon-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{LinUCBDisjointPolicyEpsilon$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
