12 #ifndef MLPACK_METHODS_RL_POLICY_GREEDY_POLICY_HPP 13 #define MLPACK_METHODS_RL_POLICY_GREEDY_POLICY_HPP 29 template <
typename EnvironmentType>
46 const size_t annealInterval,
47 const double minEpsilon) :
48 epsilon(initialEpsilon),
49 minEpsilon(minEpsilon),
50 delta((initialEpsilon - minEpsilon) / annealInterval)
65 if (!deterministic && exploration < epsilon)
70 arma::as_scalar(arma::find(actionValue == actionValue.max(), 1)));
79 epsilon = std::max(minEpsilon, epsilon);
85 const double&
Epsilon()
const {
return epsilon; }
typename EnvironmentType::Action ActionType
Convenient typedef for action.
Implementation for epsilon greedy policy.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void Anneal()
Exploration probability will anneal at each step.
const double & Epsilon() const
double Random()
Generates a uniform random number between 0 and 1.
GreedyPolicy(const double initialEpsilon, const size_t annealInterval, const double minEpsilon)
Constructor for epsilon greedy policy class.
int RandInt(const int hiExclusive)
Generates a uniform random integer.
ActionType Sample(const arma::colvec &actionValue, bool deterministic=false)
Sample an action based on given action values.