greedy_policy.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_RL_POLICY_GREEDY_POLICY_HPP
13 #define MLPACK_METHODS_RL_POLICY_GREEDY_POLICY_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 
17 namespace mlpack {
18 namespace rl {
19 
29 template <typename EnvironmentType>
31 {
32  public:
34  using ActionType = typename EnvironmentType::Action;
35 
45  GreedyPolicy(const double initialEpsilon,
46  const size_t annealInterval,
47  const double minEpsilon) :
48  epsilon(initialEpsilon),
49  minEpsilon(minEpsilon),
50  delta((initialEpsilon - minEpsilon) / annealInterval)
51  { /* Nothing to do here. */ }
52 
60  ActionType Sample(const arma::colvec& actionValue, bool deterministic = false)
61  {
62  double exploration = math::Random();
63 
64  // Select the action randomly.
65  if (!deterministic && exploration < epsilon)
66  return static_cast<ActionType>(math::RandInt(ActionType::size));
67 
68  // Select the action greedily.
69  return static_cast<ActionType>(
70  arma::as_scalar(arma::find(actionValue == actionValue.max(), 1)));
71  }
72 
76  void Anneal()
77  {
78  epsilon -= delta;
79  epsilon = std::max(minEpsilon, epsilon);
80  }
81 
85  const double& Epsilon() const { return epsilon; }
86 
87  private:
89  double epsilon;
90 
92  double minEpsilon;
93 
95  double delta;
96 };
97 
98 } // namespace rl
99 } // namespace mlpack
100 
101 #endif
typename EnvironmentType::Action ActionType
Convenient typedef for action.
.hpp
Definition: add_to_po.hpp:21
Implementation for epsilon greedy policy.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void Anneal()
Exploration probability will anneal at each step.
const double & Epsilon() const
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:71
GreedyPolicy(const double initialEpsilon, const size_t annealInterval, const double minEpsilon)
Constructor for epsilon greedy policy class.
int RandInt(const int hiExclusive)
Generates a uniform random integer.
Definition: random.hpp:87
ActionType Sample(const arma::colvec &actionValue, bool deterministic=false)
Sample an action based on given action values.