greedy_policy.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_RL_POLICY_GREEDY_POLICY_HPP
14 #define MLPACK_METHODS_RL_POLICY_GREEDY_POLICY_HPP
15 
16 #include <mlpack/prereqs.hpp>
17 
18 namespace mlpack {
19 namespace rl {
20 
30 template <typename EnvironmentType>
32 {
33  public:
35  using ActionType = typename EnvironmentType::Action;
36 
48  GreedyPolicy(const double initialEpsilon,
49  const size_t annealInterval,
50  const double minEpsilon,
51  const double decayRate = 1.0) :
52  epsilon(initialEpsilon),
53  minEpsilon(minEpsilon),
54  delta(((initialEpsilon - minEpsilon) * decayRate) / annealInterval)
55  { /* Nothing to do here. */ }
56 
65  ActionType Sample(const arma::colvec& actionValue,
66  bool deterministic = false,
67  const bool isNoisy = false)
68  {
69  double exploration = math::Random();
70 
71  // Select the action randomly.
72  if (!deterministic && exploration < epsilon && isNoisy == false)
73  return static_cast<ActionType>(math::RandInt(ActionType::size));
74 
75  // Select the action greedily.
76  return static_cast<ActionType>(
77  arma::as_scalar(arma::find(actionValue == actionValue.max(), 1)));
78  }
79 
83  void Anneal()
84  {
85  epsilon -= delta;
86  epsilon = std::max(minEpsilon, epsilon);
87  }
88 
92  const double& Epsilon() const { return epsilon; }
93 
94  private:
96  double epsilon;
97 
99  double minEpsilon;
100 
102  double delta;
103 };
104 
105 } // namespace rl
106 } // namespace mlpack
107 
108 #endif
typename EnvironmentType::Action ActionType
Convenient typedef for action.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: add_to_po.hpp:21
Implementation for epsilon greedy policy.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void Anneal()
Exploration probability will anneal at each step.
ActionType Sample(const arma::colvec &actionValue, bool deterministic=false, const bool isNoisy=false)
Sample an action based on given action values.
const double & Epsilon() const
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:78
int RandInt(const int hiExclusive)
Generates a uniform random integer.
Definition: random.hpp:105
GreedyPolicy(const double initialEpsilon, const size_t annealInterval, const double minEpsilon, const double decayRate=1.0)
Constructor for epsilon greedy policy class.