mountain_car.hpp
Go to the documentation of this file.
1 
16 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_MOUNTAIN_CAR_HPP
17 #define MLPACK_METHODS_RL_ENVIRONMENT_MOUNTAIN_CAR_HPP
18 
19 #include <mlpack/prereqs.hpp>
21 
22 namespace mlpack {
23 namespace rl {
24 
29 {
30  public:
35  class State
36  {
37  public:
41  State(): data(dimension, arma::fill::zeros)
42  { /* Nothing to do here. */ }
43 
49  State(const arma::colvec& data): data(data)
50  { /* Nothing to do here. */ }
51 
53  arma::colvec& Data() { return data; }
54 
56  double Velocity() const { return data[0]; }
58  double& Velocity() { return data[0]; }
59 
61  double Position() const { return data[1]; }
63  double& Position() { return data[1]; }
64 
66  const arma::colvec& Encode() const { return data; }
67 
69  static constexpr size_t dimension = 2;
70 
71  private:
73  arma::colvec data;
74  };
75 
79  enum Action
80  {
84 
87  };
88 
101  MountainCar(const double positionMin = -1.2,
102  const double positionMax = 0.6,
103  const double positionGoal = 0.5,
104  const double velocityMin = -0.07,
105  const double velocityMax = 0.07,
106  const double doneReward = 0,
107  const size_t maxSteps = 0) :
108  positionMin(positionMin),
109  positionMax(positionMax),
110  positionGoal(positionGoal),
111  velocityMin(velocityMin),
112  velocityMax(velocityMax),
113  doneReward(doneReward),
114  maxSteps(maxSteps),
115  stepsPerformed(0)
116  { /* Nothing to do here */ }
117 
127  double Sample(const State& state,
128  const Action& action,
129  State& nextState)
130  {
131  // Update the number of steps performed.
132  stepsPerformed++;
133 
134  // Calculate acceleration.
135  int direction = action - 1;
136  nextState.Velocity() = state.Velocity() + 0.001 * direction - 0.0025 *
137  std::cos(3 * state.Position());
138  nextState.Velocity() = math::ClampRange(nextState.Velocity(),
139  velocityMin, velocityMax);
140 
141  // Update states.
142  nextState.Position() = state.Position() + nextState.Velocity();
143  nextState.Position() = math::ClampRange(nextState.Position(),
144  positionMin, positionMax);
145 
146  if (nextState.Position() == positionMin && nextState.Velocity() < 0)
147  nextState.Velocity() = 0.0;
148 
149  // Check if the episode has terminated.
150  bool done = IsTerminal(nextState);
151 
152  // Do not reward the agent if time ran out.
153  if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
154  return 0;
155  else if (done)
156  return doneReward;
157 
158  return -1;
159  }
160 
169  double Sample(const State& state, const Action& action)
170  {
171  State nextState;
172  return Sample(state, action, nextState);
173  }
174 
182  {
183  State state;
184  stepsPerformed = 0;
185  state.Velocity() = 0.0;
186  state.Position() = arma::as_scalar(arma::randu(1)) * 0.2 - 0.6;
187  return state;
188  }
189 
196  bool IsTerminal(const State& state) const
197  {
198  if (maxSteps != 0 && stepsPerformed >= maxSteps)
199  {
200  Log::Info << "Episode terminated due to the maximum number of steps"
201  "being taken.";
202  return true;
203  }
204  else if (state.Position() >= positionGoal)
205  {
206  Log::Info << "Episode terminated due to agent succeeding.";
207  return true;
208  }
209  return false;
210  }
211 
213  size_t StepsPerformed() const { return stepsPerformed; }
214 
216  size_t MaxSteps() const { return maxSteps; }
218  size_t& MaxSteps() { return maxSteps; }
219 
220  private:
     //! Lower bound that Sample() clamps the position to.
222  double positionMin;
223 
     //! Upper bound that Sample() clamps the position to.
225  double positionMax;
226 
     //! Position at or beyond which IsTerminal() reports the episode as over.
228  double positionGoal;
229 
     //! Lower bound that Sample() clamps the velocity to.
231  double velocityMin;
232 
     //! Upper bound that Sample() clamps the velocity to.
234  double velocityMax;
235 
     //! Reward Sample() returns when the episode ends without the step limit
     //! having been reached.
237  double doneReward;
238 
     //! Step limit per episode; 0 disables the limit.
240  size_t maxSteps;
241 
     //! Steps taken in the current episode: incremented by Sample(), reset by
     //! InitialSample().
243  size_t stepsPerformed;
244 };
245 
246 } // namespace rl
247 } // namespace mlpack
248 
249 #endif
bool IsTerminal(const State &state) const
This function checks if the car has reached the terminal state.
size_t MaxSteps() const
Get the maximum number of steps allowed.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: add_to_po.hpp:21
State InitialSample()
Initial position is randomly generated within [-0.6, -0.4].
The core includes that mlpack expects; standard C++ includes and Armadillo.
const arma::colvec & Encode() const
Encode the state to a column vector.
double & Position()
Modify the position.
double & Velocity()
Modify the velocity.
Miscellaneous math clamping routines.
MountainCar(const double positionMin=-1.2, const double positionMax=0.6, const double positionGoal=0.5, const double velocityMin=-0.07, const double velocityMax=0.07, const double doneReward=0, const size_t maxSteps=0)
Construct a Mountain Car instance using the given constants.
double Sample(const State &state, const Action &action)
Dynamics of Mountain Car.
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
State(const arma::colvec &data)
Construct a state based on the given data.
Track the size of the action space.
Action
Implementation of action of Mountain Car.
arma::colvec & Data()
Modify the internal representation of the state.
Implementation of state of Mountain Car.
State()
Construct a state instance.
Implementation of Mountain Car task.
size_t StepsPerformed() const
Get the number of steps performed.
size_t & MaxSteps()
Set the maximum number of steps allowed.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Mountain Car.
double Position() const
Get the position.
static constexpr size_t dimension
Dimension of the encoded state.
double Velocity() const
Get the velocity.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53