pendulum.hpp
Go to the documentation of this file.
1 
16 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
17 #define MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
18 
19 #include <mlpack/prereqs.hpp>
20 
21 namespace mlpack {
22 namespace rl {
23 
30 class Pendulum
31 {
32  public:
37  class State
38  {
39  public:
43  State() : data(dimension, arma::fill::zeros)
44  { /* Nothing to do here. */ }
45 
51  State(const arma::colvec& data): data(data)
52  { /* Nothing to do here. */ }
53 
55  arma::colvec& Data() { return data; }
56 
58  double Theta() const { return data[0]; }
60  double& Theta() { return data[0]; }
61 
63  double AngularVelocity() const { return data[1]; }
65  double& AngularVelocity() { return data[1]; }
66 
68  const arma::colvec& Encode() const { return data; }
69 
71  static constexpr size_t dimension = 2;
72 
73  private:
75  arma::colvec data;
76  };
77 
83  struct Action
84  {
85  double action[1];
86  // Storing degree of freedom
87  const int size = 1;
88  };
89 
97  Pendulum(const double maxAngularVelocity = 8,
98  const double maxTorque = 2.0,
99  const double dt = 0.05) :
100  maxAngularVelocity(maxAngularVelocity),
101  maxTorque(maxTorque),
102  dt(dt)
103  { /* Nothing to do here */ }
104 
114  double Sample(const State& state,
115  const Action& action,
116  State& nextState) const
117  {
118  // Get current state.
119  double theta = state.Theta();
120  double angularVelocity = state.AngularVelocity();
121 
122  // Define constants which specify our pendulum.
123  const double gravity = 10.0;
124  const double mass = 1.0;
125  const double length = 1.0;
126 
127  // Get action and clip the values between max and min limits.
128  double torque = std::min(
129  std::max(action.action[0], -maxTorque), maxTorque);
130 
131  // Calculate costs of taking this action in the current state.
132  double costs = std::pow(AngleNormalize(theta), 2) + 0.1 *
133  std::pow(angularVelocity, 2) + 0.001 * std::pow(torque, 2);
134 
135  // Calculate new state values and assign to the next state.
136  double newAngularVelocity = angularVelocity + (-3.0 * gravity / (2 *
137  length) * std::sin(theta + M_PI) + 3.0 / std::pow(mass * length, 2) *
138  torque) * dt;
139  nextState.AngularVelocity() = std::min(std::max(newAngularVelocity,
140  -maxAngularVelocity), maxAngularVelocity);
141  nextState.Theta() = theta + newAngularVelocity * dt;
142 
143  // Return the reward of taking the action in current state.
144  // The reward is simply the negative of cost incurred for the action.
145  return -costs;
146  }
147 
155  double Sample(const State& state, const Action& action) const
156  {
157  State nextState;
158  return Sample(state, action, nextState);
159  }
160 
168  {
169  State state;
170  state.Theta() = math::Random(-M_PI, M_PI);
171  state.AngularVelocity() = math::Random(-1.0, 1.0);
172  return state;
173  }
174 
180  double AngleNormalize(double theta) const
181  {
182  // Scale angle within [-pi, pi).
183  return double(fmod(theta + M_PI, 2 * M_PI) - M_PI);
184  }
185 
186  private:
188  double maxAngularVelocity;
189 
191  double maxTorque;
192 
194  double dt;
195 };
196 
197 } // namespace rl
198 } // namespace mlpack
199 
200 #endif
double Theta() const
Get the theta.
Definition: pendulum.hpp:58
double & Theta()
Modify the value of theta.
Definition: pendulum.hpp:60
Implementation of Pendulum task.
Definition: pendulum.hpp:30
.hpp
Definition: add_to_po.hpp:21
The core includes that mlpack expects; standard C++ includes and Armadillo.
Pendulum(const double maxAngularVelocity=8, const double maxTorque=2.0, const double dt=0.05)
Construct a Pendulum instance using the given values.
Definition: pendulum.hpp:97
Implementation of action of Pendulum.
Definition: pendulum.hpp:83
double Sample(const State &state, const Action &action, State &nextState) const
Dynamics of Pendulum.
Definition: pendulum.hpp:114
arma::colvec & Data()
Modify the internal representation of the state.
Definition: pendulum.hpp:55
State(const arma::colvec &data)
Construct a state based on the given data.
Definition: pendulum.hpp:51
double AngleNormalize(double theta) const
This function calculates the normalized angle for a particular theta.
Definition: pendulum.hpp:180
#define M_PI
Definition: prereqs.hpp:39
double AngularVelocity() const
Get the angular velocity.
Definition: pendulum.hpp:63
State()
Construct a state instance.
Definition: pendulum.hpp:43
double & AngularVelocity()
Modify the value of angular velocity.
Definition: pendulum.hpp:65
State InitialSample() const
Initial theta is randomly generated within [-pi, pi].
Definition: pendulum.hpp:167
static constexpr size_t dimension
Dimension of the encoded state.
Definition: pendulum.hpp:71
double Sample(const State &state, const Action &action) const
Dynamics of Pendulum.
Definition: pendulum.hpp:155
Implementation of state of Pendulum.
Definition: pendulum.hpp:37
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:71
const arma::colvec & Encode() const
Encode the state to a column vector.
Definition: pendulum.hpp:68