acrobat.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_ACROBAT_HPP
14 #define MLPACK_METHODS_RL_ENVIRONMENT_ACROBAT_HPP
15 
16 #include <mlpack/core.hpp>
17 
18 namespace mlpack{
19 namespace rl{
20 
28 class Acrobat
29 {
30  public:
31  /*
32  * Implementation of Acrobat State. Each State is a tuple vector
33  * (theta1, thetha2, angular velocity 1, angular velocity 2).
34  */
35  class State
36  {
37  public:
41  State(): data(dimension) { /* nothing to do here */ }
42 
48  State(const arma::colvec& data) : data(data)
49  { /* nothing to do here */ }
50 
52  arma::colvec& Data() { return data; }
53 
55  double Theta1() const { return data[0]; }
57  double& Theta1() { return data[0]; }
58 
60  double Theta2() const { return data[1]; }
62  double& Theta2() { return data[1]; }
63 
65  double AngularVelocity1() const { return data[2]; }
67  double& AngularVelocity1() { return data[2]; }
68 
70  double AngularVelocity2() const { return data[3]; }
72  double& AngularVelocity2() { return data[3]; }
73 
75  const arma::colvec& Encode() const { return data; }
76 
78  static constexpr size_t dimension = 4;
79 
80  private:
82  arma::colvec data;
83  };
84 
85  /*
86  * Implementation of action for Acrobat
87  */
88  enum Action
89  {
93 
94  // Track the size of the action space.
96  };
97 
113  Acrobat(const double gravity = 9.81,
114  const double linkLength1 = 1.0,
115  const double linkLength2 = 1.0,
116  const double linkMass1 = 1.0,
117  const double linkMass2 = 1.0,
118  const double linkCom1 = 0.5,
119  const double linkCom2 = 0.5,
120  const double linkMoi = 1.0,
121  const double maxVel1 = 4 * M_PI,
122  const double maxVel2 = 9 * M_PI,
123  const double dt = 0.2,
124  const double doneReward = 0) :
125  gravity(gravity),
126  linkLength1(linkLength1),
127  linkLength2(linkLength2),
128  linkMass1(linkMass1),
129  linkMass2(linkMass2),
130  linkCom1(linkCom1),
131  linkCom2(linkCom2),
132  linkMoi(linkMoi),
133  maxVel1(maxVel1),
134  maxVel2(maxVel2),
135  dt(dt),
136  doneReward(doneReward)
137  { /* Nothing to do here */ }
138 
148  double Sample(const State& state,
149  const Action& action,
150  State& nextState) const
151  {
152  // Make a vector to estimate nextstate.
153  arma::colvec currentState = {state.Theta1(), state.Theta2(),
154  state.AngularVelocity1(), state.AngularVelocity2()};
155 
156  arma::colvec currentNextState = Rk4(currentState, Torque(action));
157 
158  nextState.Theta1() = Wrap(currentNextState[0], -M_PI, M_PI);
159 
160  nextState.Theta2() = Wrap(currentNextState[1], -M_PI, M_PI);
162 
163  nextState.AngularVelocity1() = std::min(
164  std::max(currentNextState[2], -maxVel1), maxVel1);
165  nextState.AngularVelocity2() = std::min(
166  std::max(currentNextState[3], -maxVel2), maxVel2);
171  bool done = IsTerminal(nextState);
172  if (done)
173  return doneReward;
174  return -1;
175  };
176 
186  double Sample(const State& state, const Action& action) const
187  {
188  State nextState;
189  return Sample(state, action, nextState);
190  }
191 
196  {
197  return State((arma::randu<arma::colvec>(4) - 0.5) / 5.0);
198  }
199 
205  bool IsTerminal(const State& state) const
206  {
207  return bool (-std::cos(state.Theta1())-std::cos(state.Theta1() +
208  state.Theta2()) > 1.0);
209  }
210 
218  arma::colvec Dsdt(arma::colvec state, const double torque) const
219  {
220  const double m1 = linkMass1;
221  const double m2 = linkMass2;
222  const double l1 = linkLength1;
223  const double lc1 = linkCom1;
224  const double lc2 = linkCom2;
225  const double I1 = linkMoi;
226  const double I2 = linkMoi;
227  const double g = gravity;
228  const double a = torque;
229  const double theta1 = state[0];
230  const double theta2 = state[1];
231 
232  arma::colvec values(4);
233  values[0] = state[2];
234  values[1] = state[3];
235 
236  const double d1 = m1 * std::pow(lc1, 2) + m2 * (std::pow(l1, 2) +
237  std::pow(lc2, 2) + 2 * l1 * lc2 * std::cos(theta2)) + I1 + I2;
238 
239  const double d2 = m2 * (std::pow(lc2, 2) + l1 * lc2 * std::cos(theta2)) +
240  I2;
241 
242  const double phi2 = m2 * lc2 * g * std::cos(theta1 + theta2 - M_PI / 2.);
243 
244  const double phi1 = - m2 * l1 * lc2 * std::pow(values[1], 2) *
245  std::sin(theta2) - 2 * m2 * l1 * lc2 * values[1] * values[0] *
246  std::sin(theta2) + (m1 * lc1 + m2 * l1) * g *
247  std::cos(theta1 - M_PI / 2) + phi2;
248 
249  values[3] = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * std::pow(values[0], 2) *
250  std::sin(theta2) - phi2) / (m2 * std::pow(lc2, 2) + I2 -
251  std::pow(d2, 2) / d1);
252 
253  values[2] = -(d2 * values[3] + phi1) / d1;
254 
255  return values;
256  };
257 
267  double Wrap(double value,
268  const double minimum,
269  const double maximum) const
270  {
271  const double diff = maximum - minimum;
272 
273  if (value > maximum)
274  {
275  value = value - diff;
276  }
277  else if (value < minimum)
278  {
279  value = value + diff;
280  }
281 
282  return value;
283  };
284 
291  double Torque(const Action& action) const
292  {
293  // Add noise to the Torque Torque is action number - 1. {0,1,2} -> {-1,0,1}.
294  return double(action - 1) + mlpack::math::Random(-0.1, 0.1);
295  }
296 
305  arma::colvec Rk4(const arma::colvec state, const double torque) const
306  {
307  arma::colvec k1 = Dsdt(state, torque);
308  arma::colvec k2 = Dsdt(state + dt * k1 / 2, torque);
309  arma::colvec k3 = Dsdt(state + dt * k2 / 2, torque);
310  arma::colvec k4 = Dsdt(state + dt * k3, torque);
311  arma::colvec nextState = state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6;
312 
313  return nextState;
314  };
315 
316  private:
318  double gravity;
319 
321  double linkLength1;
322 
324  double linkLength2;
325 
327  double linkMass1;
328 
330  double linkMass2;
331 
333  double linkCom1;
334 
336  double linkCom2;
337 
339  double linkMoi;
340 
342  double maxVel1;
343 
345  double maxVel2;
346 
348  double dt;
349 
351  double doneReward;
352 }; // class Acrobat
353 
354 } // namespace rl
355 } // namespace mlpack
356 
357 #endif
arma::colvec Dsdt(arma::colvec state, const double torque) const
This is the ordinary differential equations required for estimation of nextState through RK4 method...
Definition: acrobat.hpp:218
arma::colvec & Data()
Modify the state representation.
Definition: acrobat.hpp:52
double & AngularVelocity1()
Modify the angular velocity (one).
Definition: acrobat.hpp:67
.hpp
Definition: add_to_po.hpp:21
arma::colvec Rk4(const arma::colvec state, const double torque) const
This function calls the RK4 iterative method to estimate the next state based on given ordinary diffe...
Definition: acrobat.hpp:305
double AngularVelocity1() const
Get value of Angular velocity (one).
Definition: acrobat.hpp:65
const arma::colvec & Encode() const
Encode the state to a column vector.
Definition: acrobat.hpp:75
Acrobat(const double gravity=9.81, const double linkLength1=1.0, const double linkLength2=1.0, const double linkMass1=1.0, const double linkMass2=1.0, const double linkCom1=0.5, const double linkCom2=0.5, const double linkMoi=1.0, const double maxVel1=4 *M_PI, const double maxVel2=9 *M_PI, const double dt=0.2, const double doneReward=0)
Construct a Acrobat instance using the given constants.
Definition: acrobat.hpp:113
#define M_PI
Definition: prereqs.hpp:39
double Torque(const Action &action) const
This function calculates the torque for a particular action.
Definition: acrobat.hpp:291
State InitialSample() const
This function does random initialization of state space.
Definition: acrobat.hpp:195
double & Theta1()
Modify value of theta (one).
Definition: acrobat.hpp:57
double Sample(const State &state, const Action &action) const
Dynamics of the Acrobat System.
Definition: acrobat.hpp:186
double Wrap(double value, const double minimum, const double maximum) const
Wrap funtion is required to truncate the angle value from -180 to 180.
Definition: acrobat.hpp:267
double & Theta2()
Modify value of theta (two).
Definition: acrobat.hpp:62
double & AngularVelocity2()
Modify the angular velocity (two).
Definition: acrobat.hpp:72
static constexpr size_t dimension
Dimension of the encoded state.
Definition: acrobat.hpp:78
double AngularVelocity2() const
Get value of Angular velocity (two).
Definition: acrobat.hpp:70
double Theta2() const
Get value of theta (two).
Definition: acrobat.hpp:60
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
double Theta1() const
Get value of theta (one).
Definition: acrobat.hpp:55
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:71
bool IsTerminal(const State &state) const
This function checks if the acrobat has reached the terminal state.
Definition: acrobat.hpp:205
State()
Construct a state instance.
Definition: acrobat.hpp:41
State(const arma::colvec &data)
Construct a state instance from given data.
Definition: acrobat.hpp:48
Implementation of Acrobat game.
Definition: acrobat.hpp:28
double Sample(const State &state, const Action &action, State &nextState) const
Dynamics of the Acrobat System.
Definition: acrobat.hpp:148