13 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP 14 #define MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP 48 State(
const arma::colvec& data) : data(data)
52 arma::colvec&
Data() {
return data; }
55 double Theta1()
const {
return data[0]; }
60 double Theta2()
const {
return data[1]; }
75 const arma::colvec&
Encode()
const {
return data; }
114 const double linkLength1 = 1.0,
115 const double linkLength2 = 1.0,
116 const double linkMass1 = 1.0,
117 const double linkMass2 = 1.0,
118 const double linkCom1 = 0.5,
119 const double linkCom2 = 0.5,
120 const double linkMoi = 1.0,
121 const double maxVel1 = 4 *
M_PI,
122 const double maxVel2 = 9 *
M_PI,
123 const double dt = 0.2,
124 const double doneReward = 0) :
126 linkLength1(linkLength1),
127 linkLength2(linkLength2),
128 linkMass1(linkMass1),
129 linkMass2(linkMass2),
136 doneReward(doneReward)
150 State& nextState)
const 153 arma::colvec currentState = {state.
Theta1(), state.
Theta2(),
156 arma::colvec currentNextState =
Rk4(currentState,
Torque(action));
164 std::max(currentNextState[2], -maxVel1), maxVel1);
166 std::max(currentNextState[3], -maxVel2), maxVel2);
189 return Sample(state, action, nextState);
197 return State((arma::randu<arma::colvec>(4) - 0.5) / 5.0);
207 return bool (-std::cos(state.
Theta1())-std::cos(state.
Theta1() +
218 arma::colvec
Dsdt(arma::colvec state,
const double torque)
const 220 const double m1 = linkMass1;
221 const double m2 = linkMass2;
222 const double l1 = linkLength1;
223 const double lc1 = linkCom1;
224 const double lc2 = linkCom2;
225 const double I1 = linkMoi;
226 const double I2 = linkMoi;
227 const double g = gravity;
228 const double a = torque;
229 const double theta1 = state[0];
230 const double theta2 = state[1];
232 arma::colvec values(4);
233 values[0] = state[2];
234 values[1] = state[3];
236 const double d1 = m1 * std::pow(lc1, 2) + m2 * (std::pow(l1, 2) +
237 std::pow(lc2, 2) + 2 * l1 * lc2 * std::cos(theta2)) + I1 + I2;
239 const double d2 = m2 * (std::pow(lc2, 2) + l1 * lc2 * std::cos(theta2)) +
242 const double phi2 = m2 * lc2 * g * std::cos(theta1 + theta2 -
M_PI / 2.);
244 const double phi1 = - m2 * l1 * lc2 * std::pow(values[1], 2) *
245 std::sin(theta2) - 2 * m2 * l1 * lc2 * values[1] * values[0] *
246 std::sin(theta2) + (m1 * lc1 + m2 * l1) * g *
247 std::cos(theta1 -
M_PI / 2) + phi2;
249 values[3] = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * std::pow(values[0], 2) *
250 std::sin(theta2) - phi2) / (m2 * std::pow(lc2, 2) + I2 -
251 std::pow(d2, 2) / d1);
253 values[2] = -(d2 * values[3] + phi1) / d1;
268 const double minimum,
269 const double maximum)
const 271 const double diff = maximum - minimum;
275 value = value - diff;
277 else if (value < minimum)
279 value = value + diff;
305 arma::colvec
Rk4(
const arma::colvec state,
const double torque)
const 307 arma::colvec k1 =
Dsdt(state, torque);
308 arma::colvec k2 =
Dsdt(state + dt * k1 / 2, torque);
309 arma::colvec k3 =
Dsdt(state + dt * k2 / 2, torque);
310 arma::colvec k4 =
Dsdt(state + dt * k3, torque);
311 arma::colvec nextState = state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6;
arma::colvec & Data()
Modify the state representation.
bool IsTerminal(const State &state) const
This function checks if the acrobot has reached the terminal state.
double & AngularVelocity1()
Modify the angular velocity (one).
double AngularVelocity1() const
Get value of Angular velocity (one).
const arma::colvec & Encode() const
Encode the state to a column vector.
State InitialSample() const
This function does random initialization of state space.
double & Theta1()
Modify value of theta (one).
double Torque(const Action &action) const
This function calculates the torque for a particular action.
double & Theta2()
Modify value of theta (two).
double & AngularVelocity2()
Modify the angular velocity (two).
static constexpr size_t dimension
Dimension of the encoded state.
Implementation of Acrobot game.
arma::colvec Dsdt(arma::colvec state, const double torque) const
This is the ordinary differential equations required for estimation of nextState through RK4 method...
double AngularVelocity2() const
Get value of Angular velocity (two).
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
double Theta2() const
Get value of theta (two).
double Sample(const State &state, const Action &action, State &nextState) const
Dynamics of the Acrobot System.
double Sample(const State &state, const Action &action) const
Dynamics of the Acrobot System.
double Random()
Generates a uniform random number between 0 and 1.
double Theta1() const
Get value of theta (one).
State()
Construct a state instance.
State(const arma::colvec &data)
Construct a state instance from given data.
double Wrap(double value, const double minimum, const double maximum) const
Wrap funtion is required to truncate the angle value from -180 to 180.
arma::colvec Rk4(const arma::colvec state, const double torque) const
This function calls the RK4 iterative method to estimate the next state based on given ordinary diffe...
Acrobot Acrobat
Add an alias for backward compatibility.
Acrobot(const double gravity=9.81, const double linkLength1=1.0, const double linkLength2=1.0, const double linkMass1=1.0, const double linkMass2=1.0, const double linkCom1=0.5, const double linkCom2=0.5, const double linkMoi=1.0, const double maxVel1=4 *M_PI, const double maxVel2=9 *M_PI, const double dt=0.2, const double doneReward=0)
Construct a Acrobot instance using the given constants.