reward_clipping.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
13 #define MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 
17 namespace mlpack {
18 namespace rl {
19 
/**
 * Interface for clipping the reward to some value between the specified
 * maximum and minimum value.
 *
 * The wrapper forwards every call to the wrapped environment and only alters
 * the reward returned by Sample().
 *
 * @tparam EnvironmentType Type of the wrapped environment.  It must provide
 *     the nested types State and Action and the member functions
 *     InitialSample(), IsTerminal(state) and Sample(state, action, nextState).
 */
template <typename EnvironmentType>
class RewardClipping
{
 public:
  //! Convenient typedef for state.
  using State = typename EnvironmentType::State;

  //! Convenient typedef for action.
  using Action = typename EnvironmentType::Action;

  /**
   * Constructor for creating a RewardClipping instance.
   *
   * @param environment Environment to wrap.  It is copied into this object,
   *     so later changes to the caller's instance are not observed here.
   * @param minReward Lower bound applied to every reward (default -1.0).
   * @param maxReward Upper bound applied to every reward (default 1.0).
   */
  RewardClipping(EnvironmentType& environment,
                 const double minReward = -1.0,
                 const double maxReward = 1.0) :
      environment(environment),
      minReward(minReward),
      maxReward(maxReward)
  {
    // Nothing to do here.
  }

  /**
   * Obtain the starting state by forwarding to the wrapped environment.
   *
   * @return Whatever the wrapped environment's InitialSample() returns.
   */
  State InitialSample()
  {
    return environment.InitialSample();
  }

  /**
   * Checks whether the given state is a terminal state, as decided by the
   * wrapped environment.
   *
   * @param state State to test.
   * @return Result of the wrapped environment's IsTerminal(state).
   */
  bool IsTerminal(const State& state) const
  {
    return environment.IsTerminal(state);
  }

  /**
   * Dynamics of the environment: take one step in the wrapped environment and
   * clip the reward it reports.
   *
   * @param state Current state.
   * @param action Action to perform.
   * @param nextState Filled in by the wrapped environment's Sample().
   * @return The wrapped environment's reward limited to
   *     [minReward, maxReward].
   */
  double Sample(const State& state,
                const Action& action,
                State& nextState)
  {
    // Get original unclipped reward from base environment.
    const double unclippedReward =
        environment.Sample(state, action, nextState);

    // Clip rewards according to the min and max limit and return.  The
    // min/max composition is kept (rather than std::clamp) so the result is
    // still well defined if minReward happens to exceed maxReward.
    return std::min(std::max(unclippedReward, minReward), maxReward);
  }

  /**
   * Dynamics of the environment, for callers that do not need the next state.
   * A default-constructed State receives the next state and is discarded.
   *
   * @param state Current state.
   * @param action Action to perform.
   * @return The clipped reward, exactly as the three-argument overload.
   */
  double Sample(const State& state, const Action& action)
  {
    State nextState;
    return Sample(state, action, nextState);
  }

  //! Get the environment.  Read-only access: the environment is stored by
  //! value, so a const wrapper can only expose it as const.
  const EnvironmentType& Environment() const { return environment; }
  //! Modify the environment.
  EnvironmentType& Environment() { return environment; }

  //! Get the minimum reward value.
  double MinReward() const { return minReward; }
  //! Modify the minimum reward value.
  double& MinReward() { return minReward; }

  //! Get the maximum reward value.
  double MaxReward() const { return maxReward; }
  //! Modify the maximum reward value.
  double& MaxReward() { return maxReward; }

 private:
  //! The wrapped environment (a copy of the constructor argument).
  EnvironmentType environment;

  //! Minimum possible value of the clipped reward.
  double minReward;

  //! Maximum possible value of the clipped reward.
  double maxReward;
};
136 
137 } // namespace rl
138 } // namespace mlpack
139 
140 #endif
double MaxReward() const
Get the maximum reward value.
strip_type.hpp
Definition: add_to_po.hpp:21
EnvironmentType & Environment()
Modify the environment.
RewardClipping(EnvironmentType &environment, const double minReward=-1.0, const double maxReward=1.0)
Constructor for creating a RewardClipping instance.
The core includes that mlpack expects; standard C++ includes and Armadillo.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Environment.
State InitialSample()
The InitialSample method is called by the environment to initialize the starting state.
typename EnvironmentType::State State
Convenient typedef for state.
typename EnvironmentType::Action Action
Convenient typedef for action.
double & MaxReward()
Modify the maximum reward value.
double & MinReward()
Modify the minimum reward value.
double MinReward() const
Get the minimum reward value.
Interface for clipping the reward to some value between the specified maximum and minimum value.
double Sample(const State &state, const Action &action)
Dynamics of Environment.
bool IsTerminal(const State &state) const
Checks whether given state is a terminal state.
EnvironmentType & Environment() const
Get the environment.