12 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP 13 #define MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP 28 template <
typename EnvironmentType>
33 using State =
typename EnvironmentType::State;
36 using Action =
typename EnvironmentType::Action;
47 const double minReward = -1.0,
48 const double maxReward = 1.0) :
49 environment(environment),
63 return environment.InitialSample();
75 return environment.IsTerminal(state);
92 double unclippedReward = environment.Sample(state, action, nextState);
94 return std::min(std::max(unclippedReward, minReward), maxReward);
108 return Sample(state, action, nextState);
128 EnvironmentType environment;
double MaxReward() const
Get the maximum reward value.
EnvironmentType & Environment()
Modify the environment.
RewardClipping(EnvironmentType &environment, const double minReward=-1.0, const double maxReward=1.0)
Constructor for creating a RewardClipping instance.
The core includes that mlpack expects; standard C++ includes and Armadillo.
double Sample(const State &state, const Action &action, State &nextState)
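Dynamics of the environment: samples the wrapped environment to obtain the next state and returns its reward clipped to the range [minReward, maxReward].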
State InitialSample()
The InitialSample method is called to initialize the starting state; it returns the result of the wrapped environment's InitialSample().
typename EnvironmentType::State State
Convenient typedef for state.
typename EnvironmentType::Action Action
Convenient typedef for action.
double & MaxReward()
Modify the maximum reward value.
double & MinReward()
Modify the minimum reward value.
double MinReward() const
Get the minimum reward value.
Interface for clipping the reward to some value between the specified maximum and minimum value (clipping is implemented as min(max(reward, minReward), maxReward)).
double Sample(const State &state, const Action &action)
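Dynamics of the environment: forwards to the three-argument Sample overload and returns the same clipped reward, for callers that do not need the next state.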
bool IsTerminal(const State &state) const
Checks whether given state is a terminal state.
EnvironmentType & Environment() const
Get the environment.