mlpack-3.0.2/doxygen/random__replay_8hpp_source.html

 #ifndef MLPACK_METHODS_RL_REPLAY_RANDOM_REPLAY_HPP
 #define MLPACK_METHODS_RL_REPLAY_RANDOM_REPLAY_HPP

 #include <mlpack/prereqs.hpp>
 #include <stdexcept>

 namespace mlpack {
 namespace rl {

 /**
  * Implementation of random experience replay.
  *
  * Transitions (state, action, reward, next state, terminal flag) are kept in
  * fixed-size Armadillo containers that are used as a ring buffer: once
  * `capacity` transitions have been stored, new transitions overwrite the
  * oldest slots starting again from index 0.
  *
  * @tparam EnvironmentType Environment type; it must expose nested `Action`
  *     and `State` types.  `State` must provide `Encode()` (its result is
  *     assigned to a column of an arma::mat) and, if the default `dimension`
  *     argument of the constructor is used, a static member `dimension`.
  */
 template <typename EnvironmentType>
 class RandomReplay
 {
  public:
   //! Convenient typedef for action.
   using ActionType = typename EnvironmentType::Action;

   //! Convenient typedef for state.
   using StateType = typename EnvironmentType::State;

   /**
    * Construct an instance of the random experience replay class.
    *
    * @param batchSize Number of transitions returned by each call to Sample().
    * @param capacity Total number of transitions the memory can hold.
    * @param dimension Number of rows used to hold one encoded state.
    */
   RandomReplay(const size_t batchSize,
                const size_t capacity,
                const size_t dimension = StateType::dimension) :
       batchSize(batchSize),
       capacity(capacity),
       position(0),
       states(dimension, capacity),
       actions(capacity),
       rewards(capacity),
       nextStates(dimension, capacity),
       isTerminal(capacity),
       full(false)
   { /* Nothing to do here. */ }

   /**
    * Store the given experience.
    *
    * The transition is written to the current write position, which is then
    * advanced; when it reaches `capacity` it wraps back to 0 and the memory is
    * marked as full.
    *
    * @param state Given state.
    * @param action Given action.
    * @param reward Given reward.
    * @param nextState Given next state.
    * @param isEnd Whether the next state is terminal.
    */
   void Store(const StateType& state,
              ActionType action,
              double reward,
              const StateType& nextState,
              bool isEnd)
   {
     states.col(position) = state.Encode();
     actions(position) = action;
     rewards(position) = reward;
     nextStates.col(position) = nextState.Encode();
     isTerminal(position) = isEnd;
     position++;
     if (position == capacity)
     {
       // The buffer has wrapped: from now on every slot holds valid data.
       full = true;
       position = 0;
     }
   }

   /**
    * Sample some experiences.
    *
    * `batchSize` indices are drawn with arma::randi from the range of slots
    * that currently hold data (the same index may be drawn more than once),
    * and the corresponding transitions are copied into the output arguments.
    *
    * @param sampledStates Sampled encoded states, one per column.
    * @param sampledActions Sampled actions.
    * @param sampledRewards Sampled rewards.
    * @param sampledNextStates Sampled encoded next states, one per column.
    * @param isTerminal Terminal flags of the sampled transitions.
    * @throws std::logic_error If no transition has been stored yet.
    */
   void Sample(arma::mat& sampledStates,
               arma::icolvec& sampledActions,
               arma::colvec& sampledRewards,
               arma::mat& sampledNextStates,
               arma::icolvec& isTerminal)
   {
     const size_t upperBound = Size();

     // Without this guard `upperBound - 1` would wrap around (size_t is
     // unsigned) and arma::randi would be handed an invalid range.
     if (upperBound == 0)
     {
       throw std::logic_error("RandomReplay::Sample(): cannot sample from an "
           "empty replay memory; call Store() first.");
     }

     arma::uvec sampledIndices = arma::randi<arma::uvec>(
         batchSize, arma::distr_param(0, upperBound - 1));

     sampledStates = states.cols(sampledIndices);
     sampledActions = actions.elem(sampledIndices);
     sampledRewards = rewards.elem(sampledIndices);
     sampledNextStates = nextStates.cols(sampledIndices);
     // The output parameter shadows the member, hence the explicit `this->`.
     isTerminal = this->isTerminal.elem(sampledIndices);
   }

   /**
    * Get the number of transitions in the memory.
    *
    * @return `capacity` once the buffer has wrapped at least once, otherwise
    *     the number of transitions stored so far.
    */
   const size_t& Size() const
   {
     return full ? capacity : position;
   }

  private:
   //! Number of transitions drawn by each call to Sample().
   size_t batchSize;

   //! Total number of transitions the memory can hold.
   size_t capacity;

   //! Index of the slot the next call to Store() will write to.
   size_t position;

   //! Encoded states, one transition per column.
   arma::mat states;

   //! Stored actions.
   arma::icolvec actions;

   //! Stored rewards.
   arma::colvec rewards;

   //! Encoded next states, one transition per column.
   arma::mat nextStates;

   //! Terminal flags of the stored transitions.
   arma::icolvec isTerminal;

   //! Whether the write position has wrapped around at least once.
   bool full;
 };

 } // namespace rl
 } // namespace mlpack

 #endif
mlpack::rl::RandomReplay::ActionType
typename EnvironmentType::Action ActionType
Convenient typedef for action.
Definition: random_replay.hpp:47

mlpack
.hpp
Definition: add_to_po.hpp:21

prereqs.hpp
The core includes that mlpack expects; standard C++ includes and Armadillo.

mlpack::rl::RandomReplay::StateType
typename EnvironmentType::State StateType
Convenient typedef for state.
Definition: random_replay.hpp:50

mlpack::rl::RandomReplay::Store
void Store(const StateType &state, ActionType action, double reward, const StateType &nextState, bool isEnd)
Store the given experience.
Definition: random_replay.hpp:82

mlpack::rl::RandomReplay::Sample
void Sample(arma::mat &sampledStates, arma::icolvec &sampledActions, arma::colvec &sampledRewards, arma::mat &sampledNextStates, arma::icolvec &isTerminal)
Sample some experiences.
Definition: random_replay.hpp:111

mlpack::rl::RandomReplay::Size
const size_t & Size()
Get the number of transitions in the memory.
Definition: random_replay.hpp:133

mlpack::rl::RandomReplay
Implementation of random experience replay.
Definition: random_replay.hpp:43

mlpack::rl::RandomReplay::RandomReplay
RandomReplay(const size_t batchSize, const size_t capacity, const size_t dimension=StateType::dimension)
Construct an instance of the random experience replay class.
Definition: random_replay.hpp:59