mlpack-3.0.2/doxygen/q__learning_8hpp_source.html

 #ifndef MLPACK_METHODS_RL_Q_LEARNING_HPP
 #define MLPACK_METHODS_RL_Q_LEARNING_HPP

 #include <mlpack/prereqs.hpp>

 #include "replay/random_replay.hpp"
 #include "training_config.hpp"

 namespace mlpack {
 namespace rl {

 /**
  * Implementation of various Q-Learning algorithms, such as DQN, double DQN.
  *
  * NOTE(review): the roles of the template parameters are inferred from their
  * names and from how they appear in this declaration; the member function
  * definitions are not in this header (q_learning_impl.hpp is included at the
  * end of the file) -- confirm the details there.
  *
  * @tparam EnvironmentType Environment type. Must expose nested `State` and
  *     `Action` types, which are aliased below as StateType and ActionType.
  * @tparam NetworkType Type of the two network members (learningNetwork and
  *     targetNetwork).
  * @tparam UpdaterType Type of the updater member.
  * @tparam PolicyType Type of the policy member.
  * @tparam ReplayType Type of the replay member; defaults to
  *     RandomReplay<EnvironmentType>.
  */
 template <
   typename EnvironmentType,
   typename NetworkType,
   typename UpdaterType,
   typename PolicyType,
   typename ReplayType = RandomReplay<EnvironmentType>
 >
 class QLearning
 {
  public:
   //! Convenient typedef for state.
   using StateType = typename EnvironmentType::State;

   //! Convenient typedef for action.
   using ActionType = typename EnvironmentType::Action;

   /**
    * Create the QLearning object with given settings.
    *
    * All arguments are taken by value. The last two are optional and are
    * default-constructed when the caller omits them.
    *
    * @param config Training configuration (a TrainingConfig).
    * @param network Network object of type NetworkType. NOTE(review): how it
    *     is distributed between learningNetwork and targetNetwork is decided
    *     by the out-of-view constructor definition -- confirm there.
    * @param policy Policy object of type PolicyType.
    * @param replayMethod Replay object of type ReplayType.
    * @param updater Updater object; defaults to UpdaterType().
    * @param environment Environment object; defaults to EnvironmentType().
    */
   QLearning(TrainingConfig config,
             NetworkType network,
             PolicyType policy,
             ReplayType replayMethod,
             UpdaterType updater = UpdaterType(),
             EnvironmentType environment = EnvironmentType());

   /**
    * Execute a step in an episode.
    *
    * @return A double. NOTE(review): presumably the reward obtained for the
    *     step -- confirm against the definition.
    */
   double Step();

   /**
    * Execute an episode.
    *
    * @return A double. NOTE(review): presumably the total reward of the
    *     episode -- confirm against the definition.
    */
   double Episode();

   //! Get the value of the totalSteps counter (read-only reference).
   const size_t& TotalSteps() const { return totalSteps; }

   //! Modify the training mode / test mode indicator.
   bool& Deterministic() { return deterministic; }
   //! Get the indicator of training mode / test mode.
   const bool& Deterministic() const { return deterministic; }

  private:
   /**
    * Private helper taking a matrix of action values and returning a column
    * vector of size_t.
    *
    * NOTE(review): presumably returns, per column of actionValues, the index
    * of the highest-valued action -- confirm the orientation and semantics
    * against the definition.
    *
    * @param actionValues Matrix of action values.
    */
   arma::Col<size_t> BestAction(const arma::mat& actionValues);

   // NOTE: non-static data members are initialized in declaration order, so
   // the order of the members below must stay in sync with the constructor's
   // initializer list (defined outside this header).

   //! Training configuration passed at construction.
   TrainingConfig config;

   //! Network being trained (presumably; verify against the implementation).
   NetworkType learningNetwork;

   //! Second network of the same type (presumably the target network used to
   //! compute target values; verify against the implementation).
   NetworkType targetNetwork;

   //! Updater object of type UpdaterType.
   UpdaterType updater;

   //! Policy object of type PolicyType.
   PolicyType policy;

   //! Replay object of type ReplayType.
   ReplayType replayMethod;

   //! Environment object of type EnvironmentType.
   EnvironmentType environment;

   //! Step counter exposed through TotalSteps().
   size_t totalSteps;

   //! A stored environment state (presumably the agent's current state;
   //! verify against the implementation).
   StateType state;

   //! Training mode / test mode indicator exposed through Deterministic().
   bool deterministic;
 };

 } // namespace rl
 } // namespace mlpack

 // Include implementation
 #include "q_learning_impl.hpp"
 #endif
mlpack
.hpp
Definition: add_to_po.hpp:21

mlpack::rl::QLearning::ActionType
typename EnvironmentType::Action ActionType
Convenient typedef for action.
Definition: q_learning.hpp:64

random_replay.hpp

mlpack::rl::QLearning::QLearning
QLearning(TrainingConfig config, NetworkType network, PolicyType policy, ReplayType replayMethod, UpdaterType updater=UpdaterType(), EnvironmentType environment=EnvironmentType())
Create the QLearning object with given settings.

mlpack::rl::QLearning::Deterministic
bool & Deterministic()
Modify the training mode / test mode indicator.
Definition: q_learning.hpp:104

prereqs.hpp
The core includes that mlpack expects; standard C++ includes and Armadillo.

mlpack::rl::QLearning::Episode
double Episode()
Execute an episode.

mlpack::rl::TrainingConfig
Definition: training_config.hpp:19

mlpack::rl::QLearning::StateType
typename EnvironmentType::State StateType
Convenient typedef for state.
Definition: q_learning.hpp:61

mlpack::rl::QLearning::Deterministic
const bool & Deterministic() const
Get the indicator of training mode / test mode.
Definition: q_learning.hpp:106

mlpack::rl::QLearning::TotalSteps
const size_t & TotalSteps() const
Definition: q_learning.hpp:101

mlpack::rl::QLearning
Implementation of various Q-Learning algorithms, such as DQN, double DQN.
Definition: q_learning.hpp:57

training_config.hpp

mlpack::rl::QLearning::Step
double Step()
Execute a step in an episode.