dueling_dqn.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_RL_DUELING_DQN_HPP
13 #define MLPACK_METHODS_RL_DUELING_DQN_HPP
14 
15 #include <mlpack/prereqs.hpp>
21 
22 namespace mlpack {
23 namespace rl {
24 
25 using namespace mlpack::ann;
26 
46 template <
47  typename CompleteNetworkType = FFN<EmptyLoss<>, GaussianInitialization>,
48  typename FeatureNetworkType = Sequential<>,
49  typename AdvantageNetworkType = Sequential<>,
50  typename ValueNetworkType = Sequential<>
51 >
53 {
54  public:
56  DuelingDQN() : isNoisy(false)
57  {
58  featureNetwork = new Sequential<>();
59  valueNetwork = new Sequential<>();
60  advantageNetwork = new Sequential<>();
61  concat = new Concat<>(true);
62 
63  concat->Add(valueNetwork);
64  concat->Add(advantageNetwork);
65  completeNetwork.Add(new IdentityLayer<>());
66  completeNetwork.Add(featureNetwork);
67  completeNetwork.Add(concat);
68  }
69 
79  DuelingDQN(const int inputDim,
80  const int h1,
81  const int h2,
82  const int outputDim,
83  const bool isNoisy = false):
84  completeNetwork(EmptyLoss<>(), GaussianInitialization(0, 0.001)),
85  isNoisy(isNoisy)
86  {
87  featureNetwork = new Sequential<>();
88  featureNetwork->Add(new Linear<>(inputDim, h1));
89  featureNetwork->Add(new ReLULayer<>());
90 
91  valueNetwork = new Sequential<>();
92  advantageNetwork = new Sequential<>();
93 
94  if (isNoisy)
95  {
96  noisyLayerIndex.push_back(valueNetwork->Model().size());
97  valueNetwork->Add(new NoisyLinear<>(h1, h2));
98  advantageNetwork->Add(new NoisyLinear<>(h1, h2));
99 
100  valueNetwork->Add(new ReLULayer<>());
101  advantageNetwork->Add(new ReLULayer<>());
102 
103  noisyLayerIndex.push_back(valueNetwork->Model().size());
104  valueNetwork->Add(new NoisyLinear<>(h2, 1));
105  advantageNetwork->Add(new NoisyLinear<>(h2, outputDim));
106  }
107  else
108  {
109  valueNetwork->Add(new Linear<>(h1, h2));
110  valueNetwork->Add(new ReLULayer<>());
111  valueNetwork->Add(new Linear<>(h2, 1));
112 
113  advantageNetwork->Add(new Linear<>(h1, h2));
114  advantageNetwork->Add(new ReLULayer<>());
115  advantageNetwork->Add(new Linear<>(h2, outputDim));
116  }
117 
118  concat = new Concat<>(true);
119  concat->Add(valueNetwork);
120  concat->Add(advantageNetwork);
121 
122  completeNetwork.Add(new IdentityLayer<>());
123  completeNetwork.Add(featureNetwork);
124  completeNetwork.Add(concat);
125  this->ResetParameters();
126  }
127 
128  DuelingDQN(FeatureNetworkType featureNetwork,
129  AdvantageNetworkType advantageNetwork,
130  ValueNetworkType valueNetwork,
131  const bool isNoisy = false):
132  featureNetwork(std::move(featureNetwork)),
133  advantageNetwork(std::move(advantageNetwork)),
134  valueNetwork(std::move(valueNetwork)),
135  isNoisy(isNoisy)
136  {
137  concat = new Concat<>(true);
138  concat->Add(valueNetwork);
139  concat->Add(advantageNetwork);
140  completeNetwork.Add(new IdentityLayer<>());
141  completeNetwork.Add(featureNetwork);
142  completeNetwork.Add(concat);
143  this->ResetParameters();
144  }
145 
147  DuelingDQN(const DuelingDQN& model) : isNoisy(false)
148  { /* Nothing to do here. */ }
149 
151  void operator = (const DuelingDQN& model)
152  {
153  *valueNetwork = *model.valueNetwork;
154  *advantageNetwork = *model.advantageNetwork;
155  *featureNetwork = *model.featureNetwork;
156  isNoisy = model.isNoisy;
157  noisyLayerIndex = model.noisyLayerIndex;
158  }
159 
171  void Predict(const arma::mat state, arma::mat& actionValue)
172  {
173  arma::mat advantage, value, networkOutput;
174  completeNetwork.Predict(state, networkOutput);
175  value = networkOutput.row(0);
176  advantage = networkOutput.rows(1, networkOutput.n_rows - 1);
177  actionValue = advantage.each_row() +
178  (value - arma::mean(advantage));
179  }
180 
187  void Forward(const arma::mat state, arma::mat& actionValue)
188  {
189  arma::mat advantage, value, networkOutput;
190  completeNetwork.Forward(state, networkOutput);
191  value = networkOutput.row(0);
192  advantage = networkOutput.rows(1, networkOutput.n_rows - 1);
193  actionValue = advantage.each_row() +
194  (value - arma::mean(advantage));
195  this->actionValues = actionValue;
196  }
197 
205  void Backward(const arma::mat state, arma::mat& target, arma::mat& gradient)
206  {
207  arma::mat gradLoss;
208  lossFunction.Backward(this->actionValues, target, gradLoss);
209 
210  arma::mat gradValue = arma::sum(gradLoss);
211  arma::mat gradAdvantage = gradLoss.each_row() - arma::mean(gradLoss);
212 
213  arma::mat grad = arma::join_cols(gradValue, gradAdvantage);
214  completeNetwork.Backward(state, grad, gradient);
215  }
216 
221  {
222  completeNetwork.ResetParameters();
223  }
224 
228  void ResetNoise()
229  {
230  for (size_t i = 0; i < noisyLayerIndex.size(); i++)
231  {
232  boost::get<NoisyLinear<>*>
233  (valueNetwork->Model()[noisyLayerIndex[i]])->ResetNoise();
234  boost::get<NoisyLinear<>*>
235  (advantageNetwork->Model()[noisyLayerIndex[i]])->ResetNoise();
236  }
237  }
238 
240  const arma::mat& Parameters() const { return completeNetwork.Parameters(); }
242  arma::mat& Parameters() { return completeNetwork.Parameters(); }
243 
244  private:
246  CompleteNetworkType completeNetwork;
247 
249  Concat<>* concat;
250 
252  FeatureNetworkType* featureNetwork;
253 
255  AdvantageNetworkType* advantageNetwork;
256 
258  ValueNetworkType* valueNetwork;
259 
261  bool isNoisy;
262 
264  std::vector<size_t> noisyLayerIndex;
265 
267  arma::mat actionValues;
268 
270  MeanSquaredError<> lossFunction;
271 };
272 
273 } // namespace rl
274 } // namespace mlpack
275 
276 #endif
Artificial Neural Network.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: add_to_po.hpp:21
void Backward(const arma::mat state, arma::mat &target, arma::mat &gradient)
Perform the backward pass of the state in real batch mode.
DuelingDQN(const int inputDim, const int h1, const int h2, const int outputDim, const bool isNoisy=false)
Construct an instance of DuelingDQN class.
Definition: dueling_dqn.hpp:79
void Forward(const arma::mat state, arma::mat &actionValue)
Perform the forward pass of the states in real batch mode.
The core includes that mlpack expects; standard C++ includes and Armadillo.
Implementation of the Linear layer class.
Definition: layer_types.hpp:82
void Predict(const arma::mat state, arma::mat &actionValue)
Predict the responses to a given set of predictors.
Definition: prereqs.hpp:55
Implementation of the Dueling Deep Q-Learning network.
Definition: dueling_dqn.hpp:52
The empty loss does nothing, letting the user calculate the loss outside the model.
Definition: empty_loss.hpp:35
Implementation of the base layer.
Definition: base_layer.hpp:65
DuelingDQN()
Default constructor.
Definition: dueling_dqn.hpp:56
DuelingDQN(const DuelingDQN &model)
Copy constructor.
DuelingDQN(FeatureNetworkType featureNetwork, AdvantageNetworkType advantageNetwork, ValueNetworkType valueNetwork, const bool isNoisy=false)
Implementation of the Concat class.
Definition: concat.hpp:45
Implementation of the NoisyLinear layer class.
Definition: layer_types.hpp:96
void ResetNoise()
Resets noise of the network, if the network is of type noisy.
The mean squared error performance function measures the network's performance according to the mean ...
void ResetParameters()
Resets the parameters of the network.
arma::mat & Parameters()
Modify the Parameters.
Implementation of a standard feed forward network.
Definition: ffn.hpp:52
const arma::mat & Parameters() const
Return the Parameters.
void Add(Args... args)
Definition: sequential.hpp:142
Implementation of the Sequential class.
void Add(Args... args)
Definition: concat.hpp:147
This class is used to initialize the weight matrix with a Gaussian distribution.