nadamax_update.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_CORE_OPTIMIZERS_ADAM_NADAMAX_UPDATE_HPP
14 #define MLPACK_CORE_OPTIMIZERS_ADAM_NADAMAX_UPDATE_HPP
15 
16 #include <mlpack/prereqs.hpp>
17 
18 namespace mlpack {
19 namespace optimization {
20 
38 {
39  public:
49  NadaMaxUpdate(const double epsilon = 1e-8,
50  const double beta1 = 0.9,
51  const double beta2 = 0.99,
52  const double scheduleDecay = 4e-3) :
53  epsilon(epsilon),
54  beta1(beta1),
55  beta2(beta2),
56  scheduleDecay(scheduleDecay),
57  cumBeta1(1),
58  iteration(0)
59  {
60  // Nothing to do.
61  }
62 
70  void Initialize(const size_t rows, const size_t cols)
71  {
72  m = arma::zeros<arma::mat>(rows, cols);
73  u = arma::zeros<arma::mat>(rows, cols);
74  }
75 
83  void Update(arma::mat& iterate,
84  const double stepSize,
85  const arma::mat& gradient)
86  {
87  // Increment the iteration counter variable.
88  ++iteration;
89 
90  // And update the iterate.
91  m *= beta1;
92  m += (1 - beta1) * gradient;
93 
94  u = arma::max(u * beta2, arma::abs(gradient));
95 
96  double beta1T = beta1 * (1 - (0.5 *
97  std::pow(0.96, iteration * scheduleDecay)));
98 
99  double beta1T1 = beta1 * (1 - (0.5 *
100  std::pow(0.96, (iteration + 1) * scheduleDecay)));
101 
102  cumBeta1 *= beta1T;
103 
104  const double biasCorrection1 = 1.0 - cumBeta1;
105 
106  const double biasCorrection2 = 1.0 - (cumBeta1 * beta1T1);
107 
108  if ((biasCorrection1 != 0) && (biasCorrection2 != 0))
109  {
110  iterate -= (stepSize * (((1 - beta1T) / biasCorrection1) * gradient
111  + (beta1T1 / biasCorrection2) * m)) / (u + epsilon);
112  }
113  }
114 
116  double Epsilon() const { return epsilon; }
118  double& Epsilon() { return epsilon; }
119 
121  double CumBeta1() const { return cumBeta1; }
123  double& CumBeta1() { return cumBeta1; }
124 
126  double Beta1() const { return beta1; }
128  double& Beta1() { return beta1; }
129 
131  double Beta2() const { return beta2; }
133  double& Beta2() { return beta2; }
134 
136  double ScheduleDecay() const { return scheduleDecay; }
138  double& ScheduleDecay() { return scheduleDecay; }
139 
140  private:
141  // The epsilon value used to initialise the squared gradient parameter.
142  double epsilon;
143 
144  // The smoothing parameter.
145  double beta1;
146 
147  // The second moment coefficient.
148  double beta2;
149 
150  // The exponential moving average of gradient values.
151  arma::mat m;
152 
153  // The exponentially weighted infinity norm.
154  arma::mat u;
155 
156  // The decay parameter for decay coefficients
157  double scheduleDecay;
158 
159  // The cumulative product of decay coefficients
160  double cumBeta1;
161 
162  // The number of iterations.
163  double iteration;
164 };
165 
166 } // namespace optimization
167 } // namespace mlpack
168 
169 #endif
double ScheduleDecay() const
Get the decay parameter for decay coefficients.
double Epsilon() const
Get the value used to initialise the squared gradient parameter.
NadaMaxUpdate(const double epsilon=1e-8, const double beta1=0.9, const double beta2=0.99, const double scheduleDecay=4e-3)
Construct the NadaMax update policy with the given parameters.
.hpp
Definition: add_to_po.hpp:21
void Update(arma::mat &iterate, const double stepSize, const arma::mat &gradient)
Update step for NadaMax.
double & CumBeta1()
Modify the value of the cumulative product of decay coefficients.
The core includes that mlpack expects: standard C++ includes and Armadillo.
double & Epsilon()
Modify the value used to initialise the squared gradient parameter.
double & Beta1()
Modify the smoothing parameter.
double & ScheduleDecay()
Modify the decay parameter for decay coefficients.
void Initialize(const size_t rows, const size_t cols)
The Initialize() method is called by the optimizer before the start of the iteration update process...
double Beta2() const
Get the second moment coefficient.
double CumBeta1() const
Get the value of the cumulative product of decay coefficients.
double Beta1() const
Get the smoothing parameter.
double & Beta2()
Modify the second moment coefficient.
NadaMax is an optimizer that combines AdaMax with Nesterov accelerated gradient (NAG) momentum.