lstm.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_ANN_LAYER_LSTM_HPP
13 #define MLPACK_METHODS_ANN_LAYER_LSTM_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 #include <limits>
17 
18 namespace mlpack {
19 namespace ann {
20 
// LSTM layer class template (mlpack::ann).
//
// Template parameters:
//   InputDataType  - defaults to arma::mat; not referenced by any declaration
//                    visible in this header.
//   OutputDataType - defaults to arma::mat; type of every matrix-like member
//                    stored by the layer (weights, deltas, gradients, gate
//                    buffers) and of the values exposed by the accessors.
//
// Only declarations and trivial accessors live here; the member function
// definitions come from lstm_impl.hpp, included at the end of this header.
58 template <
59  typename InputDataType = arma::mat,
60  typename OutputDataType = arma::mat
61 >
62 class LSTM
63 {
64  public:
// Create the LSTM object.
66  LSTM();
67 
// Create the LSTM object from explicit sizes.
//
// @param inSize Number of input units (see InSize()).
// @param outSize Number of output units (see OutSize()).
// @param rho Maximum number of steps to backpropagate through time (BPTT);
//     defaults to the largest value representable by size_t.
75  LSTM(const size_t inSize,
76  const size_t outSize,
77  const size_t rho = std::numeric_limits<size_t>::max());
78 
// Ordinary feed-forward pass of a neural network, evaluating the function
// f(x).
//
// @param input Input data (read-only).
// @param output Object that receives the result of the pass.
86  template<typename InputType, typename OutputType>
87  void Forward(const InputType& input, OutputType& output);
88 
// Feed-forward overload that additionally takes a cell-state object.
//
// @param input Input data (read-only).
// @param output Object that receives the result of the pass.
// @param cellState Cell-state object passed by non-const reference.
// @param useCellState Defaults to false.
// NOTE(review): presumably useCellState selects whether the supplied
// cellState is used as the starting state -- confirm in lstm_impl.hpp.
98  template<typename InputType, typename OutputType>
99  void Forward(const InputType& input,
100  OutputType& output,
101  OutputType& cellState,
102  bool useCellState = false);
103 
// Ordinary feed backward pass of a neural network, calculating the function
// f(x) by propagating x backwards.
//
// @param input Read-only input argument.
// @param gy Read-only error argument.
// @param g Object that receives the result of the pass.
// NOTE(review): presumably gy is the backpropagated error and g the
// resulting gradient -- confirm in lstm_impl.hpp.
113  template<typename InputType, typename ErrorType, typename GradientType>
114  void Backward(const InputType& input,
115  const ErrorType& gy,
116  GradientType& g);
117 
118  /*
119  * Reset the layer parameter.
120  */
121  void Reset();
122 
123  /*
124  * Resets the cell to accept a new input. This breaks the BPTT chain and
125  * starts a new one.
126  *
127  * @param size The current maximum number of steps through time.
128  */
129  void ResetCell(const size_t size);
130 
131  /*
132  * Calculate the gradient using the output delta and the input activation.
133  *
134  * @param input The input parameter used for calculating the gradient.
135  * @param error The calculated error.
136  * @param gradient The calculated gradient.
137  */
138  template<typename InputType, typename ErrorType, typename GradientType>
139  void Gradient(const InputType& input,
140  const ErrorType& error,
141  GradientType& gradient);
142 
// Get the maximum number of steps to backpropagate through time (BPTT).
144  size_t Rho() const { return rho; }
// Modify the maximum number of steps to backpropagate through time (BPTT).
146  size_t& Rho() { return rho; }
147 
// Get the parameters (the weights member).
149  OutputDataType const& Parameters() const { return weights; }
// Modify the parameters.
151  OutputDataType& Parameters() { return weights; }
152 
// Get the output parameter.
154  OutputDataType const& OutputParameter() const { return outputParameter; }
// Modify the output parameter.
156  OutputDataType& OutputParameter() { return outputParameter; }
157 
// Get the delta.
159  OutputDataType const& Delta() const { return delta; }
// Modify the delta.
161  OutputDataType& Delta() { return delta; }
162 
// Get the gradient (the grad member). These zero-argument accessors overload
// the three-argument Gradient() member template declared above.
164  OutputDataType const& Gradient() const { return grad; }
// Modify the gradient.
166  OutputDataType& Gradient() { return grad; }
167 
// Get the number of input units.
169  size_t InSize() const { return inSize; }
170 
// Get the number of output units.
172  size_t OutSize() const { return outSize; }
173 
// Serialize the layer. The version argument is unnamed in this declaration.
177  template<typename Archive>
178  void serialize(Archive& ar, const unsigned int /* version */);
179 
180  private:
// Number of input units; returned by InSize().
182  size_t inSize;
183 
// Number of output units; returned by OutSize().
185  size_t outSize;
186 
// Maximum number of steps to backpropagate through time (BPTT); exposed by
// Rho().
188  size_t rho;
189 
// Step counters. NOTE(review): presumably the current time-step position of
// the forward, backward and gradient passes respectively -- confirm in
// lstm_impl.hpp.
191  size_t forwardStep;
192 
194  size_t backwardStep;
195 
197  size_t gradientStep;
198 
// Layer parameters; exposed by Parameters().
200  OutputDataType weights;
201 
// NOTE(review): presumably the output of the previous step -- confirm.
203  OutputDataType prevOutput;
204 
// Batch bookkeeping. NOTE(review): exact meaning of batchSize, batchStep and
// gradientStepIdx is not visible in this header -- see lstm_impl.hpp.
206  size_t batchSize;
207 
209  size_t batchStep;
210 
213  size_t gradientStepIdx;
214 
// NOTE(review): presumably the error of the cell activation -- confirm.
216  OutputDataType cellActivationError;
217 
// Delta object; exposed by Delta().
219  OutputDataType delta;
220 
// Gradient object; exposed by the zero-argument Gradient() accessors.
222  OutputDataType grad;
223 
// Output parameter object; exposed by OutputParameter().
225  OutputDataType outputParameter;
226 
// Input-gate parameters. NOTE(review): the roles of the weight/bias members
// below (through cell2GateOutputWeight) are inferred from their names only;
// how they are populated is defined in lstm_impl.hpp.
228  OutputDataType output2GateInputWeight;
229 
231  OutputDataType input2GateInputWeight;
232 
234  OutputDataType input2GateInputBias;
235 
237  OutputDataType cell2GateInputWeight;
238 
// Forget-gate parameters (see the note on the input-gate parameters).
240  OutputDataType output2GateForgetWeight;
241 
243  OutputDataType input2GateForgetWeight;
244 
246  OutputDataType input2GateForgetBias;
247 
249  OutputDataType cell2GateForgetWeight;
250 
// Output-gate parameters (see the note on the input-gate parameters).
252  OutputDataType output2GateOutputWeight;
253 
255  OutputDataType input2GateOutputWeight;
256 
258  OutputDataType input2GateOutputBias;
259 
261  OutputDataType cell2GateOutputWeight;
262 
// Gate and hidden-layer buffers. NOTE(review): presumably the values of each
// gate / the hidden layer, followed by their activations -- confirm in
// lstm_impl.hpp.
264  OutputDataType inputGate;
265 
267  OutputDataType forgetGate;
268 
270  OutputDataType hiddenLayer;
271 
273  OutputDataType outputGate;
274 
276  OutputDataType inputGateActivation;
277 
279  OutputDataType forgetGateActivation;
280 
282  OutputDataType outputGateActivation;
283 
285  OutputDataType hiddenLayerActivation;
286 
// Hidden-layer parameters. NOTE(review): roles inferred from names only.
288  OutputDataType input2HiddenWeight;
289 
291  OutputDataType input2HiddenBias;
292 
294  OutputDataType output2HiddenWeight;
295 
// Cell buffers. NOTE(review): presumably the cell state and its activation
// -- confirm in lstm_impl.hpp.
297  OutputDataType cell;
298 
300  OutputDataType cellActivation;
301 
// Error buffers. NOTE(review): presumably scratch storage used by the
// backward pass; usage is not visible in this header.
303  OutputDataType forgetGateError;
304 
306  OutputDataType outputGateError;
307 
309  OutputDataType prevError;
310 
// NOTE(review): distinct from outputParameter above; purpose not visible in
// this header -- confirm in lstm_impl.hpp.
312  OutputDataType outParameter;
313 
315  OutputDataType inputCellError;
316 
318  OutputDataType inputGateError;
319 
321  OutputDataType hiddenError;
322 
// BPTT bookkeeping. NOTE(review): relationship of rhoSize and bpttSteps to
// rho is defined in lstm_impl.hpp -- confirm there.
324  size_t rhoSize;
325 
327  size_t bpttSteps;
328 }; // class LSTM
329 
330 } // namespace ann
331 } // namespace mlpack
332 
333 // Include implementation.
334 #include "lstm_impl.hpp"
335 
336 #endif
OutputDataType const & OutputParameter() const
Get the output parameter.
Definition: lstm.hpp:154
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: add_to_po.hpp:21
size_t OutSize() const
Get the number of output units.
Definition: lstm.hpp:172
The core includes that mlpack expects; standard C++ includes and Armadillo.
OutputDataType & Gradient()
Modify the gradient.
Definition: lstm.hpp:166
OutputDataType & OutputParameter()
Modify the output parameter.
Definition: lstm.hpp:156
void serialize(Archive &ar, const unsigned int)
Serialize the layer.
size_t InSize() const
Get the number of input units.
Definition: lstm.hpp:169
size_t Rho() const
Get the maximum number of steps to backpropagate through time (BPTT).
Definition: lstm.hpp:144
OutputDataType const & Parameters() const
Get the parameters.
Definition: lstm.hpp:149
void Backward(const InputType &input, const ErrorType &gy, GradientType &g)
Ordinary feed backward pass of a neural network, calculating the function f(x) by propagating x backw...
OutputDataType const & Delta() const
Get the delta.
Definition: lstm.hpp:159
void Forward(const InputType &input, OutputType &output)
Ordinary feed-forward pass of a neural network, evaluating the function f(x) by propagating the activ...
LSTM()
Create the LSTM object.
OutputDataType const & Gradient() const
Get the gradient.
Definition: lstm.hpp:164
void ResetCell(const size_t size)
OutputDataType & Delta()
Modify the delta.
Definition: lstm.hpp:161
OutputDataType & Parameters()
Modify the parameters.
Definition: lstm.hpp:151
size_t & Rho()
Modify the maximum number of steps to backpropagate through time (BPTT).
Definition: lstm.hpp:146