lstm.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_ANN_LAYER_LSTM_HPP
13 #define MLPACK_METHODS_ANN_LAYER_LSTM_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 #include <limits>
17 
18 namespace mlpack {
19 namespace ann {
20 
// LSTM layer class template, declared in namespace mlpack::ann. Only the
// declaration lives here; the member definitions come from lstm_impl.hpp, which
// is included at the bottom of this header.
//
// InputDataType is the type of the stored input parameter; OutputDataType is
// the type of the weights, delta, gradient, output parameter and every other
// matrix-like member below. Both default to arma::mat.
59 template <
60  typename InputDataType = arma::mat,
61  typename OutputDataType = arma::mat
62 >
63 class LSTM
64 {
65  public:
// Create the LSTM object.
67  LSTM();
68 
// Create the LSTM object from the given sizes.
//
// @param inSize Stored in the inSize member (presumably the number of input
//     units -- confirm in lstm_impl.hpp).
// @param outSize Stored in the outSize member (presumably the number of output
//     units -- confirm in lstm_impl.hpp).
// @param rho Maximum number of steps to backpropagate through time (BPTT);
//     defaults to the largest value representable by size_t.
76  LSTM(const size_t inSize,
77  const size_t outSize,
78  const size_t rho = std::numeric_limits<size_t>::max());
79 
// Ordinary feed forward pass of a neural network, evaluating the function
// f(x).
//
// @param input Input data for the pass.
// @param output Object that receives the result (presumably written in
//     place -- confirm in lstm_impl.hpp).
87  template<typename InputType, typename OutputType>
88  void Forward(InputType&& input, OutputType&& output);
89 
// Ordinary feed backward pass of a neural network.
//
// @param input Input of the pass (taken as a const rvalue reference).
// @param gy Presumably the backpropagated error -- confirm in lstm_impl.hpp.
// @param g Presumably the resulting gradient -- confirm in lstm_impl.hpp.
99  template<typename InputType, typename ErrorType, typename GradientType>
100  void Backward(const InputType&& input,
101  ErrorType&& gy,
102  GradientType&& g);
103 
104  /*
105  * Reset the layer parameter.
106  */
107  void Reset();
108 
109  /*
110  * Resets the cell to accept a new input. This breaks the BPTT chain and
111  * starts a new one.
112  *
113  * @param size The current maximum number of steps through time.
114  */
115  void ResetCell(const size_t size);
116 
117  /*
118  * Calculate the gradient using the output delta and the input activation.
119  *
120  * @param input The input parameter used for calculating the gradient.
121  * @param error The calculated error.
122  * @param gradient The calculated gradient.
123  */
124  template<typename InputType, typename ErrorType, typename GradientType>
125  void Gradient(InputType&& input,
126  ErrorType&& error,
127  GradientType&& gradient);
128 
// Get the maximum number of steps to backpropagate through time (BPTT).
130  size_t Rho() const { return rho; }
// Modify the maximum number of steps to backpropagate through time (BPTT).
132  size_t& Rho() { return rho; }
133 
// Get the parameters.
135  OutputDataType const& Parameters() const { return weights; }
// Modify the parameters.
137  OutputDataType& Parameters() { return weights; }
138 
// Get the input parameter.
140  InputDataType const& InputParameter() const { return inputParameter; }
// Modify the input parameter.
142  InputDataType& InputParameter() { return inputParameter; }
143 
// Get the output parameter.
145  OutputDataType const& OutputParameter() const { return outputParameter; }
// Modify the output parameter.
147  OutputDataType& OutputParameter() { return outputParameter; }
148 
// Get the delta.
150  OutputDataType const& Delta() const { return delta; }
// Modify the delta.
152  OutputDataType& Delta() { return delta; }
153 
// Get the gradient.
155  OutputDataType const& Gradient() const { return grad; }
// Modify the gradient.
157  OutputDataType& Gradient() { return grad; }
158 
// Serialize the layer. The version argument is accepted but left unnamed.
162  template<typename Archive>
163  void serialize(Archive& ar, const unsigned int /* version */);
164 
165  private:
// NOTE(review): the per-member documentation was stripped from this listing.
// Only members whose role is established by an accessor above are described
// below; the roles of the remaining members should be confirmed against the
// original header or lstm_impl.hpp.
167  size_t inSize;
168 
170  size_t outSize;
171 
// Maximum number of steps to backpropagate through time (BPTT); see Rho().
173  size_t rho;
174 
176  size_t forwardStep;
177 
179  size_t backwardStep;
180 
182  size_t gradientStep;
183 
// Layer parameters; exposed through Parameters().
185  OutputDataType weights;
186 
188  OutputDataType prevOutput;
189 
191  size_t batchSize;
192 
194  size_t batchStep;
195 
198  size_t gradientStepIdx;
199 
201  OutputDataType cellActivationError;
202 
// Delta object; exposed through Delta().
204  OutputDataType delta;
205 
// Gradient object; exposed through Gradient().
207  OutputDataType grad;
208 
// Input parameter object; exposed through InputParameter().
210  InputDataType inputParameter;
211 
// Output parameter object; exposed through OutputParameter().
213  OutputDataType outputParameter;
214 
216  OutputDataType output2GateInputWeight;
217 
219  OutputDataType input2GateInputWeight;
220 
222  OutputDataType input2GateInputBias;
223 
225  OutputDataType cell2GateInputWeight;
226 
228  OutputDataType output2GateForgetWeight;
229 
231  OutputDataType input2GateForgetWeight;
232 
234  OutputDataType input2GateForgetBias;
235 
237  OutputDataType cell2GateForgetWeight;
238 
240  OutputDataType output2GateOutputWeight;
241 
243  OutputDataType input2GateOutputWeight;
244 
246  OutputDataType input2GateOutputBias;
247 
249  OutputDataType cell2GateOutputWeight;
250 
252  OutputDataType inputGate;
253 
255  OutputDataType forgetGate;
256 
258  OutputDataType hiddenLayer;
259 
261  OutputDataType outputGate;
262 
264  OutputDataType inputGateActivation;
265 
267  OutputDataType forgetGateActivation;
268 
270  OutputDataType outputGateActivation;
271 
273  OutputDataType hiddenLayerActivation;
274 
276  OutputDataType input2HiddenWeight;
277 
279  OutputDataType input2HiddenBias;
280 
282  OutputDataType output2HiddenWeight;
283 
285  OutputDataType cell;
286 
288  OutputDataType cellActivation;
289 
291  OutputDataType forgetGateError;
292 
294  OutputDataType outputGateError;
295 
297  OutputDataType prevError;
298 
300  OutputDataType outParameter;
301 
303  OutputDataType inputCellError;
304 
306  OutputDataType inputGateError;
307 
309  OutputDataType hiddenError;
310 
312  size_t rhoSize;
313 
315  size_t bpttSteps;
316 }; // class LSTM
317 
318 } // namespace ann
319 } // namespace mlpack
320 
321 // Include implementation.
322 #include "lstm_impl.hpp"
323 
324 #endif
OutputDataType const & OutputParameter() const
Get the output parameter.
Definition: lstm.hpp:145
.hpp
Definition: add_to_po.hpp:21
The core includes that mlpack expects; standard C++ includes and Armadillo.
OutputDataType & Gradient()
Modify the gradient.
Definition: lstm.hpp:157
OutputDataType & OutputParameter()
Modify the output parameter.
Definition: lstm.hpp:147
void serialize(Archive &ar, const unsigned int)
Serialize the layer.
size_t Rho() const
Get the maximum number of steps to backpropagate through time (BPTT).
Definition: lstm.hpp:130
OutputDataType const & Parameters() const
Get the parameters.
Definition: lstm.hpp:135
void Forward(InputType &&input, OutputType &&output)
Ordinary feed forward pass of a neural network, evaluating the function f(x) by propagating the activity forward through f.
InputDataType & InputParameter()
Modify the input parameter.
Definition: lstm.hpp:142
OutputDataType const & Delta() const
Get the delta.
Definition: lstm.hpp:150
void Backward(const InputType &&input, ErrorType &&gy, GradientType &&g)
Ordinary feed backward pass of a neural network, calculating the function f(x) by propagating x backwards through f.
LSTM()
Create the LSTM object.
InputDataType const & InputParameter() const
Get the input parameter.
Definition: lstm.hpp:140
OutputDataType const & Gradient() const
Get the gradient.
Definition: lstm.hpp:155
void ResetCell(const size_t size)
OutputDataType & Delta()
Modify the delta.
Definition: lstm.hpp:152
OutputDataType & Parameters()
Modify the parameters.
Definition: lstm.hpp:137
size_t & Rho()
Modify the maximum number of steps to backpropagate through time (BPTT).
Definition: lstm.hpp:132