
/neuralnethack/mlp/Error.hh

http://neuralnethack.googlecode.com/
/*$Id: Error.hh 1654 2007-07-04 21:55:36Z michael $*/
/*
  Copyright (C) 2004 Michael Green

  neuralnethack is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  Michael Green <michael@thep.lu.se>
*/
#ifndef __Error_hh__
#define __Error_hh__

#include "Mlp.hh"
#include "../datatools/DataSet.hh"

#include <vector>

namespace MultiLayerPerceptron
{
/**A base class representing the error functions. The error function is
 * responsible for calculating an error and a gradient to update the Mlp
 * weights by. Every Error function uses full batch mode, i.e. it is up
 * to the Trainer to divide the DataSet it receives into sub-DataSets
 * when block or online updating is wanted. The implementation-specific
 * error function \f$E\f$ is extended with the weight elimination term
 * \f[\hat{E} = E + \alpha\sum_i\frac{\omega_i^2}{\omega_0^2 + \omega_i^2}\f]
 * \sa SummedSquare, CrossEntropy, DataSet, Trainer.
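 *
 * A minimal usage sketch is given below. It is hypothetical: the exact
 * constructor signatures of SummedSquare, Mlp and DataSet are assumed
 * here rather than taken from this header.
 * \code
 * MultiLayerPerceptron::Mlp mlp(arch);   // an Mlp with some architecture
 * DataTools::DataSet trnData;            // some training data
 * SummedSquare error(mlp, trnData);      // a concrete Error function
 * error.weightElimOn(true);              // enable the penalty term
 * double e = error.gradient();           // error and gradient over trnData
 * \endcode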
 */
class Error
{
public:
    /**Basic destructor. */
    virtual ~Error();

    /**Calculate the gradient vector, and return the error.
     * This returns the error calculated from a block of patterns.
     * \param mlp the MLP to calculate the gradient for.
     * \param dset the data set to use.
     * \return the error.
     */
    virtual double gradient(MultiLayerPerceptron::Mlp& mlp,
            DataTools::DataSet& dset) = 0;

    /**Calculate the gradient vector, and return the error.
     * This returns the error calculated from a block of patterns.
     * \return the error.
     */
    virtual double gradient() = 0;

    /**Calculate the error for this Mlp and DataSet.
     * \param mlp the MLP to calculate the error for.
     * \param dset the data set to use.
     * \return the error.
     */
    virtual double outputError(MultiLayerPerceptron::Mlp& mlp,
            DataTools::DataSet& dset) = 0;

    /**Calculate the error for this Mlp and DataSet.
     * \return the error.
     */
    virtual double outputError() const = 0;

    /**Accessor method for the Mlp.
     * \return the Mlp belonging to the Error.
     */
    MultiLayerPerceptron::Mlp& mlp();

    /**Mutator method for the Mlp.
     * \param mlp the Mlp to assign to this Error.
     */
    void mlp(MultiLayerPerceptron::Mlp& mlp);

    /**Accessor method for the DataSet.
     * \return the DataSet belonging to the Error.
     */
    DataTools::DataSet& dset();

    /**Mutator method for the DataSet.
     * \param dset the DataSet to assign to this Error.
     */
    void dset(DataTools::DataSet& dset);

    /**Accessor for theWeightElimOn.
     * \return the value of theWeightElimOn.
     */
    bool weightElimOn() const;

    /**Mutator for theWeightElimOn.
     * \param on the value to set.
     */
    void weightElimOn(bool on);

    /**Accessor for theWeightElimAlpha.
     * \return the value of theWeightElimAlpha.
     */
    double weightElimAlpha() const;

    /**Mutator for theWeightElimAlpha.
     * \param alpha the value to set.
     */
    void weightElimAlpha(double alpha);

    /**Accessor for theWeightElimW0.
     * \return the value of theWeightElimW0.
     */
    double weightElimW0() const;

    /**Mutator for theWeightElimW0.
     * \param w0 the value to set.
     */
    void weightElimW0(double w0);
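
    // A hedged configuration sketch: the values below are illustrative
    // only, not defaults taken from this header.
    //
    //     error.weightElimOn(true);    // switch the penalty term on
    //     error.weightElimAlpha(0.1);  // importance of the penalty
    //     error.weightElimW0(1.0);     // scale factor, typically unity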

protected:
    /**Basic constructor.
     * \param mlp the mlp to use.
     * \param dset the dataset to use.
     */
    Error(MultiLayerPerceptron::Mlp& mlp, DataTools::DataSet& dset);

    /**Calculate the error for this output.
     * \param out the output from the MLP.
     * \param dout the desired output.
     * \return the error.
     */
    virtual double outputError(const std::vector<double>& out,
            const std::vector<double>& dout) const = 0;
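
    // This is where a concrete subclass defines its error measure. For
    // SummedSquare one would expect something on the order of
    // sum_j (out[j] - dout[j])^2, possibly scaled; that is an assumption
    // about the subclass, not something declared in this header.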

    /**Calculates the gradient of the weight elimination term for a
     * single weight.
     * \param wi the weight to regularize.
     * \return the gradient of the weight elimination term.
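     *
     * For reference, differentiating the penalty term gives the standard
     * weight elimination gradient
     * \f[\frac{\partial}{\partial\omega_i}
     * \frac{\alpha\omega_i^2}{\omega_0^2 + \omega_i^2}
     * = \frac{2\alpha\omega_i\omega_0^2}{(\omega_0^2 + \omega_i^2)^2}\f]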
     */
    double weightElimGrad(double wi) const;

    /**Add the gradient of the weight elimination term to each gradient
     * in the specified interval.
     * The offset defines the starting index while length
     * defines the number of elements to update.
     * \param gradients the gradients to regularize.
     * \param weights the weights corresponding to the gradients.
     * \param offset the offset.
     * \param length the length.
     */
    void weightElimGrad(std::vector<double>& gradients,
            const std::vector<double>& weights,
            uint offset, uint length) const;

    /**Add the gradient of the weight elimination term to each
     * gradient in a layer, except the bias.
     * \param gradients the gradient vector for the entire Mlp.
     * \param weights the weight vector for the entire Mlp.
     * \param ncurr the number of neurons in the current layer.
     * \param nprev the number of neurons in the previous layer.
     */
    void weightElimGradLayer(std::vector<double>& gradients,
            const std::vector<double>& weights,
            uint ncurr, uint nprev) const;
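
    // A hedged illustration of the bias skipping: if each of the ncurr
    // neurons stores its nprev incoming weights followed by one bias
    // (an assumption about the weight layout, which this header does not
    // specify), the layer update would amount to something like
    //
    //     for(uint n = 0; n < ncurr; ++n)
    //         weightElimGrad(gradients, weights, n * (nprev + 1), nprev);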

    /**Add the gradient of the weight elimination term to each
     * gradient in the Mlp, except the biases.
     * \param gradients the gradient vector for the entire Mlp.
     * \param weights the weight vector for the entire Mlp.
     * \param arch the architecture for the Mlp.
     */
    void weightElimGradMlp(std::vector<double>& gradients,
            const std::vector<double>& weights,
            const std::vector<uint>& arch) const;

    /**Add the gradient of the weight elimination term to each
     * gradient in the Mlp.
     * The bias is skipped as usual.
     */
    void weightElimGrad();

    /**Calculates the weight elimination term.
     * \return the weight elimination term.
     */
    double weightElim() const;
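
    // A minimal sketch of the penalty computation, assuming a
    // hypothetical Mlp::weights() accessor that returns the weight
    // vector (the real Mlp interface lives in Mlp.hh, not here); a real
    // implementation would presumably also skip the biases, as the
    // gradient helpers above do:
    //
    //     double sum = 0.0;
    //     const std::vector<double>& w = theMlp->weights();
    //     for(uint i = 0; i < w.size(); ++i)
    //         sum += w[i] * w[i]
    //             / (theWeightElimW0 * theWeightElimW0 + w[i] * w[i]);
    //     return theWeightElimAlpha * sum;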

    /**The Mlp associated with an Error. */
    MultiLayerPerceptron::Mlp* theMlp;

    /**The DataSet associated with an Error. */
    DataTools::DataSet* theDset;

    /**Controls whether to use weight elimination or not. */
    bool theWeightElimOn;

    /**The importance of the weight elimination term. */
    double theWeightElimAlpha;

    /**Scaling factor typically set to unity. */
    double theWeightElimW0;

private:
    /**Copy constructor. Declared private to disallow copying. */
    Error(const Error&);

    /**Assignment operator. Declared private to disallow copying. */
    Error& operator=(const Error&);
};
} // namespace MultiLayerPerceptron

#endif /* __Error_hh__ */