PageRenderTime 50ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/multivagraddec.php

https://github.com/Drewness/PHPIR
PHP | 271 lines | 124 code | 34 blank | 113 comment | 11 complexity | c8d8636ce86f3d1b62d71d61f9f5dbd9 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  /**
   * Multivariate linear regression fitted with gradient descent.
   *
   * Typical use: pass training rows to set_data(), optionally adjust the step
   * size with set_learning_rate(), then call find_params() to get the fitted
   * parameters. Parameter 0 is the intercept; parameter i + 1 weights feature i.
   */
  class MVGradient {
      /** @var array Training rows, each array(0 => features, 1 => target), as returned by scale_data(). */
      protected $data;

      /** @var float Step size applied to every parameter update. */
      protected $learning_rate = 0.1;

      /**
       * Set the training data. The rows are passed through scale_data()
       * before being stored.
       *
       * @param array $data rows of array(0 => array(x1, x2, ...), 1 => y)
       * @return void
       */
      public function set_data($data) {
          $this->data = $this->scale_data($data);
      }

      /**
       * Set the rate at which the algorithm updates.
       * Normal values are 0.1 - 0.001
       *
       * @param float $rate
       * @return void
       */
      public function set_learning_rate($rate) {
          $this->learning_rate = $rate;
      }

      /**
       * Mean-normalise every feature column:
       *
       *     (xi - avg(xi)) / (max(xi) - min(xi))
       *
       * so each feature ends up with a mean of 0 and spans a range of width 1.
       * A column whose values are all identical has a zero range; it is mapped
       * to 0.0 rather than dividing by zero. Targets ($row[1]) are untouched.
       *
       * @param array $data rows of array(0 => features, 1 => target)
       * @return array the same rows with scaled features
       */
      protected function scale_data($data) {
          $rows = count($data);
          $stats = array();

          foreach ($data as $row) {
              foreach ($row[0] as $id => $val) {
                  // Seed min/max from the first value seen instead of a
                  // `false` sentinel: with loose comparison a genuine 0
                  // looks like "unset" and would be overwritten.
                  if (!isset($stats[$id])) {
                      $stats[$id] = array('min' => $val, 'max' => $val, 'total' => 0);
                  }
                  if ($val < $stats[$id]['min']) {
                      $stats[$id]['min'] = $val;
                  }
                  if ($val > $stats[$id]['max']) {
                      $stats[$id]['max'] = $val;
                  }
                  $stats[$id]['total'] += $val;
              }
          }

          /* Compute average and range per feature */
          foreach ($stats as $id => $stat) {
              $stats[$id]['range'] = $stat['max'] - $stat['min'];
              $stats[$id]['avg'] = $stat['total'] / $rows;
          }

          foreach ($data as $key => $row) {
              foreach ($row[0] as $id => $val) {
                  $range = $stats[$id]['range'];
                  if ((float) $range === 0.0) {
                      // Constant column: every value equals the average.
                      $data[$key][0][$id] = 0.0;
                  } else {
                      $data[$key][0][$id] = ($val - $stats[$id]['avg']) / $range;
                  }
              }
          }

          return $data;
      }

      /**
       * Run one pass of parameter updates over the stored data, using a
       * dummy feature value of 1 for the first (intercept) parameter.
       *
       * Updates are applied in place, row by row: the hypothesis for each row
       * is evaluated against the parameters as already modified by earlier
       * rows and earlier parameters in this same pass.
       *
       * @param array $params current parameters
       * @return array updated parameters
       */
      protected function learn($params) {
          $data_rate = 1 / count($this->data);

          foreach ($params as $id => $p) {
              foreach ($this->data as $row) {
                  $score = $this->mv_hypothesis($row[0], $params) - $row[1];
                  // Parameter 0 pairs with the constant 1, parameter $id
                  // with feature $id - 1.
                  $params[$id] -= $this->learning_rate *
                      ($data_rate *
                          ( $score * ($id == 0 ? 1 : $row[0][$id - 1]) )
                      );
              }
          }

          return $params;
      }

      /**
       * Evaluate the linear hypothesis for one row:
       * params[0] + sum(rowdata[i] * params[i + 1]).
       *
       * @param array $rowdata feature values of a single row
       * @param array $params
       * @return float
       */
      protected function mv_hypothesis($rowdata, $params) {
          $score = $params[0];
          foreach ($rowdata as $id => $value) {
              $score += $value * $params[$id + 1];
          }
          return $score;
      }

      /**
       * Return the sum of squared errors of the hypothesis over the stored data.
       *
       * @param array $params
       * @return float
       */
      public function score($params) {
          $score = 0;
          foreach ($this->data as $row) {
              $score += pow($this->mv_hypothesis($row[0], $params) - $row[1], 2);
          }
          return $score;
      }

      /**
       * Perform one learning pass and accept it only if the error did not grow.
       *
       * @param array $parameters current parameters
       * @return array|false the updated parameters, or false when the update
       *                     increased the sum of squared errors
       */
      public function mv_gradient($parameters) {
          $score = $this->score($parameters);

          // Create a new hypothesis to test our score
          $parameters = $this->learn($parameters);

          if ($score < $this->score($parameters)) {
              return false;
          }
          return $parameters;
      }

      /**
       * Find the parameters that best fit the data.
       *
       * Runs at most $iterations learning passes and stops early as soon as a
       * pass would increase the error, returning the last accepted parameters.
       *
       * @param int $iterations - max iterations to run
       * @param array $defaults - optional starting params
       * @return array - best fit parameters
       * @throws LogicException when no training rows have been set
       */
      public function find_params($iterations = 5000, $defaults = null) {
          if (empty($this->data)) {
              throw new LogicException('set_data() must be called with at least one row before find_params()');
          }

          if (!$defaults) {
              // One parameter per feature plus the intercept. Use the first
              // row whatever its key is, not only key 0.
              $first_row = reset($this->data);
              $defaults = array_fill(0, count($first_row[0]) + 1, 0);
          }

          $parameters = $defaults;
          for ($iters = 0; $iters < $iterations; $iters++) {
              $candidate = $this->mv_gradient($parameters);
              if ($candidate === false) {
                  // The step made the fit worse: keep the previous parameters.
                  break;
              }
              $parameters = $candidate;
          }

          return $parameters;
      }
  }
  /*
   * Nice regular data for testing: each row is
   * array(array(x1, x2, x3), y), with y written out as a sum so the
   * terms that produced it stay visible.
   */
  $data = array();
  $data[] = array(array(2, 4000, 0.5), 2 + 2 + (2 * 4) + (3 * 5));
  $data[] = array(array(2, 4000, 0.4), 2 + 2 + (2 * 4) + (3 * 4));
  $data[] = array(array(2, 4000, 0.6), 2 + 2 + (2 * 4) + (3 * 6));
  $data[] = array(array(1, 5000, 0.5), 2 + 1 + (2 * 5) + (3 * 5));
  $data[] = array(array(2, 5000, 0.1), 2 + 2 + (2 * 5) + (3 * 1));
  /**
   * Polynomial variant of MVGradient: feature i is raised to the power
   * i + 2 before being weighted by parameter i + 1.
   */
  class PolyMV extends MVGradient {
      /**
       * Hand the rows back untouched; scaling is skipped just for the example.
       *
       * @param array $data
       * @return array the rows exactly as given
       */
      protected function scale_data($data) {
          return $data;
      }

      /**
       * Evaluate the polynomial hypothesis for one row:
       * params[0] + sum(pow(rowdata[i], i + 2) * params[i + 1]).
       *
       * @param array $rowdata feature values of a single row
       * @param array $params
       * @return float
       */
      protected function mv_hypothesis($rowdata, $params) {
          $total = $params[0];
          foreach ($rowdata as $idx => $x) {
              $total += pow($x, $idx + 2) * $params[$idx + 1];
          }
          return $total;
      }

      /**
       * Run one pass of in-place parameter updates over the stored data.
       * The first parameter pairs with a dummy value of 1; every other
       * parameter pairs with its feature raised to the same power the
       * hypothesis uses, as required by the partial derivative.
       *
       * @param array $params
       * @return array updated parameters
       */
      protected function learn($params) {
          $per_row = 1 / count($this->data);

          foreach (array_keys($params) as $index) {
              foreach ($this->data as $sample) {
                  $error = $this->mv_hypothesis($sample[0], $params) - $sample[1];

                  if ($index == 0) {
                      $factor = 1;
                  } else {
                      $factor = pow($sample[0][$index - 1], $index + 1);
                  }

                  $params[$index] -= $this->learning_rate * ($per_row * ($error * $factor));
              }
          }

          return $params;
      }
  }
  201. /*
  202. $iterations = array(10, 100, 500, 1000, 2000, 5000, 10000);
  203. $mvg = new MVGradient();
  204. $mvg->set_data($data);
  205. foreach(array(0.1, 0.01, 0.001, 0.001) as $rate) {
  206. $mvg->set_learning_rate($rate);
  207. foreach($iterations as $i) {
  208. $params = $mvg->find_params($i);
  209. echo $mvg->score($params), "\n";
  210. }
  211. echo "\n";
  212. }
  213. die();
  214. // We have a polynomial example here
  215. $data = array(
  216. array(array(2, 2), 1+(3*pow(2, 2))+(2*pow(2, 3))),
  217. array(array(3, 3), 1+(3*pow(3, 2))+(2*pow(3, 3))),
  218. array(array(4, 4), 1+(3*pow(4, 2))+(2*pow(4, 3))),
  219. array(array(5, 5), 1+(3*pow(5, 2))+(2*pow(5, 3))),
  220. );
  221. $iterations = array(10000);
  222. $mvg = new PolyMV();
  223. $mvg->set_data($data);
  224. $mvg->set_learning_rate(0.001);
  225. foreach($iterations as $i) {
  226. $params = $mvg->find_params($i);
  227. echo $mvg->score($params), "\n";
  228. var_dump($params);
  229. }
  230. echo "\n";
  231. */