PageRenderTime 44ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/Scenario/Data/Analyzer.php

http://github.com/jsylvanus/phpScenario
PHP | 247 lines | 140 code | 25 blank | 82 comment | 23 complexity | 92e8967bff8297cca4430c59a235a4d5 MD5 | raw file
  1. <?php
  2. /**
  3. * phpScenario
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://www.phpscenario.org/license.php
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@phpscenario.org so we can send you a copy immediately.
  14. *
  15. * @category Scenario
  16. * @package Scenario
  17. * @copyright Copyright (c) 2011-2012 TK Studios. (http://www.tkstudios.com)
  18. * @license http://www.phpscenario.org/license.php New BSD License
  19. */
  20. /**
  21. * Data Analyzer
  22. *
  23. * Provides statistical analysis information for experiments.
  24. *
  25. * @category Scenario
  26. * @package Scenario
  27. * @copyright Copyright (c) 2011-2012 TK Studios. (http://www.tkstudios.com)
  28. * @license http://www.phpscenario.org/license.php New BSD License
  29. */
  class Scenario_Data_Analyzer {

      /**
       * Per-experiment summaries produced by _summarize(), keyed by experiment
       * name. Each entry is the raw experiment data plus an 'analysis' key.
       * Stays null until an experiment has been analyzed.
       *
       * @var array
       */
      protected $_analysis;

      /**
       * Consolidated raw results of the most recently analyzed experiment.
       *
       * @var Scenario_Data_Consolidator
       */
      protected $_rawData;

      /**
       * The experiment most recently passed to analyzeExperiment().
       *
       * @var Scenario_Experiment
       */
      protected $_lastExperiment;

      /**
       * Core reference
       *
       * @var Scenario_Core
       */
      protected $_core;

      /**
       * Creates the analyzer and, when an experiment is supplied, analyzes it
       * immediately.
       *
       * @param Scenario_Core $core core object used to reach the data adapter
       * @param Scenario_Experiment|null $experiment optional experiment to analyze
       */
      public function __construct($core, $experiment = null) {
          $this->_core = $core;
          if ($experiment !== null) {
              $this->analyzeExperiment($experiment);
          }
      }

      /**
       * Read-only magic accessor: exposes the consolidated raw data as `results`.
       *
       * @param string $name requested property name
       * @return Scenario_Data_Consolidator|null the raw data for 'results', null otherwise
       */
      public function __get($name) {
          if ($name === 'results') {
              return $this->_rawData;
          }
          return null;
      }

      /**
       * Returns the computed analysis.
       *
       * Without an experiment name, an array keyed by experiment name is
       * returned (empty if nothing has been analyzed yet). With a name, only
       * that experiment's entry is returned, or null if it is unknown.
       *
       * @param bool $analysis_only when true return only the 'analysis' part,
       *                            otherwise the full entry including raw data
       * @param string|null $for_expname experiment name, or null for all experiments
       * @return array|null
       */
      public function getResults($analysis_only = true, $for_expname = null) {
          // Nothing analyzed yet: behave as if there were no experiments.
          $all = is_array($this->_analysis) ? $this->_analysis : array();

          // Loose comparison kept deliberately so every value that used to
          // select "all experiments" (null, '', 0, ...) still does.
          if ($for_expname == null) {
              $out = array();
              foreach ($all as $expName => $entry) {
                  $out[$expName] = $analysis_only ? $entry['analysis'] : $entry;
              }
              return $out;
          }

          // Unknown experiment: return null explicitly rather than through an
          // undefined-index notice.
          if (!isset($all[$for_expname])) {
              return null;
          }

          return $analysis_only ? $all[$for_expname]['analysis'] : $all[$for_expname];
      }

      /**
       * Loads every stored result for the experiment and rebuilds the analysis.
       *
       * @param Scenario_Experiment $experiment experiment to analyze
       * @throws Scenario_Exception when a Scenario_Data_Exception is raised while processing
       * @throws Exception when a Scenario_Exception is raised while processing;
       *                   any other exception propagates unchanged
       */
      public function analyzeExperiment($experiment) {
          $this->_lastExperiment = $experiment;
          try {
              $this->_rawData = $this->_loadResults();
              $this->_analysis = $this->_summarize();
          } catch (Scenario_Data_Exception $e) {
              require_once 'Scenario/Exception.php';
              throw new Scenario_Exception('A data exception occurred while processing results: ' . $e->getMessage());
          } catch (Scenario_Exception $e) {
              throw new Exception('A general Scenario exception occurred while processing results: ' . $e->getMessage());
          }
      }

      /**
       * @return Scenario_Experiment|null the experiment last passed to analyzeExperiment()
       */
      public function getLastExperiment() {
          return $this->_lastExperiment;
      }

      /**
       * Pulls all results for the last experiment from the adapter, in pages,
       * and feeds them into a fresh consolidator.
       *
       * @return Scenario_Data_Consolidator
       * @throws Scenario_Exception if no experiment is set or the adapter
       *                            returns something that is not a Scenario_Result
       */
      private function _loadResults() {
          if ($this->_lastExperiment === null) {
              require_once 'Scenario/Exception.php';
              throw new Scenario_Exception('_loadResults called while _lastExperiment is null');
          }
          $exp = $this->_lastExperiment;

          require_once 'Scenario/Data/Consolidator.php';
          $data = new Scenario_Data_Consolidator(array());

          // Fetch in fixed-size pages; a short page means the end was reached.
          $limit = 1000;
          $start = 0;
          do {
              $results = $this->_core->getAdapter()->GetResults($exp, $start, $limit);
              $numresults = count($results);
              $start += $limit;
              foreach ($results as $result) {
                  if (!($result instanceof Scenario_Result)) {
                      require_once 'Scenario/Exception.php';
                      throw new Scenario_Exception('Scenario_ResultSet must contain only Scenario_Result objects.');
                  }
                  $data->addResult($result);
              }
              unset($results);
          } while ($numresults >= $limit);

          return $data;
      }

      /**
       * Builds the statistical summary for every experiment in the raw data.
       *
       * For each experiment an 'analysis' entry is added containing:
       *  - total_tested / total_converted / conversion_rate (overall)
       *  - treatments[name] with, per treatment:
       *      total_tested, total_converted,
       *      conversion_rate  (converted / tested),
       *      percent_tested   (tested / total tested),
       *      standard_error   (sqrt(percent_tested * (1 - percent_tested) / tested)),
       *      high_confidence  (standard_error * 1.96),
       *      z_score          (treatment rate vs. overall rate)
       *
       * Any ratio whose denominator is zero is reported as 0 instead of
       * triggering a division by zero.
       *
       * The result is also stored in $_analysis.
       *
       * @return array summaries keyed by experiment name
       */
      private function _summarize() {
          $results = array();

          foreach ($this->_rawData->getExperimentNames() as $expName) {
              $data = $this->_rawData->getExperimentData($expName);
              $rawTreatments = isset($data['_treatments']) ? $data['_treatments'] : array();

              $totalTested = 0;
              $totalConverted = 0;
              $treatments = array();

              // First pass: totals and per-treatment conversion rates.
              foreach ($rawTreatments as $tname => $vals) {
                  $totalTested += $vals['total'];
                  $totalConverted += $vals['completed'];
                  $treatments[$tname] = array(
                      'total_tested'    => $vals['total'],
                      'total_converted' => $vals['completed'],
                      'conversion_rate' => $this->_ratio($vals['completed'], $vals['total'])
                  );
              }
              $overallRate = $this->_ratio($totalConverted, $totalTested);

              // Second pass: figures that depend on the experiment-wide totals.
              foreach ($rawTreatments as $tname => $vals) {
                  $pct = $this->_ratio($vals['total'], $totalTested);
                  $stderr = sqrt($this->_ratio($pct * (1 - $pct), $vals['total']));

                  $treatments[$tname]['percent_tested'] = $pct;
                  $treatments[$tname]['standard_error'] = $stderr;
                  // 1.96 is the multiplier for a 95% confidence interval.
                  $treatments[$tname]['high_confidence'] = $stderr * 1.96;
                  $treatments[$tname]['z_score'] = $this->_zScore(
                      $treatments[$tname]['conversion_rate'],
                      $vals['total'],
                      $overallRate,
                      $totalTested
                  );
              }

              $data['analysis'] = array(
                  'total_tested'    => $totalTested,
                  'total_converted' => $totalConverted,
                  'conversion_rate' => $overallRate,
                  'treatments'      => $treatments
              );
              $results[$expName] = $data;
          }

          $this->_analysis = $results;
          return $results;
      }

      /**
       * Float division that yields 0.0 when the denominator is zero.
       *
       * @param int|float|string $numerator
       * @param int|float|string $denominator
       * @return float
       */
      private function _ratio($numerator, $denominator) {
          $denominator = floatval($denominator);
          if ($denominator == 0.0) {
              return 0.0;
          }
          return floatval($numerator) / $denominator;
      }

      /**
       * Z-score of a treatment's conversion rate against the overall rate.
       *
       * Returns 0 when either sample is empty, the treatment rate is zero, or
       * the combined variance is not positive (e.g. both rates are exactly 1),
       * since the score is undefined in those cases.
       *
       * @param float $tRate treatment conversion rate
       * @param int|float|string $tTotal number tested in the treatment
       * @param float $cRate overall conversion rate
       * @param int|float|string $cTotal number tested overall
       * @return float|int
       */
      private function _zScore($tRate, $tTotal, $cRate, $cTotal) {
          if ($cTotal == 0 || $tTotal == 0 || $tRate == 0) {
              return 0;
          }
          $variance = (($tRate * (1.0 - $tRate)) / floatval($tTotal))
                    + (($cRate * (1.0 - $cRate)) / floatval($cTotal));
          if (!($variance > 0)) {
              return 0;
          }
          return ($tRate - $cRate) / sqrt($variance);
      }

      /**
       * Lists every treatment combination for an experiment.
       *
       * For a plain experiment this is one single-element array per treatment
       * name. For a multivariate experiment it is the cartesian product of the
       * treatment lists of its children, with children ordered by experiment ID.
       *
       * @param Scenario_Experiment $experiment
       * @return array list of treatment-name arrays
       */
      private function _treatmentMatrix(Scenario_Experiment $experiment) {
          if (!$experiment->isMultiVar()) {
              $matrix = array();
              foreach (array_keys($experiment->getWeightings()) as $treatment) {
                  $matrix[] = array($treatment);
              }
              return $matrix;
          }

          $columns = array();
          foreach ($experiment->getChildren() as $child) {
              /* @var $child Scenario_Experiment */
              // NOTE(review): this reads the PARENT's weightings for every child,
              // as the original did; $child->getWeightings() may have been
              // intended - confirm against Scenario_Experiment.
              $columns[$child->getExperimentID()] = array_keys($experiment->getWeightings());
          }
          if (count($columns) == 0) {
              return array();
          }

          // Order by experiment ID, then re-index so the columns can be
          // addressed positionally (ksort() preserves the ID keys).
          ksort($columns);
          $columns = array_values($columns);

          $matrix = array();
          foreach ($columns[0] as $treatment) {
              $matrix[] = array($treatment);
          }

          // Extend every existing combination with each treatment of the next column.
          $columnCount = count($columns);
          for ($i = 1; $i < $columnCount; $i++) {
              $previous = $matrix;
              $matrix = array();
              foreach ($columns[$i] as $right) {
                  foreach ($previous as $left) {
                      $matrix[] = array_merge($left, array($right));
                  }
              }
          }
          return $matrix;
      }
  }