PageRenderTime 55ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/PHPExcel/Shared/JAMA/examples/Stats.php

https://bitbucket.org/nfredricks/wp-employee-time
PHP | 1605 lines | 834 code | 72 blank | 699 comment | 226 complexity | dfc48d3fa9118e8f89a9f975ea53fa64 MD5 | raw file
  1. <?php
  2. //
  3. // +----------------------------------------------------------------------+
  4. // | PHP Version 4 |
  5. // +----------------------------------------------------------------------+
  6. // | Copyright (c) 1997-2003 The PHP Group |
  7. // +----------------------------------------------------------------------+
  8. // | This source file is subject to version 2.0 of the PHP license, |
  9. // | that is bundled with this package in the file LICENSE, and is |
  10. // | available at through the world-wide-web at |
  11. // | http://www.php.net/license/2_02.txt. |
  12. // | If you did not receive a copy of the PHP license and are unable to |
  13. // | obtain it through the world-wide-web, please send a note to |
  14. // | license@php.net so we can mail you a copy immediately. |
  15. // +----------------------------------------------------------------------+
  16. // | Authors: Jesus M. Castagnetto <jmcastagnetto@php.net> |
  17. // +----------------------------------------------------------------------+
  18. //
  19. // $Id: Stats.php,v 1.15 2003/06/01 11:40:30 jmcastagnetto Exp $
  20. //
  21. include_once 'PEAR.php';
  22. /**
  23. * @package Math_Stats
  24. */
  25. // Constants for defining the statistics to calculate /*{{{*/
  26. /**
  27. * STATS_BASIC to generate the basic descriptive statistics
  28. */
  29. define('STATS_BASIC', 1);
  30. /**
  31. * STATS_FULL to generate also higher moments, mode, median, etc.
  32. */
  33. define('STATS_FULL', 2);
  34. /*}}}*/
  35. // Constants describing the data set format /*{{{*/
  36. /**
  37. * STATS_DATA_SIMPLE for an array of numeric values. This is the default.
  38. * e.g. $data = array(2,3,4,5,1,1,6);
  39. */
  40. define('STATS_DATA_SIMPLE', 0);
  41. /**
  42. * STATS_DATA_CUMMULATIVE for an associative array of frequency values,
  43. * where in each array entry, the index is the data point and the
  44. * value the count (frequency):
  45. * e.g. $data = array(3=>4, 2.3=>5, 1.25=>6, 0.5=>3)
  46. */
  47. define('STATS_DATA_CUMMULATIVE', 1);
  48. /*}}}*/
  49. // Constants defining how to handle nulls /*{{{*/
  50. /**
  51. * STATS_REJECT_NULL, reject data sets with null values. This is the default.
  52. * Any non-numeric value is considered a null in this context.
  53. */
  54. define('STATS_REJECT_NULL', -1);
  55. /**
  56. * STATS_IGNORE_NULL, ignore null values and prune them from the data.
  57. * Any non-numeric value is considered a null in this context.
  58. */
  59. define('STATS_IGNORE_NULL', -2);
  60. /**
  61. * STATS_USE_NULL_AS_ZERO, assign the value of 0 (zero) to null values.
  62. * Any non-numeric value is considered a null in this context.
  63. */
  64. define('STATS_USE_NULL_AS_ZERO', -3);
  65. /*}}}*/
  66. /**
  67. * A class to calculate descriptive statistics from a data set.
  68. * Data sets can be simple arrays of data, or a cummulative hash.
  69. * The second form is useful when passing large data set,
  70. * for example the data set:
  71. *
  72. * <pre>
  73. * $data1 = array (1,2,1,1,1,1,3,3,4.1,3,2,2,4.1,1,1,2,3,3,2,2,1,1,2,2);
  74. * </pre>
  75. *
  76. * can be expressed more compactly as:
  77. *
  78. * <pre>
  79. * $data2 = array('1'=>9, '2'=>8, '3'=>5, '4.1'=>2);
  80. * </pre>
  81. *
  82. * Example of use:
  83. *
  84. * <pre>
  85. * include_once 'Math/Stats.php';
  86. * $s = new Math_Stats();
  87. * $s->setData($data1);
  88. * // or
  89. * // $s->setData($data2, STATS_DATA_CUMMULATIVE);
  90. * $stats = $s->calcBasic();
  91. * echo 'Mean: '.$stats['mean'].' StDev: '.$stats['stdev'].' <br />\n';
  92. *
  93. * // using data with nulls
  94. * // first ignoring them:
  95. * $data3 = array(1.2, 'foo', 2.4, 3.1, 4.2, 3.2, null, 5.1, 6.2);
  96. * $s->setNullOption(STATS_IGNORE_NULL);
  97. * $s->setData($data3);
  98. * $stats3 = $s->calcFull();
  99. *
  100. * // and then assuming nulls == 0
  101. * $s->setNullOption(STATS_USE_NULL_AS_ZERO);
  102. * $s->setData($data3);
  103. * $stats3 = $s->calcFull();
  104. * </pre>
  105. *
  106. * Originally this class was part of NumPHP (Numeric PHP package)
  107. *
  108. * @author Jesus M. Castagnetto <jmcastagnetto@php.net>
  109. * @version 0.8
  110. * @access public
  111. * @package Math_Stats
  112. */
  113. class Base {/*{{{*/
  114. // properties /*{{{*/
  115. /**
  116. * The simple or cummulative data set.
  117. * Null by default.
  118. *
  119. * @access private
  120. * @var array
  121. */
  122. public $_data = null;
  123. /**
  124. * Expanded data set. Only set when cummulative data
  125. * is being used. Null by default.
  126. *
  127. * @access private
  128. * @var array
  129. */
  130. public $_dataExpanded = null;
  131. /**
  132. * Flag for data type, one of STATS_DATA_SIMPLE or
  133. * STATS_DATA_CUMMULATIVE. Null by default.
  134. *
  135. * @access private
  136. * @var int
  137. */
  138. public $_dataOption = null;
  139. /**
  140. * Flag for null handling options. One of STATS_REJECT_NULL,
  141. * STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO
  142. *
  143. * @access private
  144. * @var int
  145. */
  146. public $_nullOption;
  147. /**
  148. * Array for caching result values, should be reset
  149. * when using setData()
  150. *
  151. * @access private
  152. * @var array
  153. */
  154. public $_calculatedValues = array();
  155. /*}}}*/
  156. /**
  157. * Constructor for the class
  158. *
  159. * @access public
  160. * @param optional int $nullOption how to handle null values
  161. * @return object Math_Stats
  162. */
  function __construct($nullOption=STATS_REJECT_NULL) {/*{{{*/
      // The enclosing class is named Base, so the PHP 4 style constructor
      // Math_Stats() below is never invoked by "new" (and PHP 8 dropped
      // name-based constructors altogether). Without this method the null
      // handling option would stay unset after instantiation.
      $this->_nullOption = $nullOption;
  }/*}}}*/

  /**
   * Legacy PHP 4 style initializer, kept so that code which calls
   * $obj->Math_Stats() explicitly keeps working.
   *
   * @access public
   * @param optional int $nullOption how to handle null values
   * @return void
   */
  function Math_Stats($nullOption=STATS_REJECT_NULL) {/*{{{*/
      $this->_nullOption = $nullOption;
  }/*}}}*/
  166. /**
  167. * Sets and verifies the data, checking for nulls and using
  168. * the current null handling option
  169. *
  170. * @access public
  171. * @param array $arr the data set
  172. * @param optional int $opt data format: STATS_DATA_CUMMULATIVE or STATS_DATA_SIMPLE (default)
  173. * @return mixed true on success, a PEAR_Error object otherwise
  174. */
  function setData($arr, $opt=STATS_DATA_SIMPLE) {/*{{{*/
      // Only arrays can be used as a data set.
      if (!is_array($arr)) {
          return PEAR::raiseError('invalid data, an array of numeric data was expected');
      }

      // Forget the previous data set and every cached statistic.
      $this->_calculatedValues = array();
      $this->_dataExpanded = null;
      $this->_dataOption = null;
      $this->_data = null;

      // Store the new data according to its declared format. An unknown
      // format leaves the storage empty, exactly as it was just reset.
      switch ($opt) {
          case STATS_DATA_SIMPLE:
              $this->_data = array_values($arr);
              $this->_dataOption = $opt;
              break;
          case STATS_DATA_CUMMULATIVE:
              $this->_dataExpanded = array();
              $this->_data = $arr;
              $this->_dataOption = $opt;
              break;
      }

      // Validation decides the final return value.
      return $this->_validate();
  }/*}}}*/
  193. /**
  194. * Returns the data which might have been modified
  195. * according to the current null handling options.
  196. *
  197. * @access public
  198. * @param boolean $expanded whether to return a expanded list, default is false
  199. * @return mixed array of data on success, a PEAR_Error object otherwise
  200. * @see _validate()
  201. */
  function getData($expanded=false) {/*{{{*/
      // Nothing to return until setData() has stored something.
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      // The expanded list is only handed out for cummulative data,
      // and only when the caller asked for it.
      $wantExpanded = $expanded && $this->_dataOption == STATS_DATA_CUMMULATIVE;
      return $wantExpanded ? $this->_dataExpanded : $this->_data;
  }/*}}}*/
  212. /**
  213. * Sets the null handling option.
  214. * Must be called before assigning a new data set containing null values
  215. *
  216. * @access public
  217. * @return mixed true on success, a PEAR_Error object otherwise
  218. * @see _validate()
  219. */
  function setNullOption($nullOption) {/*{{{*/
      // The three recognised null handling strategies (compared loosely).
      $allowed = array(STATS_REJECT_NULL, STATS_IGNORE_NULL, STATS_USE_NULL_AS_ZERO);
      if (!in_array($nullOption, $allowed)) {
          return PEAR::raiseError('invalid null handling option expecting: '.
              'STATS_REJECT_NULL, STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO');
      }
      $this->_nullOption = $nullOption;
      return true;
  }/*}}}*/
  231. /**
  232. * Transforms the data by subtracting the mean from each entry and
  233. * dividing by the standard deviation. This will reset all pre-calculated
  234. * values to their original (unset) defaults.
  235. *
  236. * @access public
  237. * @return mixed true on success, a PEAR_Error object otherwise
  238. * @see mean()
  239. * @see stDev()
  240. * @see setData()
  241. */
  function studentize() {/*{{{*/
      // Both statistics are required; hand back the first error met.
      $mean = $this->mean();
      if (PEAR::isError($mean)) {
          return $mean;
      }
      $std = $this->stDev();
      if (PEAR::isError($std)) {
          return $std;
      }
      if ($std == 0) {
          return PEAR::raiseError('cannot studentize data, standard deviation is zero.');
      }

      // Remember the format now: setData() resets it before storing.
      $format = $this->_dataOption;
      $cumulative = ($format == STATS_DATA_CUMMULATIVE);

      $scaled = array();
      foreach ($this->_data as $key => $item) {
          if ($cumulative) {
              // key is the data point, item its frequency
              $z = ($key - $mean) / $std;
              $scaled[(string) $z] = $item;
          } else {
              // item is the data point itself
              $scaled[] = ($item - $mean) / $std;
          }
      }

      // Re-setting the data also clears every cached statistic.
      return $this->setData($scaled, $format);
  }/*}}}*/
  268. /**
  269. * Transforms the data by subtracting the mean from each entry.
  270. * This will reset all pre-calculated values to their original (unset) defaults.
  271. *
  272. * @access public
  273. * @return mixed true on success, a PEAR_Error object otherwise
  274. * @see mean()
  275. * @see setData()
  276. */
  function center() {/*{{{*/
      $mean = $this->mean();
      if (PEAR::isError($mean)) {
          return $mean;
      }

      // Remember the format now: setData() resets it before storing.
      $format = $this->_dataOption;
      $cumulative = ($format == STATS_DATA_CUMMULATIVE);

      $shifted = array();
      foreach ($this->_data as $key => $item) {
          if ($cumulative) {
              // key is the data point, item its frequency
              $moved = $key - $mean;
              $shifted[(string) $moved] = $item;
          } else {
              // item is the data point itself
              $shifted[] = $item - $mean;
          }
      }

      // Re-setting the data also clears every cached statistic.
      return $this->setData($shifted, $format);
  }/*}}}*/
  296. /**
  297. * Calculates the basic or full statistics for the data set
  298. *
  299. * @access public
  300. * @param int $mode one of STATS_BASIC or STATS_FULL
  301. * @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default),
  302. * or only the error message will be returned (when false), if an error happens.
  303. * @return mixed an associative array of statistics on success, a PEAR_Error object otherwise
  304. * @see calcBasic()
  305. * @see calcFull()
  306. */
  function calc($mode, $returnErrorObject=true) {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      // Dispatch on the requested level of detail (loose comparison,
      // STATS_BASIC is tested before STATS_FULL).
      switch ($mode) {
          case STATS_BASIC:
              return $this->calcBasic($returnErrorObject);
          case STATS_FULL:
              return $this->calcFull($returnErrorObject);
          default:
              return PEAR::raiseError('incorrect mode, expected STATS_BASIC or STATS_FULL');
      }
  }/*}}}*/
  319. /**
  320. * Calculates a basic set of statistics
  321. *
  322. * @access public
  323. * @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default),
  324. * or only the error message will be returned (when false), if an error happens.
  325. * @return mixed an associative array of statistics on success, a PEAR_Error object otherwise
  326. * @see calc()
  327. * @see calcFull()
  328. */
  function calcBasic($returnErrorObject=true) {/*{{{*/
      // Result key => name of the method that computes it, in output order.
      $methods = array(
          'min'      => 'min',
          'max'      => 'max',
          'sum'      => 'sum',
          'sum2'     => 'sum2',
          'count'    => 'count',
          'mean'     => 'mean',
          'stdev'    => 'stDev',
          'variance' => 'variance',
          'range'    => 'range'
      );
      $stats = array();
      foreach ($methods as $key => $method) {
          // Every value goes through __format() together with the error flag.
          $stats[$key] = $this->__format($this->$method(), $returnErrorObject);
      }
      return $stats;
  }/*}}}*/
  342. /**
  343. * Calculates a full set of statistics
  344. *
  345. * @access public
  346. * @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default),
  347. * or only the error message will be returned (when false), if an error happens.
  348. * @return mixed an associative array of statistics on success, a PEAR_Error object otherwise
  349. * @see calc()
  350. * @see calcBasic()
  351. */
  function calcFull($returnErrorObject=true) {/*{{{*/
      // Result key => method name for the entries that precede the moment
      // tables, listed in output order.
      $leading = array(
          'min'                => 'min',
          'max'                => 'max',
          'sum'                => 'sum',
          'sum2'               => 'sum2',
          'count'              => 'count',
          'mean'               => 'mean',
          'median'             => 'median',
          'mode'               => 'mode',
          'midrange'           => 'midrange',
          'geometric_mean'     => 'geometricMean',
          'harmonic_mean'      => 'harmonicMean',
          'stdev'              => 'stDev',
          'absdev'             => 'absDev',
          'variance'           => 'variance',
          'range'              => 'range',
          'std_error_of_mean'  => 'stdErrorOfMean',
          'skewness'           => 'skewness',
          'kurtosis'           => 'kurtosis',
          'coeff_of_variation' => 'coeffOfVariation'
      );
      // Entries that follow the moment tables.
      $trailing = array(
          'frequency'                      => 'frequency',
          'quartiles'                      => 'quartiles',
          'interquartile_range'            => 'interquartileRange',
          'interquartile_mean'             => 'interquartileMean',
          'quartile_deviation'             => 'quartileDeviation',
          'quartile_variation_coefficient' => 'quartileVariationCoefficient',
          'quartile_skewness_coefficient'  => 'quartileSkewnessCoefficient'
      );

      $stats = array();
      foreach ($leading as $key => $method) {
          $stats[$key] = $this->__format($this->$method(), $returnErrorObject);
      }

      // Central and raw sample moments of order 1 through 5.
      $central = array();
      for ($order = 1; $order <= 5; $order++) {
          $central[$order] = $this->__format($this->sampleCentralMoment($order), $returnErrorObject);
      }
      $stats['sample_central_moments'] = $central;

      $raw = array();
      for ($order = 1; $order <= 5; $order++) {
          $raw[$order] = $this->__format($this->sampleRawMoment($order), $returnErrorObject);
      }
      $stats['sample_raw_moments'] = $raw;

      foreach ($trailing as $key => $method) {
          $stats[$key] = $this->__format($this->$method(), $returnErrorObject);
      }
      return $stats;
  }/*}}}*/
  396. /**
  397. * Calculates the minimum of a data set.
  398. * Handles cummulative data sets correctly
  399. *
  400. * @access public
  401. * @return mixed the minimum value on success, a PEAR_Error object otherwise
  402. * @see calc()
  403. * @see max()
  404. */
  function min() {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      // Serve the cached value when one exists.
      if (array_key_exists('min', $this->_calculatedValues)) {
          return $this->_calculatedValues['min'];
      }
      // In a cummulative set the data points are the array keys.
      $points = ($this->_dataOption == STATS_DATA_CUMMULATIVE)
          ? array_keys($this->_data)
          : $this->_data;
      $this->_calculatedValues['min'] = min($points);
      return $this->_calculatedValues['min'];
  }/*}}}*/
  419. /**
  420. * Calculates the maximum of a data set.
  421. * Handles cummulative data sets correctly
  422. *
  423. * @access public
  424. * @return mixed the maximum value on success, a PEAR_Error object otherwise
  425. * @see calc()
  426. * @see min()
  427. */
  function max() {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      // Serve the cached value when one exists.
      if (array_key_exists('max', $this->_calculatedValues)) {
          return $this->_calculatedValues['max'];
      }
      // In a cummulative set the data points are the array keys.
      $points = ($this->_dataOption == STATS_DATA_CUMMULATIVE)
          ? array_keys($this->_data)
          : $this->_data;
      $this->_calculatedValues['max'] = max($points);
      return $this->_calculatedValues['max'];
  }/*}}}*/
  442. /**
  443. * Calculates SUM { xi }
  444. * Handles cummulative data sets correctly
  445. *
  446. * @access public
  447. * @return mixed the sum on success, a PEAR_Error object otherwise
  448. * @see calc()
  449. * @see sum2()
  450. * @see sumN()
  451. */
  function sum() {/*{{{*/
      if (array_key_exists('sum', $this->_calculatedValues)) {
          return $this->_calculatedValues['sum'];
      }
      // The plain sum is the power sum with exponent 1.
      $total = $this->sumN(1);
      if (PEAR::isError($total)) {
          // errors are passed on and never cached
          return $total;
      }
      $this->_calculatedValues['sum'] = $total;
      return $this->_calculatedValues['sum'];
  }/*}}}*/
  463. /**
  464. * Calculates SUM { (xi)^2 }
  465. * Handles cummulative data sets correctly
  466. *
  467. * @access public
  468. * @return mixed the sum on success, a PEAR_Error object otherwise
  469. * @see calc()
  470. * @see sum()
  471. * @see sumN()
  472. */
  function sum2() {/*{{{*/
      $cached = array_key_exists('sum2', $this->_calculatedValues);
      if (!$cached) {
          // The sum of squares is the power sum with exponent 2.
          $squares = $this->sumN(2);
          if (PEAR::isError($squares)) {
              // errors are passed on and never cached
              return $squares;
          }
          $this->_calculatedValues['sum2'] = $squares;
      }
      return $this->_calculatedValues['sum2'];
  }/*}}}*/
  484. /**
  485. * Calculates SUM { (xi)^n }
  486. * Handles cummulative data sets correctly
  487. *
  488. * @access public
  489. * @param numeric $n the exponent
  490. * @return mixed the sum on success, a PEAR_Error object otherwise
  491. * @see calc()
  492. * @see sum()
  493. * @see sum2()
  494. */
  function sumN($n) {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      $exponent = (float) $n;
      $cumulative = ($this->_dataOption == STATS_DATA_CUMMULATIVE);
      $total = 0;
      foreach ($this->_data as $key => $item) {
          if ($cumulative) {
              // key is the data point, item how often it occurs
              $total += $item * pow((float) $key, $exponent);
          } else {
              // item is the data point itself
              $total += pow((float) $item, $exponent);
          }
      }
      return $total;
  }/*}}}*/
  511. /**
  512. * Calculates PROD { (xi) }, (the product of all observations)
  513. * Handles cummulative data sets correctly
  514. *
  515. * @access public
  516. * @return mixed the product on success, a PEAR_Error object otherwise
  517. * @see productN()
  518. */
  function product() {/*{{{*/
      if (array_key_exists('product', $this->_calculatedValues)) {
          return $this->_calculatedValues['product'];
      }
      // The plain product is the power product with exponent 1.
      $result = $this->productN(1);
      if (PEAR::isError($result)) {
          // errors are passed on and never cached
          return $result;
      }
      $this->_calculatedValues['product'] = $result;
      return $this->_calculatedValues['product'];
  }/*}}}*/
  530. /**
  531. * Calculates PROD { (xi)^n }, which is the product of all observations
  532. * Handles cummulative data sets correctly
  533. *
  534. * @access public
  535. * @param numeric $n the exponent
  536. * @return mixed the product on success, a PEAR_Error object otherwise
  537. * @see product()
  538. */
  function productN($n) {/*{{{*/
      // Computes PROD { (xi)^n } over all observations.
      // Returns a PEAR_Error when no data is set, 0.0 as soon as a zero
      // data point is met, and the (float) product otherwise.
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      $prodN = 1.0;
      if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
          foreach($this->_data as $val=>$freq) {
              if ($val == 0) {
                  return 0.0;
              }
              // A value observed $freq times contributes (val^n) $freq times
              // to the product, i.e. val^(n * freq). The previous code
              // multiplied by $freq instead, so array(2 => 3) produced
              // 6 rather than 2 * 2 * 2 = 8.
              $prodN *= pow((double)$val, (double)$n * $freq);
          }
      } else {
          foreach($this->_data as $val) {
              if ($val == 0) {
                  return 0.0;
              }
              $prodN *= pow((double)$val, (double)$n);
          }
      }
      return $prodN;
  }/*}}}*/
  561. /**
  562. * Calculates the number of data points in the set
  563. * Handles cummulative data sets correctly
  564. *
  565. * @access public
  566. * @return mixed the count on success, a PEAR_Error object otherwise
  567. * @see calc()
  568. */
  function count() {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      // Serve the cached value when one exists.
      if (array_key_exists('count', $this->_calculatedValues)) {
          return $this->_calculatedValues['count'];
      }
      // For cummulative data the observations are counted on the
      // expanded list, otherwise on the data array itself.
      if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
          $this->_calculatedValues['count'] = count($this->_dataExpanded);
      } else {
          $this->_calculatedValues['count'] = count($this->_data);
      }
      return $this->_calculatedValues['count'];
  }/*}}}*/
  583. /**
  584. * Calculates the mean (average) of the data points in the set
  585. * Handles cummulative data sets correctly
  586. *
  587. * @access public
  588. * @return mixed the mean value on success, a PEAR_Error object otherwise
  589. * @see calc()
  590. * @see sum()
  591. * @see count()
  592. */
  function mean() {/*{{{*/
      if (array_key_exists('mean', $this->_calculatedValues)) {
          return $this->_calculatedValues['mean'];
      }
      // mean = sum / count; the first failing ingredient is returned as is.
      $total = $this->sum();
      if (PEAR::isError($total)) {
          return $total;
      }
      $howMany = $this->count();
      if (PEAR::isError($howMany)) {
          return $howMany;
      }
      $this->_calculatedValues['mean'] = $total / $howMany;
      return $this->_calculatedValues['mean'];
  }/*}}}*/
  607. /**
  608. * Calculates the range of the data set = max - min
  609. *
  610. * @access public
  611. * @return mixed the value of the range on success, a PEAR_Error object otherwise.
  612. */
  function range() {/*{{{*/
      if (array_key_exists('range', $this->_calculatedValues)) {
          return $this->_calculatedValues['range'];
      }
      // range = max - min; the first failing ingredient is returned as is.
      $lowest = $this->min();
      if (PEAR::isError($lowest)) {
          return $lowest;
      }
      $highest = $this->max();
      if (PEAR::isError($highest)) {
          return $highest;
      }
      $this->_calculatedValues['range'] = $highest - $lowest;
      return $this->_calculatedValues['range'];
  }/*}}}*/
  627. /**
  628. * Calculates the variance (unbiased) of the data points in the set
  629. * Handles cummulative data sets correctly
  630. *
  631. * @access public
  632. * @return mixed the variance value on success, a PEAR_Error object otherwise
  633. * @see calc()
  634. * @see __sumdiff()
  635. * @see count()
  636. */
  function variance() {/*{{{*/
      if (array_key_exists('variance', $this->_calculatedValues)) {
          return $this->_calculatedValues['variance'];
      }
      // The computation itself is delegated to __calcVariance().
      $result = $this->__calcVariance();
      if (PEAR::isError($result)) {
          // errors are passed on and never cached
          return $result;
      }
      $this->_calculatedValues['variance'] = $result;
      return $this->_calculatedValues['variance'];
  }/*}}}*/
  647. /**
  648. * Calculates the standard deviation (unbiased) of the data points in the set
  649. * Handles cummulative data sets correctly
  650. *
  651. * @access public
  652. * @return mixed the standard deviation on success, a PEAR_Error object otherwise
  653. * @see calc()
  654. * @see variance()
  655. */
  function stDev() {/*{{{*/
      if (array_key_exists('stDev', $this->_calculatedValues)) {
          return $this->_calculatedValues['stDev'];
      }
      // The standard deviation is the square root of the variance.
      $spread = $this->variance();
      if (PEAR::isError($spread)) {
          return $spread;
      }
      $this->_calculatedValues['stDev'] = sqrt($spread);
      return $this->_calculatedValues['stDev'];
  }/*}}}*/
  666. /**
  667. * Calculates the variance (unbiased) of the data points in the set
  668. * given a fixed mean (average) value. Not used in calcBasic(), calcFull()
  669. * or calc().
  670. * Handles cummulative data sets correctly
  671. *
  672. * @access public
  673. * @param numeric $mean the fixed mean value
  674. * @return mixed the variance on success, a PEAR_Error object otherwise
  675. * @see __sumdiff()
  676. * @see count()
  677. * @see variance()
  678. */
  function varianceWithMean($mean) {/*{{{*/
      // Same helper as variance(), but with the caller's fixed mean;
      // the result is not cached.
      $result = $this->__calcVariance($mean);
      return $result;
  }/*}}}*/
  682. /**
  683. * Calculates the standard deviation (unbiased) of the data points in the set
  684. * given a fixed mean (average) value. Not used in calcBasic(), calcFull()
  685. * or calc().
  686. * Handles cummulative data sets correctly
  687. *
  688. * @access public
  689. * @param numeric $mean the fixed mean value
  690. * @return mixed the standard deviation on success, a PEAR_Error object otherwise
  691. * @see varianceWithMean()
  692. * @see stDev()
  693. */
  function stDevWithMean($mean) {/*{{{*/
      // Square root of the fixed-mean variance; an error is passed through.
      $spread = $this->varianceWithMean($mean);
      return PEAR::isError($spread) ? $spread : sqrt($spread);
  }/*}}}*/
  701. /**
  702. * Calculates the absolute deviation of the data points in the set
  703. * Handles cummulative data sets correctly
  704. *
  705. * @access public
  706. * @return mixed the absolute deviation on success, a PEAR_Error object otherwise
  707. * @see calc()
  708. * @see __sumabsdev()
  709. * @see count()
  710. * @see absDevWithMean()
  711. */
  function absDev() {/*{{{*/
      // Returns the absolute deviation of the data set, cached under
      // 'absDev' after the first successful computation, or the
      // PEAR_Error produced by __calcAbsoluteDeviation().
      if (!array_key_exists('absDev', $this->_calculatedValues)) {
          $absDev = $this->__calcAbsoluteDeviation();
          // PHP variable names are case-sensitive. The check used to test
          // the undefined $absdev, so an error was never detected and the
          // error object ended up stored in the cache as if it were a result.
          if (PEAR::isError($absDev)) {
              return $absDev;
          }
          $this->_calculatedValues['absDev'] = $absDev;
      }
      return $this->_calculatedValues['absDev'];
  }/*}}}*/
  722. /**
  723. * Calculates the absolute deviation of the data points in the set
  724. * given a fixed mean (average) value. Not used in calcBasic(), calcFull()
  725. * or calc().
  726. * Handles cummulative data sets correctly
  727. *
  728. * @access public
  729. * @param numeric $mean the fixed mean value
  730. * @return mixed the absolute deviation on success, a PEAR_Error object otherwise
  731. * @see __sumabsdev()
  732. * @see absDev()
  733. */
  function absDevWithMean($mean) {/*{{{*/
      // Same helper as absDev(), but with the caller's fixed mean;
      // the result is not cached.
      $result = $this->__calcAbsoluteDeviation($mean);
      return $result;
  }/*}}}*/
  737. /**
  738. * Calculates the skewness of the data distribution in the set
  739. * The skewness measures the degree of asymmetry of a distribution,
  740. * and is related to the third central moment of a distribution.
  741. * A normal distribution has a skewness = 0
  742. * A distribution with a tail off towards the high end of the scale
  743. * (positive skew) has a skewness > 0
  744. * A distribution with a tail off towards the low end of the scale
  745. * (negative skew) has a skewness < 0
  746. * Handles cummulative data sets correctly
  747. *
  748. * @access public
  749. * @return mixed the skewness value on success, a PEAR_Error object otherwise
  750. * @see __sumdiff()
  751. * @see count()
  752. * @see stDev()
  753. * @see calc()
  754. */
  function skewness() {/*{{{*/
      // skewness = SUM{ (xi - mean)^3 } / (count * stDev^3)
      // Returns the cached value when available, a PEAR_Error when an
      // ingredient fails or when the standard deviation is zero.
      if (!array_key_exists('skewness', $this->_calculatedValues)) {
          $count = $this->count();
          if (PEAR::isError($count)) {
              return $count;
          }
          $stDev = $this->stDev();
          if (PEAR::isError($stDev)) {
              return $stDev;
          }
          // A data set without spread would lead to a division by zero
          // below (a DivisionByZeroError on PHP 8). Report it the same way
          // studentize() reports this condition.
          if ($stDev == 0) {
              return PEAR::raiseError('cannot calculate skewness, standard deviation is zero.');
          }
          $sumdiff3 = $this->__sumdiff(3);
          if (PEAR::isError($sumdiff3)) {
              return $sumdiff3;
          }
          $this->_calculatedValues['skewness'] = ($sumdiff3 / ($count * pow($stDev, 3)));
      }
      return $this->_calculatedValues['skewness'];
  }/*}}}*/
  773. /**
  774. * Calculates the kurtosis of the data distribution in the set
  775. * The kurtosis measures the degrees of peakedness of a distribution.
  776. * It is also called the "excess" or "excess coefficient", and is
  777. * a normalized form of the fourth central moment of a distribution.
  778. * A normal distribution has kurtosis = 0
  779. * A narrow and peaked (leptokurtic) distribution has a
  780. * kurtosis > 0
  781. * A flat and wide (platykurtic) distribution has a kurtosis < 0
  782. * Handles cummulative data sets correctly
  783. *
  784. * @access public
  785. * @return mixed the kurtosis value on success, a PEAR_Error object otherwise
  786. * @see __sumdiff()
  787. * @see count()
  788. * @see stDev()
  789. * @see calc()
  790. */
  function kurtosis() {/*{{{*/
      // kurtosis = SUM{ (xi - mean)^4 } / (count * stDev^4) - 3
      // Returns the cached value when available, a PEAR_Error when an
      // ingredient fails or when the standard deviation is zero.
      if (!array_key_exists('kurtosis', $this->_calculatedValues)) {
          $count = $this->count();
          if (PEAR::isError($count)) {
              return $count;
          }
          $stDev = $this->stDev();
          if (PEAR::isError($stDev)) {
              return $stDev;
          }
          // A data set without spread would lead to a division by zero
          // below (a DivisionByZeroError on PHP 8). Report it the same way
          // studentize() reports this condition.
          if ($stDev == 0) {
              return PEAR::raiseError('cannot calculate kurtosis, standard deviation is zero.');
          }
          $sumdiff4 = $this->__sumdiff(4);
          if (PEAR::isError($sumdiff4)) {
              return $sumdiff4;
          }
          $this->_calculatedValues['kurtosis'] = ($sumdiff4 / ($count * pow($stDev, 4))) - 3;
      }
      return $this->_calculatedValues['kurtosis'];
  }/*}}}*/
  809. /**
  810. * Calculates the median of a data set.
  811. * The median is the value such that half of the points are below it
  812. * in a sorted data set.
  813. * If the number of values is odd, it is the middle item.
  814. * If the number of values is even, it is the average of the two middle items.
  815. * Handles cummulative data sets correctly
  816. *
  817. * @access public
  818. * @return mixed the median value on success, a PEAR_Error object otherwise
  819. * @see count()
  820. * @see calc()
  821. */
  function median() {/*{{{*/
      // Returns the median of the data set (cached under 'median'), or a
      // PEAR_Error when no data is set or the count cannot be obtained.
      // NOTE(review): the lookup assumes the data (or the expanded list
      // for cummulative sets) is already sorted and 0-indexed, presumably
      // by _validate() - confirm.
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      if (!array_key_exists('median', $this->_calculatedValues)) {
          if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
              $arr =& $this->_dataExpanded;
          } else {
              $arr =& $this->_data;
          }
          $n = $this->count();
          if (PEAR::isError($n)) {
              return $n;
          }
          $h = intval($n / 2);
          if ($n % 2 == 0) {
              // even count: average of the two middle items
              $median = ($arr[$h] + $arr[$h - 1]) / 2;
          } else {
              // odd count: with 0-based indexes the middle item sits at
              // intval($n / 2). The previous $h + 1 picked the item after
              // the middle and read past the end for a single-item set.
              $median = $arr[$h];
          }
          $this->_calculatedValues['median'] = $median;
      }
      return $this->_calculatedValues['median'];
  }/*}}}*/
  846. /**
  847. * Calculates the mode of a data set.
  848. * The mode is the value with the highest frequency in the data set.
  849. * There can be more than one mode.
  850. * Handles cummulative data sets correctly
  851. *
  852. * @access public
  853. * @return mixed an array of mode value on success, a PEAR_Error object otherwise
  854. * @see frequency()
  855. * @see calc()
  856. */
  function mode() {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      // Serve the cached value when one exists.
      if (array_key_exists('mode', $this->_calculatedValues)) {
          return $this->_calculatedValues['mode'];
      }

      // A value => frequency table: cummulative data already has that
      // shape, simple data gets it from frequency().
      if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
          $table = $this->_data;
      } else {
          $table = $this->frequency();
      }
      // Highest frequency first, keys preserved.
      arsort($table);

      $seenFirst = false;
      foreach ($table as $value => $occurrences) {
          if (!$seenFirst) {
              // the first entry carries the top frequency
              $seenFirst = true;
              $topCount = $occurrences;
              $mode = array($value);
              continue;
          }
          // ties with the top frequency are modes as well
          if ($topCount == $occurrences) {
              $mode[] = $value;
          }
          // anything lower ends the search
          if ($topCount > $occurrences) {
              break;
          }
      }
      $this->_calculatedValues['mode'] = $mode;
      return $this->_calculatedValues['mode'];
  }/*}}}*/
  /**
   * Calculates the midrange of the data set, i.e. the arithmetic
   * average of its smallest and largest value: (max + min) / 2.
   * The result is cached after the first successful calculation.
   *
   * @access public
   * @return mixed the midrange value on success, or the PEAR_Error
   *               returned by min() or max()
   * @see min()
   * @see max()
   * @see calc()
   */
  function midrange() {/*{{{*/
      if (array_key_exists('midrange', $this->_calculatedValues)) {
          return $this->_calculatedValues['midrange'];
      }
      $lowest = $this->min();
      if (PEAR::isError($lowest)) {
          return $lowest;
      }
      $highest = $this->max();
      if (PEAR::isError($highest)) {
          return $highest;
      }
      $this->_calculatedValues['midrange'] = (($highest + $lowest) / 2);
      return $this->_calculatedValues['midrange'];
  }/*}}}*/
  /**
   * Calculates the geometric mean of the data set: the count()-th root
   * of product().
   * A product of zero yields 0.0 directly (that result is not cached);
   * a negative product is reported as an error.
   *
   * @access public
   * @return mixed the geometric mean on success, a PEAR_Error object otherwise
   * @see calc()
   * @see product()
   * @see count()
   */
  function geometricMean() {/*{{{*/
      if (array_key_exists('geometricMean', $this->_calculatedValues)) {
          return $this->_calculatedValues['geometricMean'];
      }
      $n = $this->count();
      if (PEAR::isError($n)) {
          return $n;
      }
      $product = $this->product();
      if (PEAR::isError($product)) {
          return $product;
      }
      if ($product == 0.0) {
          return 0.0;
      }
      if ($product < 0) {
          return PEAR::raiseError('The product of the data set is negative, geometric mean undefined.');
      }
      $root = pow($product , 1 / $n);
      $this->_calculatedValues['geometricMean'] = $root;
      return $this->_calculatedValues['geometricMean'];
  }/*}}}*/
  /**
   * Calculates the harmonic mean of the data set:
   *
   * harmonic mean = N / SUM { 1 / xi }
   *
   * For cumulative data sets every value contributes freq / value to
   * the sum. The mean is undefined when any data value is zero or when
   * the reciprocals add up to zero (e.g. the values 1 and -1); both
   * cases are reported as errors.
   *
   * @access public
   * @return mixed the harmonic mean value on success, a PEAR_Error object otherwise
   * @see calc()
   * @see count()
   */
  function harmonicMean() {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      if (!array_key_exists('harmonicMean', $this->_calculatedValues)) {
          $count = $this->count();
          if (PEAR::isError($count)) {
              return $count;
          }
          $invsum = 0.0;
          if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
              // Cumulative sets are stored as value => frequency.
              foreach($this->_data as $val=>$freq) {
                  if ($val == 0) {
                      return PEAR::raiseError('cannot calculate a '.
                              'harmonic mean with data values of zero.');
                  }
                  $invsum += $freq / $val;
              }
          } else {
              foreach($this->_data as $val) {
                  if ($val == 0) {
                      return PEAR::raiseError('cannot calculate a '.
                              'harmonic mean with data values of zero.');
                  }
                  $invsum += 1 / $val;
              }
          }
          // Positive and negative values can cancel each other out;
          // without this guard the division below would be by zero.
          if ($invsum == 0) {
              return PEAR::raiseError('cannot calculate a '.
                      'harmonic mean, the sum of reciprocals is zero.');
          }
          $this->_calculatedValues['harmonicMean'] = $count / $invsum;
      }
      return $this->_calculatedValues['harmonicMean'];
  }/*}}}*/
  /**
   * Calculates the nth central moment (m{n}) of a data set.
   *
   * The definition of a sample central moment is:
   *
   * m{n} = 1/N * SUM { (xi - avg)^n }
   *
   * where: N = sample size, avg = sample mean.
   *
   * The first central moment is zero by definition and is returned
   * without inspecting the data.
   *
   * @access public
   * @param integer $n moment to calculate, must be an integer >= 1
   * @return mixed the numeric value of the moment on success, PEAR_Error otherwise
   */
  function sampleCentralMoment($n) {/*{{{*/
      if (!is_int($n) || $n < 1) {
          // raiseError() builds the error object; isError() only tests
          // a value and would have returned a plain boolean false here.
          return PEAR::raiseError('moment must be a positive integer >= 1.');
      }
      if ($n == 1) {
          return 0;
      }
      $count = $this->count();
      if (PEAR::isError($count)) {
          return $count;
      }
      if ($count == 0) {
          return PEAR::raiseError("Cannot calculate {$n}th sample moment, ".
                  'there are zero data entries');
      }
      // SUM { (xi - avg)^n }, using the mean of the data set.
      $sum = $this->__sumdiff($n);
      if (PEAR::isError($sum)) {
          return $sum;
      }
      return ($sum / $count);
  }/*}}}*/
  /**
   * Calculates the nth raw moment (m{n}) of a data set.
   *
   * The definition of a sample raw moment is:
   *
   * m{n} = 1/N * SUM { xi^n }
   *
   * where: N = sample size.
   *
   * @access public
   * @param integer $n moment to calculate, must be an integer >= 1
   * @return mixed the numeric value of the moment on success, PEAR_Error otherwise
   */
  function sampleRawMoment($n) {/*{{{*/
      if (!is_int($n) || $n < 1) {
          // raiseError() builds the error object; isError() only tests
          // a value and would have returned a plain boolean false here.
          return PEAR::raiseError('moment must be a positive integer >= 1.');
      }
      $count = $this->count();
      if (PEAR::isError($count)) {
          return $count;
      }
      if ($count == 0) {
          return PEAR::raiseError("Cannot calculate {$n}th raw moment, ".
                  'there are zero data entries.');
      }
      // SUM { xi^n } as provided by sumN().
      $sum = $this->sumN($n);
      if (PEAR::isError($sum)) {
          return $sum;
      }
      return ($sum / $count);
  }/*}}}*/
  /**
   * Calculates the coefficient of variation of the data set:
   *
   * coeff. of variation = stDev / mean
   *
   * It expresses the spread of the data relative to its mean and is
   * often quoted as a percentage. It is undefined for a mean of zero,
   * which is reported as an error.
   *
   * @access public
   * @return mixed the coefficient of variation on success, a PEAR_Error object otherwise
   * @see stDev()
   * @see mean()
   * @see calc()
   */
  function coeffOfVariation() {/*{{{*/
      if (array_key_exists('coeffOfVariation', $this->_calculatedValues)) {
          return $this->_calculatedValues['coeffOfVariation'];
      }
      $average = $this->mean();
      if (PEAR::isError($average)) {
          return $average;
      }
      if ($average == 0.0) {
          return PEAR::raiseError('cannot calculate the coefficient '.
                  'of variation, mean of sample is zero');
      }
      $deviation = $this->stDev();
      if (PEAR::isError($deviation)) {
          return $deviation;
      }
      $this->_calculatedValues['coeffOfVariation'] = $deviation / $average;
      return $this->_calculatedValues['coeffOfVariation'];
  }/*}}}*/
  /**
   * Calculates the standard error of the mean, i.e. the standard
   * deviation of the sampling distribution of the mean:
   *
   * S.E. Mean = SD / (N)^(1/2)
   *
   * The formula makes no assumption of normality; the standard error
   * shrinks with the square root of the sample size.
   *
   * @access public
   * @return mixed the standard error of the mean on success, a PEAR_Error object otherwise
   * @see stDev()
   * @see count()
   * @see calc()
   */
  function stdErrorOfMean() {/*{{{*/
      if (array_key_exists('stdErrorOfMean', $this->_calculatedValues)) {
          return $this->_calculatedValues['stdErrorOfMean'];
      }
      $n = $this->count();
      if (PEAR::isError($n)) {
          return $n;
      }
      $deviation = $this->stDev();
      if (PEAR::isError($deviation)) {
          return $deviation;
      }
      $this->_calculatedValues['stdErrorOfMean'] = $deviation / sqrt($n);
      return $this->_calculatedValues['stdErrorOfMean'];
  }/*}}}*/
  /**
   * Calculates the value frequency table of a data set.
   * A cumulative data set already is such a table and is returned as
   * stored; for a simple data set the occurrences of each value are
   * counted and the table is sorted by value.
   *
   * @access public
   * @return mixed an associative array of value=>frequency items on success, a PEAR_Error object otherwise
   * @see min()
   * @see max()
   * @see calc()
   */
  function frequency() {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      if (!array_key_exists('frequency', $this->_calculatedValues)) {
          if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
              $freq = $this->_data;
          } else {
              $freq = array();
              foreach ($this->_data as $val) {
                  // Index by the string form so that float values are
                  // not truncated to integer keys.
                  $key = "$val";
                  // Start each counter explicitly: incrementing a key
                  // that does not exist yet raises an undefined-index
                  // notice (a warning on PHP 8).
                  if (isset($freq[$key])) {
                      $freq[$key]++;
                  } else {
                      $freq[$key] = 1;
                  }
              }
              ksort($freq);
          }
          $this->_calculatedValues['frequency'] = $freq;
      }
      return $this->_calculatedValues['frequency'];
  }/*}}}*/
  /**
   * Calculates the quartiles of the data set: the values that split
   * the sorted data into four equal-sized parts, i.e. the 25th, 50th
   * and 75th percentiles.
   * The result is an array keyed by 25, 50 and 75.
   *
   * @access public
   * @return mixed an associative array of quartiles on success, a PEAR_Error otherwise
   * @see percentile()
   */
  function quartiles() {/*{{{*/
      if (array_key_exists('quartiles', $this->_calculatedValues)) {
          return $this->_calculatedValues['quartiles'];
      }
      // Result key => percentile to request, in evaluation order.
      $wanted = array('25' => 25, '50' => 50, '75' => 75);
      $found = array();
      foreach ($wanted as $label => $percent) {
          $quartile = $this->percentile($percent);
          if (PEAR::isError($quartile)) {
              return $quartile;
          }
          $found[$label] = $quartile;
      }
      $this->_calculatedValues['quartiles'] = $found;
      return $this->_calculatedValues['quartiles'];
  }/*}}}*/
  /**
   * Calculates the interquartile mean: the mean of the values that
   * remain after discarding those below the 25th percentile and those
   * above the 75th percentile (both bounds are inclusive):
   *
   * interquart mean = mean of all xi with P(25) &lt;= xi &lt;= P(75)
   *
   * where: P = percentile
   *
   * The values are read through getData(true); presumably that is the
   * expanded, one-entry-per-observation form - verify against getData().
   *
   * @todo need to double check the equation
   * @access public
   * @return mixed a numeric value on success, a PEAR_Error otherwise
   * @see quartiles()
   */
  function interquartileMean() {/*{{{*/
      if (array_key_exists('interquartileMean', $this->_calculatedValues)) {
          return $this->_calculatedValues['interquartileMean'];
      }
      $quartiles = $this->quartiles();
      if (PEAR::isError($quartiles)) {
          return $quartiles;
      }
      $upper = $quartiles['75'];
      $lower = $quartiles['25'];
      $total = 0;
      $kept = 0;
      foreach ($this->getData(true) as $point) {
          $inside = ($point >= $lower && $point <= $upper);
          if ($inside) {
              $total += $point;
              $kept++;
          }
      }
      if ($kept == 0) {
          return PEAR::raiseError('error calculating interquartile mean, '.
                  'empty interquartile range of values.');
      }
      $this->_calculatedValues['interquartileMean'] = $total / $kept;
      return $this->_calculatedValues['interquartileMean'];
  }/*}}}*/
  /**
   * Calculates the interquartile range, the distance between the 75th
   * and the 25th percentile. It spans the middle 50% of the data set
   * and is therefore insensitive to outliers and extreme values.
   *
   * interquart range = P(75) - P(25)
   *
   * where: P = percentile
   *
   * @access public
   * @return mixed a numeric value on success, a PEAR_Error otherwise
   * @see quartiles()
   */
  function interquartileRange() {/*{{{*/
      if (array_key_exists('interquartileRange', $this->_calculatedValues)) {
          return $this->_calculatedValues['interquartileRange'];
      }
      $quartiles = $this->quartiles();
      if (PEAR::isError($quartiles)) {
          return $quartiles;
      }
      $upper = $quartiles['75'];
      $lower = $quartiles['25'];
      $this->_calculatedValues['interquartileRange'] = $upper - $lower;
      return $this->_calculatedValues['interquartileRange'];
  }/*}}}*/
  /**
   * Calculates the quartile deviation, which is one half of the
   * interquartile range:
   *
   * quart dev = (P(75) - P(25)) / 2
   *
   * where: P = percentile
   *
   * @access public
   * @return mixed a numeric value on success, a PEAR_Error otherwise
   * @see quartiles()
   * @see interquartileRange()
   */
  function quartileDeviation() {/*{{{*/
      if (array_key_exists('quartileDeviation', $this->_calculatedValues)) {
          return $this->_calculatedValues['quartileDeviation'];
      }
      $range = $this->interquartileRange();
      if (PEAR::isError($range)) {
          return $range;
      }
      $this->_calculatedValues['quartileDeviation'] = $range / 2;
      return $this->_calculatedValues['quartileDeviation'];
  }/*}}}*/
  /**
   * The quartile variation coefficient is defined as follows:
   *
   * quart var coeff = 100 * (P(75) - P(25)) / (P(75) + P(25))
   *
   * where: P = percentile
   *
   * The coefficient is undefined when P(75) + P(25) is zero; that case
   * is reported as an error instead of dividing by zero.
   *
   * @todo need to double check the equation
   * @access public
   * @return mixed a numeric value on success, a PEAR_Error otherwise
   * @see quartiles()
   */
  function quartileVariationCoefficient() {/*{{{*/
      if (!array_key_exists('quartileVariationCoefficient', $this->_calculatedValues)) {
          $quart = $this->quartiles();
          if (PEAR::isError($quart)) {
              return $quart;
          }
          $q3 = $quart['75'];
          $q1 = $quart['25'];
          $d = $q3 - $q1;
          $s = $q3 + $q1;
          // Same treatment as a zero mean in coeffOfVariation().
          if ($s == 0) {
              return PEAR::raiseError('cannot calculate the quartile variation '.
                      'coefficient, sum of the 25th and 75th percentiles is zero');
          }
          $this->_calculatedValues['quartileVariationCoefficient'] = 100 * $d / $s;
      }
      return $this->_calculatedValues['quartileVariationCoefficient'];
  }/*}}}*/
  /**
   * The quartile skewness coefficient (also known as Bowley Skewness),
   * is defined as follows:
   *
   * quart skewness coeff = (P(25) - 2*P(50) + P(75)) / (P(75) - P(25))
   *
   * where: P = percentile
   *
   * The coefficient is undefined when the interquartile range
   * P(75) - P(25) is zero (e.g. for constant data); that case is
   * reported as an error instead of dividing by zero.
   *
   * @todo need to double check the equation
   * @access public
   * @return mixed a numeric value on success, a PEAR_Error otherwise
   * @see quartiles()
   */
  function quartileSkewnessCoefficient() {/*{{{*/
      if (!array_key_exists('quartileSkewnessCoefficient', $this->_calculatedValues)) {
          $quart = $this->quartiles();
          if (PEAR::isError($quart)) {
              return $quart;
          }
          $q3 = $quart['75'];
          $q2 = $quart['50'];
          $q1 = $quart['25'];
          $d = $q3 - 2*$q2 + $q1;
          $s = $q3 - $q1;
          if ($s == 0) {
              return PEAR::raiseError('cannot calculate the quartile skewness '.
                      'coefficient, interquartile range is zero');
          }
          $this->_calculatedValues['quartileSkewnessCoefficient'] = $d / $s;
      }
      return $this->_calculatedValues['quartileSkewnessCoefficient'];
  }/*}}}*/
  /**
   * The pth percentile is the value such that p% of a sorted data set
   * is smaller than it, and (100 - p)% of the data is larger.
   *
   * A quick algorithm to pick the appropriate value from a sorted data
   * set is as follows:
   *
   * - Count the number of values: n
   * - Calculate the (one-based) position of the value in the data
   *   list: i = p * (n + 1) / 100
   * - if i < 1, return the minimum of the data set
   * - if i > n, return the maximum of the data set
   * - if i is an integer, return the data at that position
   * - otherwise, average the entries at adjacent positions to i
   *
   * The range tests come first so that an integer position outside of
   * 1..n (e.g. for p = 0 or p = 100) never indexes outside the data.
   *
   * The median is the 50th percentile value.
   *
   * @todo need to double check generality of the algorithm
   *
   * @access public
   * @param numeric $p the percentile to estimate, e.g. 25 for 25th percentile
   * @return mixed a numeric value on success, a PEAR_Error otherwise
   * @see quartiles()
   * @see median()
   */
  function percentile($p) {/*{{{*/
      $count = $this->count();
      if (PEAR::isError($count)) {
          return $count;
      }
      // Both arrays are sorted and zero-indexed by _validate().
      if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
          $data =& $this->_dataExpanded;
      } else {
          $data =& $this->_data;
      }
      // One-based position of the requested percentile.
      $obsidx = $p * ($count + 1) / 100;
      if ($obsidx < 1) {
          return $data[0];
      }
      if ($obsidx > $count) {
          return $data[($count - 1)];
      }
      if (intval($obsidx) == $obsidx) {
          return $data[intval($obsidx) - 1];
      }
      // Fractional position: average the two neighbouring entries.
      $left = (int) floor($obsidx - 1);
      $right = (int) ceil($obsidx - 1);
      return ($data[$left] + $data[$right]) / 2;
  }/*}}}*/
  1350. // private methods
  /**
   * Utility function to calculate: SUM { (xi - mean)^n }
   *
   * For cumulative data sets every term is weighted by the frequency
   * of its value. When no mean is passed in, mean() is used.
   *
   * @access private
   * @param numeric $power the exponent
   * @param optional double $mean the data set mean value
   * @return mixed the sum on success, a PEAR_Error object otherwise
   *
   * @see stDev()
   * @see varianceWithMean();
   * @see skewness();
   * @see kurtosis();
   */
  function __sumdiff($power, $mean=null) {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      if (is_null($mean)) {
          $mean = $this->mean();
          if (PEAR::isError($mean)) {
              return $mean;
          }
      }
      $exponent = (double)$power;
      $weighted = ($this->_dataOption == STATS_DATA_CUMMULATIVE);
      $total = 0;
      foreach ($this->_data as $key => $item) {
          if ($weighted) {
              // Stored as value => frequency: $key is the data point.
              $total += $item * pow((double)($key - $mean), $exponent);
          } else {
              $total += pow((double)($item - $mean), $exponent);
          }
      }
      return $total;
  }/*}}}*/
  /**
   * Utility function to calculate the variance, either around the
   * mean of the data set or around a fixed mean supplied by the caller:
   *
   * variance = SUM { (xi - mean)^2 } / (N - 1)
   *
   * A single data point has no variance and is reported as an error.
   *
   * @access private
   * @param $mean the fixed mean to use, null as default
   * @return mixed a numeric value on success, a PEAR_Error otherwise
   * @see variance()
   * @see varianceWithMean()
   */
  function __calcVariance($mean = null) {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      $squares = $this->__sumdiff(2, $mean);
      if (PEAR::isError($squares)) {
          return $squares;
      }
      $n = $this->count();
      if (PEAR::isError($n)) {
          return $n;
      }
      if ($n == 1) {
          return PEAR::raiseError('cannot calculate variance of a singe data point');
      }
      $degreesOfFreedom = $n - 1;
      return ($squares / $degreesOfFreedom);
  }/*}}}*/
  /**
   * Utility function to calculate the absolute deviation, either
   * around the mean of the data set or around a fixed mean supplied by
   * the caller:
   *
   * abs dev = SUM { | xi - mean | } / N
   *
   * @access private
   * @param $mean the fixed mean to use, null as default
   * @return mixed a numeric value on success, a PEAR_Error otherwise
   * @see absDev()
   * @see absDevWithMean()
   */
  function __calcAbsoluteDeviation($mean = null) {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      $n = $this->count();
      if (PEAR::isError($n)) {
          return $n;
      }
      $total = $this->__sumabsdev($mean);
      if (PEAR::isError($total)) {
          return $total;
      }
      $deviation = $total / $n;
      return $deviation;
  }/*}}}*/
  /**
   * Utility function to calculate: SUM { | xi - mean | }
   *
   * For cumulative data sets every term is weighted by the frequency
   * of its value. When no mean is passed in, mean() is used and any
   * error it reports is returned unchanged.
   *
   * @access private
   * @param optional double $mean the mean value for the set or population
   * @return mixed the sum on success, a PEAR_Error object otherwise
   *
   * @see absDev()
   * @see absDevWithMean()
   */
  function __sumabsdev($mean=null) {/*{{{*/
      if ($this->_data == null) {
          return PEAR::raiseError('data has not been set');
      }
      if (is_null($mean)) {
          $mean = $this->mean();
          // Same check as in __sumdiff(): without it a PEAR_Error
          // object would be used as a number in the loops below.
          if (PEAR::isError($mean)) {
              return $mean;
          }
      }
      $sdev = 0;
      if ($this->_dataOption == STATS_DATA_CUMMULATIVE) {
          // Cumulative sets are stored as value => frequency.
          foreach ($this->_data as $val=>$freq) {
              $sdev += $freq * abs($val - $mean);
          }
      } else {
          foreach ($this->_data as $val) {
              $sdev += abs($val - $mean);
          }
      }
      return $sdev;
  }/*}}}*/
  /**
   * Utility function to format a possible PEAR_Error result, to be
   * used by calc(), calcBasic() and calcFull()
   *
   * @access private
   * @param mixed $v value to be formatted
   * @param boolean $useErrorObject whether a PEAR_Error is passed
   *              through as an object (when true, default), or replaced
   *              by its error message (when false)
   * @return mixed the error message string if the value is a PEAR_Error
   *              object and $useErrorObject is false; in every other
   *              case the value exactly as it was passed in.
   */
  function __format($v, $useErrorObject=true) {/*{{{*/
      $asMessage = (PEAR::isError($v) && !$useErrorObject);
      return $asMessage ? $v->getMessage() : $v;
  }/*}}}*/
  /**
   * Utility function to validate the data and modify it
   * according to the current null handling option.
   *
   * Non-numeric data points are dropped (STATS_IGNORE_NULL), counted
   * as zero (STATS_USE_NULL_AS_ZERO) or cause the whole data set to be
   * rejected (STATS_REJECT_NULL and any unknown option).
   * Afterwards the data is sorted; for cumulative data sets the
   * value=>frequency table is additionally expanded into
   * $this->_dataExpanded, a sorted list with one entry per observation.
   *
   * @access private
   * @return mixed true on success, a PEAR_Error object otherwise
   *
   * @see setData()
   */
  function _validate() {/*{{{*/
      $flag = ($this->_dataOption == STATS_DATA_CUMMULATIVE);
      // foreach iterates over a copy of the array, so entries of
      // $this->_data can be removed or rewritten inside the loop.
      foreach ($this->_data as $key=>$value) {
          // Cumulative sets are stored as value => frequency, so the
          // data point is the key; in simple sets it is the value.
          $d = ($flag) ? $key : $value;
          $v = ($flag) ? $value : $key;
          if (is_numeric($d)) {
              continue;
          }
          switch ($this->_nullOption) {
              case STATS_IGNORE_NULL :
                  unset($this->_data["$key"]);
                  break;
              case STATS_USE_NULL_AS_ZERO:
                  if ($flag) {
                      // Move the frequency of the invalid entry over
                      // to the entry for the value zero, creating that
                      // entry first when the data set has none yet.
                      unset($this->_data["$key"]);
                      if (!isset($this->_data[0])) {
                          $this->_data[0] = 0;
                      }
                      $this->_data[0] += $v;
                  } else {
                      $this->_data[$key] = 0;
                  }
                  break;
              case STATS_REJECT_NULL :
              default:
                  return PEAR::raiseError('data rejected, contains NULL values');
          }
      }
      if ($flag) {
          ksort($this->_data);
          $this->_dataExpanded = array();
          foreach ($this->_data as $val=>$freq) {
              // Append $freq copies of $val.
              $this->_dataExpanded = array_pad($this->_dataExpanded, count($this->_dataExpanded) + $freq, $val);
          }
          sort($this->_dataExpanded);
      } else {
          sort($this->_data);
      }
      return true;
  }/*}}}*/
  1530. }/*}}}*/
  1531. // vim: ts=4:sw=4:et:
  1532. // vim6: fdl=1: fdm=marker:
  1533. ?>