/assessment/libs/stats.php
PHP | 1167 lines | 1062 code | 26 blank | 79 comment | 101 complexity | 74142c2570cde5ff78db7ddd3464c9e4 MD5 | raw file
- <?php
- //A library of Stats functions. Version 1.9, March 30, 2008
-
- global $allowedmacros;
- array_push($allowedmacros,"nCr","nPr","mean","stdev","percentile","quartile","TIquartile","median","freqdist","frequency","histogram","fdhistogram","fdbargraph","normrand","boxplot","normalcdf","tcdf","invnormalcdf","invtcdf","invtcdf2","linreg","countif","binomialpdf","binomialcdf","chicdf","invchicdf","chi2cdf","invchi2cdf","fcdf","invfcdf","piechart");
-
- //nCr(n,r)
- //The Choose function
- function nCr($n,$r){
- if ($r > $n)
- return false;
- if (($n-$r) < $r)
- return nCr($n,($n-$r));
- $return = 1;
- for ($i=0;$i < $r;$i++){
- $return *= ($n-$i)/($i+1);
- }
- return $return;
- }
-
- //nPr(n,r)
- //The Permutations function
- function nPr($n,$r){
- if ($r > $n)
- return false;
- if ($r==0)
- return 1;
- $return = 1;
- $i = $n-$r;
- while ($i<$n) {
- $return *= ++$i;
- }
- return $return;
- }
-
- //mean(array)
- //Finds the mean of an array of numbers
- function mean($a) {
- return (array_sum($a)/count($a));
- }
-
- //variance(array)
- //the (sample) variance of an array of numbers
- function variance($a) {
- $v = 0;
- $mean = mean($a);
- foreach ($a as $x) {
- $v += pow($x-$mean,2);
- }
- return ($v/(count($a)-1));
- }
-
- //stdev(array)
- //the (sample) standard deviation of an array of numbers
- function stdev($a) {
- return sqrt(variance($a));
- }
-
- //percentile(array,percentile)
- //example: percentile($a,30) would find the 30th percentile of the data
- //method based on Triola
- function percentile($a,$p) {
- sort($a, SORT_NUMERIC);
- if ($p==0) {
- return $a[0];
- } else if ($p==100) {
- return $a[count($a)-1];
- }
-
- $l = $p*count($a)/100;
- if (floor($l)==$l) {
- return (($a[$l-1]+$a[$l])/2);
- } else {
- return ($a[ceil($l)-1]);
- }
- }
-
- //quartile(array,quartile)
- //finds the 0 (min), 1st, 2nd (median), 3rd, or 4th (max) quartile of an
- //array of numbers. Calculates using percentiles.
- function quartile($a,$q) {
- return percentile($a,$q*25);
- }
-
- //TIquartile(array,quartile)
- //finds the 0 (min), 1st, 2nd (median), 3rd, or 4th (max) quartile of an
- //array of numbers. Calculates using the TI-84 method.
- function TIquartile($a,$q) {
- sort($a, SORT_NUMERIC);
- $n = count($a);
- if ($q==0) {
- return $a[0];
- } else if ($q==4) {
- return $a[count($a)-1];
- }
- if ($q==2) {
- if ($n%2==0) { //even
- $m = $n/2;
- return (($a[$m-1] + $a[$m])/2);
- } else {
- return ($a[floor($n/2)]);
- }
- } else {
- if ($n%2==0) { //even
- if ($n%4==0) { //lower half is even
- $m = $n/4;
- if ($q==3) { $m = $n-$m;}
- return (($a[$m-1] + $a[$m])/2);
- } else {
- $m = floor($n/4);
- if ($q==3) { $m = $n-$m-1;}
- return ($a[$m]);
- }
- } else {
- if ((($n-1)/2)%2==0) {//lower half is even
- $m = floor($n/4);
- if ($q==3) { $m = $n-$m;}
- return (($a[$m-1] + $a[$m])/2);
- } else {
- $m = floor($n/4);
- if ($q==3) { $m = $n-$m-1;}
- return ($a[$m]);
- }
- }
- }
- }
-
- //median(array)
- //returns the median of an array of numbers
- function median($a) {
- return percentile($a,50);
- }
-
- //freqdist(array,label,start,classwidth)
- //display macro. Returns an HTML table that is a frequency distribution of
- //the data
- // array: array of data values
- // label: name of data values
- // start: first lower class limit
- // classwidth: width of the classes
- function freqdist($a,$label,$start,$cw) {
- if ($cw<0) { $cw *= -1;} else if ($cw==0) { echo "Error - classwidth cannot be 0"; return 0;}
- sort($a, SORT_NUMERIC);
- $x = $start;
- $curr = 0;
- $out = "<table class=stats><thead><tr><th>$label</th><th>Freq</th></tr></thead>\n<tbody>\n";
- while ($x <= $a[count($a)-1]) {
- $out .= "<tr><td>`$x <= x < ";
- $x += $cw;
- $out .= "$x`</td><td>";
- $i = $curr;
- while (($a[$i] < $x) && ($i < count($a))) {
- $i++;
- }
- $out .= ($i-$curr) . "</td></tr>\n";
- $curr = $i;
- }
- $out .= "</tbody></table>\n";
- return $out;
- }
-
- //frequency(array,start,classwidth)
- //Returns an array of frequencies for the data grouped into classes
- // array: array of data values
- // start: first lower class limit
- // classwidth: width of the classes
- function frequency($a,$start,$cw) {
- if ($cw<0) { $cw *= -1;} else if ($cw==0) { echo "Error - classwidth cannot be 0"; return 0;}
- sort($a, SORT_NUMERIC);
- $x = $start;
- $curr = 0;
- while ($x <= $a[count($a)-1]) {
- $x += $cw;
- $i = $curr;
- while (($a[$i] < $x) && ($i < count($a))) {
- $i++;
- }
- $out[] = ($i-$curr);
- $curr = $i;
- }
- return $out;
- }
-
- //countif(array,condition)
- //Returns count of items in array that meet condition
- // array: array of data values
- // condition: a condition, using x for data values
- //Example: countif($a,"x<3 && x>2")
- function countif($a,$ifcond) {
- $rsnoquote = preg_replace('/"[^"]*"/','""',$ifcond);
- $rsnoquote = preg_replace('/\'[^\']*\'/','\'\'',$rsnoquote);
- if (preg_match_all('/([$\w]+)\s*\([^\)]*\)/',$rsnoquote,$funcs)) {
- $ismath = true;
- for ($i=0;$i<count($funcs[1]);$i++) {
- if (strpos($funcs[1][$i],"$")===false) {
- if (!in_array($funcs[1][$i],$allowedmacros)) {
- echo "{$funcs[1][$i]} is not an allowed function<BR>\n";
- return false;
- }
- }
- }
- }
- $ifcond = str_replace('!=','#=',$ifcond);
- $ifcond = mathphp($ifcond,'x');
- $ifcond = str_replace('#=','!=',$ifcond);
- $ifcond = str_replace('(x)','($x)',$ifcond);
- $iffunc = create_function('$x','return('.$ifcond.');');
-
- $cnt = 0;
- foreach ($a as $v) {
- if ($iffunc($v)) {
- $cnt++;
- }
- }
- return $cnt;
- }
-
- //histogram(array,label,start,classwidth,[labelstart,upper,width,height])
- //display macro. Creates a histogram from a data set
- // array: array of data values
- // label: name of data values
- // start: first lower class limit
- // classwidth: width of the classes
- // labelstart (optional): value to start axis labeling at. Defaults to start
- // upper (optional): first upper class limit. Defaults to start+classwidth
- // width,height (optional): width and height in pixels of graph
- function histogram($a,$label,$start,$cw,$startlabel=false,$upper=false,$width=300,$height=200) {
- if ($cw<0) { $cw *= -1;} else if ($cw==0) { echo "Error - classwidth cannot be 0"; return 0;}
- sort($a, SORT_NUMERIC);
- $x = $start;
- $curr = 0;
- $alt = "Histogram for $label <table class=stats><thead><tr><th>Label on left of box</th><th>Frequency</th></tr></thead>\n<tbody>\n";
- $maxfreq = 0;
- if ($upper===false) {
- $dx = $cw;
- $dxdiff = 0;
- } else {
- $dx = $upper - $start;
- $dxdiff = $cw-$dx;
- }
-
- while ($x <= $a[count($a)-1]) {
- $alt .= "<tr><td>$x</td>";
- $st .= "rect([$x,0],";
- $x += $dx;
- $st .= "[$x,";
- $i = $curr;
- while (($a[$i] < $x) && ($i < count($a))) {
- $i++;
- }
- if (($i-$curr)>$maxfreq) { $maxfreq = $i-$curr;}
- $alt .= "<td>" . ($i-$curr) . "</td></tr>\n";
- $st .= ($i-$curr) . "]);";
- $curr = $i;
- $x += $dxdiff;
- }
- $alt .= "</tbody></table>\n";
- if ($GLOBALS['sessiondata']['graphdisp']==0) {
- return $alt;
- }
- $outst = "setBorder(".(40+7*strlen($maxfreq)).",40,10,5); initPicture(".($start>0?(max($start-.9*$cw,0)):$start).",$x,0,$maxfreq);";
-
- $power = floor(log10($maxfreq))-1;
- $base = $maxfreq/pow(10,$power);
- if ($base>75) {$step = 20*pow(10,$power);} else if ($base>40) { $step = 10*pow(10,$power);} else if ($base>20) {$step = 5*pow(10,$power);} else if ($base>9) {$step = 2*pow(10,$power);} else {$step = pow(10,$power);}
-
- //if ($maxfreq>100) {$step = 20;} else if ($maxfreq > 50) { $step = 10; } else if ($maxfreq > 20) { $step = 5;} else if ($maxfreq>9) { $step = 2; } else {$step=1;}
- if ($startlabel===false) {
- //$outst .= "axes($cw,$step,1,1000,$step); fill=\"blue\"; textabs([". ($width/2+15) .",0],\"$label\",\"above\");";
- $startlabel = $start;
- } //else {
- $outst .= "axes(1000,$step,1,1000,$step); fill=\"blue\"; textabs([". ($width/2+15) .",0],\"$label\",\"above\");";
- $x = $startlabel;
- $tm = -.02*$maxfreq;
- $tx = .02*$maxfreq;
- while ($x <= $a[count($a)-1]+1) {
- $outst .= "line([$x,$tm],[$x,$tx]); text([$x,0],\"$x\",\"below\");";
- $x+= $cw;
- }
- //}
- $outst .= "textabs([0,". ($height/2+15) ."],\"Frequency\",\"right\",90);";
- $outst .= $st;
- return showasciisvg($outst,$width,$height);
- }
-
- //fdhistogram(freqarray,label,start,cw,[labelstart,upper,width,height])
- //display macro. Creates a histogram from frequency array
- // freqarray: array of frequencies
- // label: name of data values
- // start: first lower class limit
- // classwidth: width of the classes
- // labelstart (optional): value to start axis labeling at. Defaults to start
- // upper (optional): first upper class limit. Defaults to start+classwidth
- // width,height (optional): width and height in pixels of graph
- function fdhistogram($freq,$label,$start,$cw,$startlabel=false,$upper=false,$width=300,$height=200) {
- if ($cw<0) { $cw *= -1;} else if ($cw==0) { echo "Error - classwidth cannot be 0"; return 0;}
- $x = $start;
- $alt = "Histogram for $label <table class=stats><thead><tr><th>Label on left of box</th><th>Frequency</th></tr></thead>\n<tbody>\n";
- $maxfreq = 0;
- if ($upper===false) {
- $dx = $cw;
- $dxdiff = 0;
- } else {
- $dx = $upper - $start;
- $dxdiff = $cw-$dx;
- }
- for ($curr=0; $curr<count($freq); $curr++) {
- $alt .= "<tr><td>$x</td><td>{$freq[$curr]}</td></tr>";
- $st .= "rect([$x,0],";
- $x += $dx;
- $st .= "[$x,{$freq[$curr]}]);";
- if ($freq[$curr]>$maxfreq) { $maxfreq = $freq[$curr];}
- $x += $dxdiff;
- }
- $alt .= "</tbody></table>\n";
- if ($GLOBALS['sessiondata']['graphdisp']==0) {
- return $alt;
- }
- $outst = "setBorder(".(40+7*strlen($maxfreq)).",40,10,5); initPicture(".($start>0?(max($start-.9*$cw,0)):$start).",$x,0,$maxfreq);";
- //$outst = "setBorder(10); initPicture(". ($start-.1*($x-$start)) .",$x,". (-.1*$maxfreq) .",$maxfreq);";
- $power = floor(log10($maxfreq))-1;
- $base = $maxfreq/pow(10,$power);
- if ($base>75) {$step = 20*pow(10,$power);} else if ($base>40) { $step = 10*pow(10,$power);} else if ($base>20) {$step = 5*pow(10,$power);} else if ($base>9) {$step = 2*pow(10,$power);} else {$step = pow(10,$power);}
- //if ($maxfreq>100) {$step = 20;} else if ($maxfreq > 50) { $step = 10; } else if ($maxfreq > 20) { $step = 5;} else if ($maxfreq>9) {$step = 2;} else {$step=1;}
- if ($startlabel===false) {
- //$outst .= "axes($cw,$step,1,1000,$step); fill=\"blue\"; textabs([". ($width/2+15) .",0],\"$label\",\"above\");";
- $startlabel = $start;
- } //else {
- $outst .= "axes(1000,$step,1,1000,$step); fill=\"blue\"; textabs([". ($width/2+15) .",0],\"$label\",\"above\");";
- $x = $startlabel;
- $tm = -.02*$maxfreq;
- $tx = .02*$maxfreq;
- for ($curr=0; $curr<count($freq)+1; $curr++) {
- $outst .= "line([$x,$tm],[$x,$tx]); text([$x,0],\"$x\",\"below\");";
- $x+=$cw;
- }
- //}
- //$outst .= "text([".($start-.1*($x-$start)).",".(.5*$maxfreq)."],\"Freq\",,90)";
- //$outst .= "axes($cw,$step,1,1000,$step); fill=\"blue\"; text([". ($start + .5*($x-$start)) .",". (-.1*$maxfreq) . "],\"$label\");";
- $outst .= "textabs([0,". ($height/2+15) ."],\"Frequency\",\"right\",90);";
- $outst .= $st;
- return showasciisvg($outst,$width,$height);
- }
-
- //fdbargraph(barlabels,freqarray,label,[width,height])
- //barlabels: array of labels for the bars
- //freqarray: array of frequencies/heights for the bars
- //label: general label for bars
- //width,height (optional): width and height for graph
- function fdbargraph($bl,$freq,$label,$width=300,$height=200) {
- if (!is_array($bl) || !is_array($freq)) {echo "barlabels and freqarray must be arrays"; return 0;}
- if (count($bl) != count($freq)) { echo "barlabels and freqarray must have same length"; return 0;}
- $alt = "Histogram for $label <table class=stats><thead><tr><th>Bar Label</th><th>Frequency/Height</th></tr></thead>\n<tbody>\n";
- $start = 0;
- $x = $start+1;
- $maxfreq = 0;
- for ($curr=0; $curr<count($bl); $curr++) {
- $alt .= "<tr><td>{$bl[$curr]}</td><td>{$freq[$curr]}</td></tr>";
- $st .= "rect([$x,0],";
- $x += 2;
- $st .= "[$x,{$freq[$curr]}]);";
- $x -= 1;
- $st .= "text([$x,0],\"{$bl[$curr]}\",\"below\");";
- $x += 1;
- if ($freq[$curr]>$maxfreq) { $maxfreq = $freq[$curr];}
- }
- $alt .= "</tbody></table>\n";
- if ($GLOBALS['sessiondata']['graphdisp']==0) {
- return $alt;
- }
- $x++;
- //$outst = "setBorder(10); initPicture(". ($start-.1*($x-$start)) .",$x,". (-.1*$maxfreq) .",$maxfreq);";
- $outst = "setBorder(45,40,10,5); initPicture(".($start>0?(max($start-.9*$cw,0)):$start).",$x,0,$maxfreq);";
-
- $power = floor(log10($maxfreq))-1;
- $base = $maxfreq/pow(10,$power);
-
- if ($base>75) {$step = 20*pow(10,$power);} else if ($base>40) { $step = 10*pow(10,$power);} else if ($base>20) {$step = 5*pow(10,$power);} else if ($base>9) {$step = 2*pow(10,$power);} else {$step = pow(10,$power);}
-
- //if ($maxfreq>100) {$step = 20;} else if ($maxfreq > 50) { $step = 10; } else if ($maxfreq > 20) { $step = 5;} else {$step=1;}
- $outst .= "axes(1000,$step,1,1000,$step); fill=\"blue\"; textabs([". ($width/2+15) .",0],\"$label\",\"above\");";
-
- //$outst .= "axes($cw,$step,1,1000,$step); fill=\"blue\"; text([". ($start + .5*($x-$start)) .",". (-.1*$maxfreq) . "],\"$label\");";
- $outst .= $st;
- return showasciisvg($outst,$width,$height);
- }
-
- //piechart(percents, labels, {width, height})
- //create a piechart
- //percents: array of pie percents (should total 100%)
- //labels: array of labels for each pie piece
- //uses Google Charts API
- function piechart($pcts,$labels,$w=350,$h=150) {
- $out = "<img src=\"http://chart.apis.google.com/chart?cht=p&chd=t:";
- $out .= implode(',',$pcts);
- $out .= "&chs={$w}x{$h}&chl=";
- $out .= implode('|',$labels);
- $out .= '" alt="Pie Chart" />';
- return $out;
- }
-
- //normrand(mu,sigma,n)
- //returns an array of n random numbers that are normally distributed with given
- //mean mu and standard deviation sigma. Uses the Box-Muller transform.
- function normrand($mu,$sig,$n) {
- for ($i=0;$i<ceil($n/2);$i++) {
- do {
- $a = rand(-32768,32768)/32768;
- $b = rand(-32768,32768)/32768;
- $r = $a*$a+$b*$b;
- $count++;
- } while ($r==0||$r>1);
- $r = sqrt(-2*log($r)/$r);
- $z[] = $sig*$a*$r + $mu;
- $z[] = $sig*$b*$r + $mu;
- }
- if ($n%2==0) {
- return $z;
- } else {
- return (array_slice($z,0,count($z)-1));
- }
- }
-
- //boxplot(array,axislabel,[datalabel])
- //draws a boxplot based on the data in array, with given axislabel
- //and optionally a datalabel (to topleft of boxplot)
- //array and datalabel can also be an array of dataarrays and
- //array of datalabels to do comparative boxplots
- function boxplot($arr,$label) {
- if (func_num_args()>2) {
- $dlbls = func_get_arg(2);
- }
- if (is_array($arr[0])) { $multi = count($arr);} else {$multi = 1;}
- $st = '';
- $bigmax = -10000000;
- $bigmin = 100000000;
- $alt = "Boxplot, axis label: $label. ";
- for ($i=0;$i<$multi;$i++) {
- if ($multi==1) { $a = $arr;} else {$a = $arr[$i];}
- sort($a,SORT_NUMERIC);
- $max = $a[count($a)-1];
- if ($max>$bigmax) { $bigmax = $max;}
- if ($a[0]<$bigmin) {$bigmin = $a[0];}
- $q1 = percentile($a,25);
- $q2 = percentile($a,50);
- $q3 = percentile($a,75);
- $yl = 2+5*$i;
- $ym = $yl+1;
- $yh = $ym+1;
- $st .= "line([$a[0],$yl],[$a[0],$yh]); rect([$q1,$yl],[$q3,$yh]); line([$q2,$yl],[$q2,$yh]);";
- $st .= "line([$max,$yl],[$max,$yh]); line([$a[0],$ym],[$q1,$ym]); line([$q3,$ym],[$max,$ym]);";
- $alt .= 'Boxplot ';
- if (isset($dlbls[$i])) {
- $alt .= "for {$dlbls[$i]}";
- } else {
- $alt .= ($i+1);
- }
- $alt .= ": Left whisker at {$a[0]}. Leftside of box at $q1. Line through box at $q2. Rightside of box at $q3. Right whisker at $max.\n";
-
- }
- if ($GLOBALS['sessiondata']['graphdisp']==0) {
- return $alt;
- }
- $outst = "setBorder(15); initPicture($bigmin,$bigmax,-3,".(5*$multi).");";
- $dw = $bigmax-$bigmin;
-
- if ($dw>100) {$step = 20;} else if ($dw > 50) { $step = 10; } else if ($dw > 20) { $step = 5;} else {$step=1;}
- $outst .= "axes($step,100,1,0,0,1,'off');";
- $outst .= "text([". ($bigmin+.5*$dw) . ",-3],\"$label\");";
- if (isset($dlbls)) {
- for ($i=0;$i<$multi;$i++) {
- if ($multi>1) { $dlbl = $dlbls[$i];} else {$dlbl = $dlbls;}
- $st .= "text([$bigmin,". (5+5*$i) ."],\"$dlbl\",\"right\");";
- }
- }
- $outst .= $st;
- return showasciisvg($outst,400,50+50*$multi);
- }
-
- //normalcdf(z,[dec])
- //calculates the area under the standard normal distribution to the left of the
- //z-value z, to dec decimals (defaults to 4)
- //based on someone else's code - can't remember whose!
- function normalcdf($ztest,$dec=4) {
- $eps = pow(.1,$dec);
- $eps2 = pow(.1,$dec+3);
-
- $ds = 1;
- $s = 0;
- $i = 0;
- $z = abs($ztest);
- $fact = 1;
- while (abs($ds)>$eps2) {
- $ds = pow(-1,$i)*pow($z,2.0*$i+1.0)/(pow(2.0,$i)*$fact*(2.0*$i+1.0));
- $s += $ds;
- $i++;
- $fact *= $i;
- if (abs($s)<$eps) {
- break;
- }
- }
- /* alternate code, less accuracy; around 10^-8 vs 10^-10 w above
- $b1 = 0.319381530;
- $b2 = -0.356563782;
- $b3 = 1.781477937;
- $b4 = -1.821255978;
- $b5 = 1.330274429;
- $p = 0.2316419;
- $c = 0.39894228;
-
- $x = $ztest;
- if($x >= 0.0) {
- $t = 1.0 / ( 1.0 + $p * $x );
- return (1.0 - $c * exp( -$x * $x / 2.0 ) * $t *
- ( $t *( $t * ( $t * ( $t * $b5 + $b4 ) + $b3 ) + $b2 ) + $b1 ));
- }
- else {
- $t = 1.0 / ( 1.0 - $p * $x );
- return ( $c * exp( -$x * $x / 2.0 ) * $t *
- ( $t *( $t * ( $t * ( $t * $b5 + $b4 ) + $b3 ) + $b2 ) + $b1 ));
- }
- */
- $s *= 0.3989422804;
- $s = round($s,$dec);
- if ($ztest > 0) {
- $pval = .5 + $s;
- } else {
- $pval = .5 - $s;
- }
- if ($pval < $eps) {
- $pval = $eps;
- } else if ($pval > (1-$eps)) {
- $pval = round(1-$eps,$dec);
- }
- return $pval;
- }
-
- //tcdf(t,df,[dec])
- //calculates the area under the t-distribution with "df" degrees of freedom
- //to the left of the t-value t
- //based on someone else's code - can't remember whose!
- function tcdf($ttest,$df,$dec=4) {
- $eps = pow(.1,$dec);
-
- $t = abs($ttest);
- if ($df > 0) {
- $k3 = 0;
- $c3 = 0;
- $a3 = $t/sqrt($df);
- $b3 = 1+pow($a3,2);
- $y = 0.5;
- if (abs(floor($df/2)*2 - $df) < $eps) {
- $c3 = $a3/(2*pow($b3,0.5));
- $k3 = 0;
- } else {
- $c3 = $a3/($b3*M_PI);
- $y += atan($a3)/M_PI;
- $k3 = 1;
- }
- if ($df > 1) {
- $k3 += 2;
- $y += $c3;
- $c3 *= ($k3-1)/($k3*$b3);
- while ($k3 < $df) {
- $k3 += 2;
- $y += $c3;
- $c3 *= ($k3-1)/($k3*$b3);
- }
- }
- if ($ttest > 0) {
- $pval = round($y,$dec);
- } else {
- $pval = round(1-$y,$dec);
- }
-
-
- if ($pval > (1-$eps)) {
- $pval = round(1-$eps,$dec);
- } else if ($pval < $eps) {
- $pval = $eps;
- }
- return $pval;
- } else {return false;}
- }
-
- //invnormalcdf(p,[dec])
- //Inverse Normal CDF
- //finds the z-value with a left-tail area of p, to dec decimals (default 5)
- // from Odeh & Evans. 1974. AS 70. Applied Statistics. 23: 96-97
- function invnormalcdf($p,$dec=5) {
-
- $p0 = -0.322232431088;
- $p1 = -1.0;
- $p2 = -0.342242088547;
- $p3 = -0.0204231210245;
- $p4 = -0.453642210148e-4;
- $q0 = 0.0993484626060;
- $q1 = 0.588581570495;
- $q2 = 0.531103462366;
- $q3 = 0.103537752850;
- $q4 = 0.38560700634e-2;
-
- if ($p < 0.5) { $pp = $p; } else {$pp = 1 - $p; }
-
-
- if ($pp < 1E-12) {
- $xp = 99;
- } else {
- $y = sqrt(log(1/($pp*$pp)));
- $xp = $y + (((($y * $p4 + $p3) * $y + $p2) * $y + $p1) * $y + $p0) / (((($y * $q4 + $q3) * $y + $q2) * $y + $q1) * $y + $q0);
- }
- $xp = round($xp,$dec);
- if ($p < 0.5) { return (-1*$xp); } else { return $xp; }
- }
-
- //invtcdf(p,df,[dec])
- //the inverse Student's t-distribution
- //computes the t-value with a left-tail probability of p, with df degrees of freedom
- //to dec decimal places (default 4)
- // from Algorithm 396: Student's t-quantiles by G.W. Hill Comm. A.C.M., vol.13(10), 619-620, October 1970
- function invtcdf($p,$ndf,$dec=4) {
- $half_pi = M_PI/2;
- $eps = 1e-12;
- $origp = $p;
- if ($p > 0.5) {$p = 1 - $p; }
- $p = $p*2;
-
- if ($ndf < 1 || $p > 1 || $p <= 0) {
- echo "error in params";
- return false;
- }
-
- if ( abs($ndf - 2) < $eps ) { //special case ndf=2
- $fn_val = round(SQRT(2 / ( $p * (2 - $p) ) - 2),$dec);
- if ($origp<.5) {return (-1*$fn_val);} else { return $fn_val;}
- } else if (abs($ndf-4) < $eps) { //special case ndf=4
- $v = 4/sqrt($p*(2-$p))*cos(1/3*acos(sqrt($p*(2-$p))));
- $fn_val = round(sqrt($v-4),$dec);
- if ($origp<.5) {return (-1*$fn_val);} else { return $fn_val;}
- } else if ($ndf < 1+$eps) { //special case ndf=1
- $prob = $p * $half_pi;
- $fn_val = round(cos($prob) / sin($prob),$dec);
- if ($origp<.5) {return (-1*$fn_val);} else { return $fn_val;}
- } else {
- $a = 1/ ($ndf - 0.5);
- $b = 48/ pow($a,2);
- $c = ((20700 * $a / $b - 98) * $a - 16) * $a + 96.36;
- $d = ((94.5 / ($b + $c) - 3) / $b + 1) * sqrt($a * $half_pi)* $ndf;
- $x = $d * $p;
- $y = pow($x,(2/ $ndf));
-
- if ($y > 0.05 + $a) {
- $x = invnormalcdf($origp,$dec+10);
- $y = pow($x,2);
- if ($ndf < 5) { $c = $c + 0.3 * ($ndf - 4.5) * ($x + 0.6);}
- $c = (((0.05 * $d * $x - 5) * $x - 7) * $x - 2) * $x + $b+ $c;
-
- $y = (((((0.4*$y + 6.3) * $y + 36) * $y + 94.5) / $c - $y - 3) / $b + 1) * $x;
- $y = $a * pow($y,2);
- if ($y > 0.002) {
- $y = exp($y) - 1;
- } else {
- $y = 0.5 * pow($y,2) + $y;
- }
- } else {
-
- $y = ((1 / ((($ndf + 6) / ($ndf * $y) - 0.089 * $d - 0.822) * ($ndf + 2) * 3) + 0.5 / ($ndf + 4)) * $y - 1) * ($ndf + 1) / ($ndf + 2) + 1 / $y;
- }
- }
- if ($dec>3) {
- $fn_val = invtrefine(sqrt($ndf*$y),1-$p/2,$ndf,$dec);
- //echo "orig: ".sqrt($ndf*$y).", refined: $fn_val <br/>";
- } else {
- $fn_val = round( sqrt($ndf * $y) , $dec);
- }
- if ($origp<.5) {return (-1*$fn_val);} else { return $fn_val;}
- }
-
- function invtrefine($t,$p,$ndf,$dec) {
- $dv = .001;
- $eps = pow(.1,$dec+1);
- while ($dv>$eps) {
- $dv = $dv/2;
- if (tcdf($t,$ndf,$dec+10)>$p) {
- $t = $t-$dv;
- } else {
- $t = $t+$dv;
- }
- }
- return round($t,$dec);
- }
-
- //linreg(xarray,yarray)
- //Computes the linear correlation coefficient, and slope and intercept of
- //regression line, based on array/list of x-values and array/list of y-values
- //Returns as array: r,slope,intercept
- function linreg($xarr,$yarr) {
- if (!is_array($xarr)) { $xarr = explode(',',$xarr);}
- if (!is_array($yarr)) { $yarr = explode(',',$yarr);}
- if (count($xarr)!=count($yarr)) {
- echo "Error: linreg requires xarray length = yarray length";
- return false;
- }
- $sx = array_sum($xarr);
- $sy = array_sum($yarr);
- $sxx=0; $syy=0; $sxy = 0;
- for ($i=0; $i<count($xarr); $i++) {
- $sxx += $xarr[$i]*$xarr[$i];
- $syy += $yarr[$i]*$yarr[$i];
- $sxy += $xarr[$i]*$yarr[$i];
- }
- $n = count($xarr);
- $r = ($n*$sxy - $sx*$sy)/(sqrt($n*$sxx-$sx*$sx)*sqrt($n*$syy-$sy*$sy));
- $m = ($n*$sxy - $sx*$sy)/($n*$sxx - $sx*$sx);
- $b = ($sy - $sx*$m)/$n;
- return array($r,$m,$b);
- }
-
- //binomialpdf(N,p,x)
- //Computes the probability of x successes out of N trials
- //where each trial has probability p of success
- function binomialpdf($N,$p,$x) {
- return (nCr($N,$x)*pow($p,$x)*pow(1-$p,$N-$x));
- }
-
-
- //binomialcdf(N,p,x)
- //Computes the probably of <=x successes out of N trials
- //where each trial has probability p of success
- function binomialcdf($N,$p,$x) {
- $out = 0;
- for ($i=0;$i<=$x;$i++) {
- $out += binomialpdf($N,$p,$i);
- }
- return $out;
- }
-
- //chi2cdf(x,df)
- //Computes the area to the left of x under the chi-squared disribution
- //with df degrees of freedom
- function chi2cdf($x,$a) {
- return gamma_cdf(0.5*$x,0.0,1.0,0.5*$a);
- }
-
- function chicdf($x,$a) {
- return gamma_cdf(0.5*$x,0.0,1.0,0.5*$a);
- }
-
- function invchicdf($cdf,$a) {
- return invchi2cdf($cdf,$a);
- }
-
- //invchi2cdf(p,df)
- //Compuates the x value with left-tail probability p under the
- //chi-squared distribution with df degrees of freedom
- function invchi2cdf($cdf,$a) {
- $aa = 0.6931471806;
- $c1 = 0.01;
- $c2 = 0.222222;
- $c3 = 0.32;
- $c4 = 0.4;
- $c5 = 1.24;
- $c6 = 2.2;
- $c7 = 4.67;
- $c8 = 6.66;
- $c9 = 6.73;
- $c10 = 13.32;
- $c11 = 60.0;
- $c12 = 70.0;
- $c13 = 84.0;
- $c14 = 105.0;
- $c15 = 120.0;
- $c16 = 127.0;
- $c17 = 140.0;
- $c18 = 175.0;
- $c19 = 210.0;
- $c20 = 252.0;
- $c21 = 264.0;
- $c22 = 294.0;
- $c23 = 346.0;
- $c24 = 420.0;
- $c25 = 462.0;
- $c26 = 606.0;
- $c27 = 672.0;
- $c28 = 707.0;
- $c29 = 735.0;
- $c30 = 889.0;
- $c31 = 932.0;
- $c32 = 966.0;
- $c33 = 1141.0;
- $c34 = 1182.0;
- $c35 = 1278.0;
- $c36 = 1740.0;
- $c37 = 2520.0;
- $c38 = 5040.0;
- $cdf_max = 0.999998;
- $cdf_min = 0.000002;
- $e = 0.0000005;
-
- $it_max = 20;
- if ($cdf < $cdf_min) {
- echo "p < min - can't do it!";
- return 0;
- }
- if ($cdf_max < $cdf) {
- echo "p > max - can't do it";
- return 0;
- }
- $xx = 0.5*$a;
- $c = $xx-1.0;
- $g = gamma_log($a/2.0);
- if ($a < -$c5*log($cdf)) {
- $ch = pow(($cdf*$xx*exp($g+$xx*$aa)),(1.0/$xx));
- if ($ch < $e) {
- $x = $ch;
- return $x;
- }
- } else if ($a <= $c3) {
- $ch = $c4;
- $a2 = log(1.0 - $cdf);
- while (1) {
- $q = $ch;
- $p1 = 1.0+$ch*($c7 + $ch);
- $p2 = $ch*($c9 + $ch*($c8+$ch));
- $t = -0.5+($c7 + 2.0*$ch)/$p1 - ($c9 + $ch*($c10 + 3.0*$ch)) / $p2;
- $ch = $ch - (1.0 - exp($a2 + $g + 0.5*$ch + $c*$aa)*$p2/$p1)/$t;
- if (abs($q/$ch - 1.0) <= $c1) {
- break;
- }
- }
- } else {
- $x2 = invnormalcdf($cdf);
- $p1 = $c2/$a;
- $ch = $a*pow(($x2*sqrt($p1) + 1.0 - $p1),3);
- if ($c6*$a + 6.0 < $ch) {
- $ch = -2.0*(log(1.0 - $cdf) - $c*log(0.5*$ch)+$g);
- }
- }
- for ($i=1;$i<=$it_max;$i++) {
- $q = $ch;
- $p1 = 0.5*$ch;
- $p2 = $cdf - gamma_inc($xx,$p1);
- $t = $p2*exp($xx*$aa + $g + $p1 - $c*log($ch));
- $b = $t/$ch;
- $a2 = 0.5*$t - $b*$c;
- $s1 = ($c19 + $a2 * ($c17 + $a2 * ($c14 + $a2 *($c13 + $a2 * ($c12 + $a2 * $c11))))) / $c24;
- $s2 = ($c24 + $a2 * ($c29 + $a2 * ($c32 + $a2 *($c33 + $a2 * $c35)))) / $c37;
- $s3 = ($c19 + $a2 * ($c25 + $a2 * ($c28 + $a2 * $c31)))/$c37;
- $s4 = ($c20 + $a2 * ($c27 + $a2 * $c34) + $c * ($c22 + $a2 * ($c30 + $a2 *$c36)))/$c38;
- $s5 = ($c13 + $c21 + $a2 + $c * ($c18 + $c26 * $a2))/$c37;
- $s6 = ($c16 + $c*($c23 + $c16*$c))/$c38;
- $ch = $ch + $t*(1.0 + 0.5*$t*$s1 - $b*$c * ($s1-$b*($s2-$b*($s3-$b*($s4-$b*($s5-$b*$s6))))));
- if ($e < abs($q/$ch - 1.0)) {
- $x = $ch;
- return ($x);
- }
- }
- $x = $ch;
- echo "Convergence not reached in invchisquare";
- return $x;
- }
-
- function gamma_cdf($x,$a,$b,$c) {
- return gamma_inc($c,($x-$a)/$b);
- }
- function gamma_inc($p,$x,$dec=4) {
- $exp_arg_min = -88.0;
- $overflow = 1e37;
- $plimit = 1000;
- $tol = 1e-7;
- $xbig = 1e8;
- $value = 0.0;
-
- if ($plimit<$p) {
- $pn1 = 3.0*sqrt($p)*(pow($x/$p,1.0/3.0) + 1.0/(9.0*$p)-1.0);
- $value = normalcdf($pn1,$dec);
- return $value;
- }
- if ($xbig<$x) {
- return 1.0;
- }
- if ($x<=1.0 || $x<$p) {
- $arg = $p*log($x) - $x - gamma_log($p+1.0);
- $c = 1.0;
- $value = 1.0;
- $a = $p;
- while ($c>$tol) {
- $a += 1.0;
- $c = $c*$x/$a;
- $value += $c;
- }
- $arg += log($value);
- if ($exp_arg_min <= $arg) {
- $value = exp($arg);
- } else {
- $value = 0.0;
- }
- } else {
- $arg = $p*log($x)-$x-gamma_log($p);
- $a = 1.0 - $p;
- $b = $a+$x+1.0;
- $c = 0.0;
- $pn1 = 1.0;
- $pn2 = $x;
- $pn3 = $x+1.0;
- $pn4 = $x*$b;
- $value = $pn3/$pn4;
- while (1) {
- $a += 1.0;
- $b += 2.0;
- $c += 1.0;
- $pn5 = $b*$pn3 - $a*$c*$pn1;
- $pn6 = $b*$pn4 - $a*$c*$pn2;
-
- if (0 < abs($pn6)) {
- $rn = $pn5/$pn6;
- if (abs($value-$rn) <= min($tol,$tol*$rn)) {
- $arg += log($value);
- if ($exp_arg_min <= $arg) {
- $value = 1.0 - exp($arg);
- } else {
- $value = 1.0;
- }
- return $value;
- }
- $value = $rn;
- }
- $pn1 = $pn3;
- $pn2 = $pn4;
- $pn3 = $pn5;
- $pn4 = $pn6;
- if ($overflow <= abs($pn5)) {
- $pn1 = $pn1/$overflow;
- $pn2 = $pn2/$overflow;
- $pn3 = $pn3/$overflow;
- $pn4 = $pn4/$overflow;
- }
- }
- }
- return $value;
- }
-
- function gamma_log($x) {
- $c = array(
- -1.910444077728E-03,
- 8.4171387781295E-04,
- -5.952379913043012E-04,
- 7.93650793500350248E-04,
- -2.777777777777681622553E-03,
- 8.333333333333333331554247E-02,
- 5.7083835261E-03 );
- $d1 = - 5.772156649015328605195174E-01;
- $d2 = 4.227843350984671393993777E-01;
- $d4 = 1.791759469228055000094023;
- $frtbig = 1.42E+09;
- $p1 = array(
- 4.945235359296727046734888,
- 2.018112620856775083915565E+02,
- 2.290838373831346393026739E+03,
- 1.131967205903380828685045E+04,
- 2.855724635671635335736389E+04,
- 3.848496228443793359990269E+04,
- 2.637748787624195437963534E+04,
- 7.225813979700288197698961E+03 );
- $p2 = array(
- 4.974607845568932035012064,
- 5.424138599891070494101986E+02,
- 1.550693864978364947665077E+04,
- 1.847932904445632425417223E+05,
- 1.088204769468828767498470E+06,
- 3.338152967987029735917223E+06,
- 5.106661678927352456275255E+06,
- 3.074109054850539556250927E+06 );
- $p4 = array(
- 1.474502166059939948905062E+04,
- 2.426813369486704502836312E+06,
- 1.214755574045093227939592E+08,
- 2.663432449630976949898078E+09,
- 2.940378956634553899906876E+010,
- 1.702665737765398868392998E+011,
- 4.926125793377430887588120E+011,
- 5.606251856223951465078242E+011 );
- $pnt68 = 0.6796875;
- $q1 = array(
- 6.748212550303777196073036E+01,
- 1.113332393857199323513008E+03,
- 7.738757056935398733233834E+03,
- 2.763987074403340708898585E+04,
- 5.499310206226157329794414E+04,
- 6.161122180066002127833352E+04,
- 3.635127591501940507276287E+04,
- 8.785536302431013170870835E+03 );
- $q2 = array(
- 1.830328399370592604055942E+02,
- 7.765049321445005871323047E+03,
- 1.331903827966074194402448E+05,
- 1.136705821321969608938755E+06,
- 5.267964117437946917577538E+06,
- 1.346701454311101692290052E+07,
- 1.782736530353274213975932E+07,
- 9.533095591844353613395747E+06 );
- $q4 = array(
- 2.690530175870899333379843E+03,
- 6.393885654300092398984238E+05,
- 4.135599930241388052042842E+07,
- 1.120872109616147941376570E+09,
- 1.488613728678813811542398E+010,
- 1.016803586272438228077304E+011,
- 3.417476345507377132798597E+011,
- 4.463158187419713286462081E+011 );
- $sqrtpi = 0.9189385332046727417803297;
- $xbig = 4.08E+36;
-
- if ($x<=0 || $xbig < $x) {
- return 1e10;
- }
- if ($x <= 1e-10) {
- $res = -log($x);
- } else if ($x<=1.5) {
- if ($x<$pnt68) {
- $corr = -log($x);
- $xm1 = $x;
- } else {
- $corr = 0.0;
- $xm1 = ($x-0.5) - 0.5;
- }
- if ($x<0.5 || $pnt68 <= $x) {
- $xden = 1.0;
- $xnum = 0.0;
- for ($i=0; $i<8; $i++) {
- $xnum = $xnum*$xm1 + $p1[$i];
- $xden = $xden*$xm1 + $q1[$i];
- }
- $res = $corr + ($xm1*($d1 + $xm1*($xnum/$xden)));
- } else {
- $xm2 = ($x-0.5)-0.5;
- $xden = 1.0;
- $xnum = 0.0;
- for ($i=0; $i<8; $i++) {
- $xnum = $xnum*$xm2 + $p2[$i];
- $xden = $xden*$xm2 + $q2[$i];
- }
- $res = $corr + ($xm2*($d2 + $xm2*($xnum/$xden)));
- }
- } else if ($x<=4.0) {
- $xm2 = $x-2.0;
- $xden = 1.0;
- $xnum = 0.0;
- for ($i=0; $i<8; $i++) {
- $xnum = $xnum*$xm2 + $p2[$i];
- $xden = $xden*$xm2 + $q2[$i];
- }
- $res = $xm2*($d2 + $xm2*($xnum/$xden));
- } else if ($x <= 12.0) {
- $xm4 = $x - 4.0;
- $xden = -1.0;
- $xnum = 0.0;
- for ($i=0; $i<8; $i++) {
- $xnum = $xnum*$xm4 + $p4[$i];
- $xden = $xden*$xm4 + $q4[$i];
- }
- $res = $d4 + $xm4*($xnum/$xden);
- } else {
- $res = 0.0;
- if ($x<= $frtbig) {
- $res = $c[6];
- $xsq = $x*$x;
- for ($i=0;$i<6;$i++) {
- $res = $res/$xsq + $c[$i];
- }
- }
- $res = $res/$x;
- $corr = log($x);
- $res = $res + $sqrtpi - 0.5*$corr;
- $res = $res + $x*($corr - 1.0);
- }
- return $res;
- }
-
- //fcdf(f,df1,df2)
- //Returns the area to right of the F-value f for the f-distribution
- //with df1 and df2 degrees of freedom (techinically it's 1-CDF)
- //Algorithm is accurate to approximately 4-5 decimals
- function fcdf($x,$df1,$df2) {
- $p1 = fcall(fspin($x,$df1,$df2));
- return $p1;
- }
-
- function fcall($x) {
- if ($x>=0) {
- $x += 0.0000005;
- } else {
- $x -= 0.0000005;
- }
- return $x;
- }
- function fspin($f,$df1,$df2) {
- $pj2 = M_PI/2;
- $pj4 = M_PI/4;
- $px2 = 2*M_PI;
- $exx = 1.10517091807564;
- $dgr = 180/M_PI;
-
- $x = $df2/($df1*$f+$df2);
- if (($df1%2)==0) {
- return (LJspin(1-$x,$df2,$df1+$df2-4,$df2-2)*pow($x,$df2/2));
- } else if (($df2%2)==0) {
- return (1-LJspin($x,$df1,$df1+$df2-4,$df1-2)*pow(1-$x,$df1/2));
- }
- $tan = atan(sqrt($df1*$f/$df2));
- $a = $tan/$pj2;
- $sat = sin($tan);
- $cot = cos($tan);
- if ($df2>1) {
- $a = $a+$sat*$cot*LJspin($cot*$cot,2,$df2-3,-1)/$pj2;
- }
- if ($df1==1) {
- return (1-$a);
- }
- $c = 4*LJspin($sat*$sat,$df2+1,$df1+$df2-4,$df2-2)*$sat*pow($cot,$df2)/M_PI;
- if ($df2==1) {
- return (1-$a+$c/2);
- }
- $k = 2;
- while ($k<=($df2-1)/2) {
- $c = $c*$k/($k-0.5);
- $k++;
- }
- return (1-$a+$c);
- }
-
- function LJspin($q,$i,$j,$b) {
- $zz = 1;
- $z = $zz;
- $k = $i;
- while ($k<=$j) {
- $zz = $zz*$q*$k/($k-$b);
- $z = $z+$zz;
- $k += 2;
- }
- return $z;
- }
-
- //invfcdf(p,df1,df2)
- //Computes the f-value with probability of p to the right
- //with degrees of freedom df1 and df2
- //Algorithm is accurate to approximately 2-4 decimal places
- //Less accurate for smaller p-values
- function invfcdf($p,$df1,$df2) {
- $v = 0.5;
- $dv = 0.5;
- $f = 0;
- $cnt = 0;
- while ($dv>1e-10) {
- $f = 1/$v-1;
- $dv = $dv/2;
- if (fcdf($f,$df1,$df2)>$p) {
- $v = $v-$dv;
- } else {
- $v = $v+$dv;
- }
- $cnt++;
- }
- return $f;
-
- }
- ?>