PageRenderTime 53ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/trunk/edem/simplesamlphp/modules/statistics/lib/Aggregator.php

https://bitbucket.org/piratihr/edem
PHP | 293 lines | 195 code | 61 blank | 37 comment | 38 complexity | 242760107c8634f460ba7f32f2365c94 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause, MIT
  1. <?php
  2. /*
  3. * @author Andreas Åkre Solberg <andreas.solberg@uninett.no>
  4. * @package simpleSAMLphp
  5. * @version $Id$
  6. */
  7. class sspmod_statistics_Aggregator {
  8. private $statconfig;
  9. private $statdir;
  10. private $inputfile;
  11. private $statrules;
  12. private $offset;
  13. private $metadata;
  14. private $fromcmdline;
  15. private $starttime;
  16. /**
  17. * Constructor
  18. */
  19. public function __construct($fromcmdline = FALSE) {
  20. $this->fromcmdline = $fromcmdline;
  21. $this->statconfig = SimpleSAML_Configuration::getConfig('module_statistics.php');
  22. $this->statdir = $this->statconfig->getValue('statdir');
  23. $this->inputfile = $this->statconfig->getValue('inputfile');
  24. $this->statrules = $this->statconfig->getValue('statrules');
  25. $this->timeres = $this->statconfig->getValue('timeres');
  26. $this->offset = $this->statconfig->getValue('offset', 0);
  27. $this->metadata = NULL;
  28. $this->starttime = time();
  29. }
  30. public function dumpConfig() {
  31. echo 'Statistics directory : ' . $this->statdir . "\n";
  32. echo 'Input file : ' . $this->inputfile . "\n";
  33. echo 'Offset : ' . $this->offset . "\n";
  34. }
  35. public function debugInfo() {
  36. echo 'Memory usage : ' . number_format(memory_get_usage() / (1024*1024), 2) . " MB\n";
  37. }
  38. public function loadMetadata() {
  39. $filename = $this->statdir . '/.stat.metadata';
  40. $metadata = NULL;
  41. if (file_exists($filename)) {
  42. $metadata = unserialize(file_get_contents($filename));
  43. }
  44. $this->metadata = $metadata;
  45. }
  46. public function getMetadata() {
  47. return $this->metadata;
  48. }
  49. public function saveMetadata() {
  50. $this->metadata['time'] = time() - $this->starttime;
  51. $this->metadata['memory'] = memory_get_usage();
  52. $this->metadata['lastrun'] = time();
  53. $filename = $this->statdir . '/.stat.metadata';
  54. file_put_contents($filename, serialize($this->metadata), LOCK_EX);
  55. }
  56. public function aggregate($debug = FALSE) {
  57. $this->loadMetadata();
  58. if (!is_dir($this->statdir))
  59. throw new Exception('Statistics module: output dir do not exists [' . $this->statdir . ']');
  60. if (!file_exists($this->inputfile))
  61. throw new Exception('Statistics module: input file do not exists [' . $this->inputfile . ']');
  62. $file = fopen($this->inputfile, 'r');
  63. #$logfile = file($this->inputfile, FILE_IGNORE_NEW_LINES );
  64. $logparser = new sspmod_statistics_LogParser(
  65. $this->statconfig->getValue('datestart', 0), $this->statconfig->getValue('datelength', 15), $this->statconfig->getValue('offsetspan', 44)
  66. );
  67. $datehandler = array(
  68. 'default' => new sspmod_statistics_DateHandler($this->offset),
  69. 'month' => new sspmod_statistics_DateHandlerMonth($this->offset),
  70. );
  71. $notBefore = 0; $lastRead = 0; $lastlinehash = '-';
  72. if (isset($this->metadata)) {
  73. $notBefore = $this->metadata['notBefore'];
  74. $lastlinehash = $this->metadata['lastlinehash'];
  75. }
  76. $lastlogline = 'sdfsdf';
  77. $lastlineflip = FALSE;
  78. $results = array();
  79. $i = 0;
  80. // Parse through log file, line by line
  81. while (!feof($file)) {
  82. $logline = fgets($file, 4096);
  83. // Continue if STAT is not found on line.
  84. if (!preg_match('/STAT/', $logline)) continue;
  85. $i++; $lastlogline = $logline;
  86. // Parse log, and extract epoch time and rest of content.
  87. $epoch = $logparser->parseEpoch($logline);
  88. $content = $logparser->parseContent($logline);
  89. $action = trim($content[5]);
  90. if ($this->fromcmdline && ($i % 10000) == 0) {
  91. echo("Read line " . $i . "\n");
  92. }
  93. if ($debug) {
  94. echo("----------------------------------------\n");
  95. echo('Log line: ' . $logline . "\n");
  96. echo('Date parse [' . substr($logline, 0, $this->statconfig->getValue('datelength', 15)) . '] to [' . date(DATE_RFC822, $epoch) . ']' . "\n");
  97. print_r($content);
  98. if ($i >= 13) exit;
  99. }
  100. if ($epoch > $lastRead) $lastRead = $epoch;
  101. if ($epoch === $notBefore) {
  102. if(!$lastlineflip) {
  103. if (sha1($logline) === $lastlinehash) {
  104. $lastlineflip = TRUE;
  105. }
  106. continue;
  107. }
  108. }
  109. if ($epoch < $notBefore) continue;
  110. // Iterate all the statrules from config.
  111. foreach ($this->statrules AS $rulename => $rule) {
  112. $type = 'aggregate';
  113. if (array_key_exists('type', $rule)) $type = $rule['type'];
  114. if ($type !== 'aggregate') continue;
  115. foreach($this->timeres AS $tres => $tresconfig ) {
  116. // echo 'Comparing action: [' . $rule['action'] . '] with [' . $action . ']' . "\n";
  117. $dh = 'default';
  118. if (isset($tresconfig['customDateHandler'])) $dh = $tresconfig['customDateHandler'];
  119. $timeslot = $datehandler['default']->toSlot($epoch, $tresconfig['slot']);
  120. $fileslot = $datehandler[$dh]->toSlot($epoch, $tresconfig['fileslot']); //print_r($content);
  121. if (isset($rule['action']) && ($action !== $rule['action'])) continue;
  122. #$difcol = trim($content[$rule['col']]); // echo '[...' . $difcol . '...]';
  123. $difcol = self::getDifCol($content, $rule['col']);
  124. if (!isset($results[$rulename][$tres][$fileslot][$timeslot]['_'])) $results[$rulename][$tres][$fileslot][$timeslot]['_'] = 0;
  125. if (!isset($results[$rulename][$tres][$fileslot][$timeslot][$difcol])) $results[$rulename][$tres][$fileslot][$timeslot][$difcol] = 0;
  126. $results[$rulename][$tres][$fileslot][$timeslot]['_']++;
  127. $results[$rulename][$tres][$fileslot][$timeslot][$difcol]++;
  128. }
  129. }
  130. }
  131. $this->metadata['notBefore'] = $lastRead;
  132. $this->metadata['lastline'] = $lastlogline;
  133. $this->metadata['lastlinehash'] = sha1($lastlogline);
  134. return $results;
  135. }
  136. private static function getDifCol($content, $colrule) {
  137. if (is_int($colrule)) {
  138. return trim($content[$colrule]);
  139. } elseif(is_array($colrule)) {
  140. $difcols = array();
  141. foreach($colrule AS $cr) {
  142. $difcols[] = trim($content[$cr]);
  143. }
  144. return join('|', $difcols);
  145. } else {
  146. return 'NA';
  147. }
  148. }
  149. private function cummulateData($previous, $newdata) {
  150. $dataset = array();
  151. foreach($previous AS $slot => $dataarray) {
  152. if (!array_key_exists($slot, $dataset)) $dataset[$slot] = array();
  153. foreach($dataarray AS $key => $data) {
  154. if (!array_key_exists($key, $dataset[$slot])) $dataset[$slot][$key] = 0;
  155. $dataset[$slot][$key] += $data;
  156. }
  157. }
  158. foreach($newdata AS $slot => $dataarray) {
  159. if (!array_key_exists($slot, $dataset)) $dataset[$slot] = array();
  160. foreach($dataarray AS $key => $data) {
  161. if (!array_key_exists($key, $dataset[$slot])) $dataset[$slot][$key] = 0;
  162. $dataset[$slot][$key] += $data;
  163. }
  164. }
  165. return $dataset;
  166. }
  167. public function store($results) {
  168. // print_r($results); // exit;
  169. $datehandler = array(
  170. 'default' => new sspmod_statistics_DateHandler($this->offset),
  171. 'month' => new sspmod_statistics_DateHandlerMonth($this->offset),
  172. );
  173. // Iterate the first level of results, which is per rule, as defined in the config.
  174. foreach ($results AS $rulename => $timeresdata) {
  175. // $timeresl = array_keys($timeresdata);
  176. //
  177. // print_r($timeresl); exit;
  178. // Iterate over time resolutions
  179. foreach($timeresdata AS $tres => $resres) {
  180. $dh = 'default';
  181. if (isset($this->timeres[$tres]['customDateHandler'])) $dh = $this->timeres[$tres]['customDateHandler'];
  182. $filenos = array_keys($resres);
  183. $lastfile = $filenos[count($filenos)-1];
  184. // Iterate the second level of results, which is the fileslot.
  185. foreach ($resres AS $fileno => $fileres) {
  186. // Slots that have data.
  187. $slotlist = array_keys($fileres);
  188. // The last slot.
  189. $maxslot = $slotlist[count($slotlist)-1];
  190. #print_r($slotlist);
  191. // Get start and end slot number within the file, based on the fileslot.
  192. $start = (int)$datehandler['default']->toSlot(
  193. $datehandler[$dh]->fromSlot($fileno, $this->timeres[$tres]['fileslot']),
  194. $this->timeres[$tres]['slot']);
  195. $end = (int)$datehandler['default']->toSlot(
  196. $datehandler[$dh]->fromSlot($fileno+1, $this->timeres[$tres]['fileslot']),
  197. $this->timeres[$tres]['slot']);
  198. // echo('from slot ' . $start . ' to slot ' . $end . ' maxslot ' . $maxslot . "\n");
  199. // print_r($slotlist);
  200. // exit;
  201. // Fill in missing entries and sort file results
  202. $filledresult = array();
  203. for ($slot = $start; $slot < $end; $slot++) {
  204. if (array_key_exists($slot, $fileres)) {
  205. $filledresult[$slot] = $fileres[$slot];
  206. } else {
  207. #echo('SLot [' . $slot . '] of [' . $maxslot . ']' . "\n");
  208. if ($lastfile == $fileno && $slot > $maxslot) {
  209. $filledresult[$slot] = array('_' => NULL);
  210. } else {
  211. $filledresult[$slot] = array('_' => 0);
  212. }
  213. }
  214. # print_r($filledresult[$slot]);
  215. # = (isset($fileres[$slot])) ? $fileres[$slot] : array('_' => NULL);
  216. }
  217. // print_r($filledresult); exit;
  218. $filename = $this->statdir . '/' . $rulename . '-' . $tres . '-' . $fileno . '.stat';
  219. if (file_exists($filename)) {
  220. // echo('Reading existing file: ' . $filename . "\n");
  221. $previousData = unserialize(file_get_contents($filename));
  222. $filledresult = $this->cummulateData($previousData, $filledresult);
  223. }
  224. // store file
  225. # echo('Writing to file: ' . $filename . "\n");
  226. file_put_contents($filename, serialize($filledresult), LOCK_EX);
  227. }
  228. }
  229. }
  230. $this->saveMetadata();
  231. }
  232. }