PageRenderTime 41ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/core/ArchiveProcessing/Period.php

https://github.com/quarkness/piwik
PHP | 418 lines | 253 code | 47 blank | 118 comment | 25 complexity | 194f072040302685d85fbc1368c65310 MD5 | raw file
  1. <?php
  2. /**
  3. * Piwik - Open source web analytics
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. * @version $Id$
  8. *
  9. * @category Piwik
  10. * @package Piwik
  11. */
  12. /**
  13. * Handles the archiving process for a period
  14. *
  15. * This class provides generic methods to archive data for a period (week / month / year).
  16. *
  17. * These methods are called by the plugins that do the logic of archiving their own data. \
  18. * They hook on the event 'ArchiveProcessing_Period.compute'
  19. *
  20. * @package Piwik
  21. * @subpackage Piwik_ArchiveProcessing
  22. */
  23. class Piwik_ArchiveProcessing_Period extends Piwik_ArchiveProcessing
  24. {
  25. /*
  26. * Array of (column name before => column name renamed) of the columns for which sum operation is invalid.
  27. * The summed value is not accurate and these columns will be renamed accordingly.
  28. */
  29. static public $invalidSummedColumnNameToRenamedName = array(
  30. Piwik_Archive::INDEX_NB_UNIQ_VISITORS => Piwik_Archive::INDEX_SUM_DAILY_NB_UNIQ_VISITORS
  31. );
  32. /**
  33. * Sums all values for the given field names $aNames over the period
  34. * See @archiveNumericValuesGeneral for more information
  35. *
  36. * @param string|array
  37. */
  38. public function archiveNumericValuesSum( $aNames )
  39. {
  40. return $this->archiveNumericValuesGeneral($aNames, 'sum');
  41. }
  42. /**
  43. * Get the maximum value for all values for the given field names $aNames over the period
  44. * See @archiveNumericValuesGeneral for more information
  45. *
  46. * @param string|array
  47. */
  48. public function archiveNumericValuesMax( $aNames )
  49. {
  50. return $this->archiveNumericValuesGeneral($aNames, 'max');
  51. }
  52. /**
  53. * Given a list of fields names, the method will fetch all their values over the period, and archive them using the given operation.
  54. *
  55. * For example if $operationToApply = 'sum' and $aNames = array('nb_visits', 'sum_time_visit')
  56. * it will sum all values of nb_visits for the period (for example give the number of visits for the month by summing the visits of every day)
  57. *
  58. * @param array|string $aNames Array of strings or string containg the field names to select
  59. * @param string $operationToApply Available operations = sum, max, min
  60. */
  61. private function archiveNumericValuesGeneral($aNames, $operationToApply)
  62. {
  63. $this->loadSubPeriods();
  64. if(!is_array($aNames))
  65. {
  66. $aNames = array($aNames);
  67. }
  68. // fetch the numeric values and apply the operation on them
  69. $results = array();
  70. foreach($this->archives as $id => $archive)
  71. {
  72. foreach($aNames as $name)
  73. {
  74. if(!isset($results[$name]))
  75. {
  76. $results[$name] = 0;
  77. }
  78. if($name == 'nb_uniq_visitors') continue;
  79. $valueToSum = $archive->getNumeric($name);
  80. if($valueToSum !== false)
  81. {
  82. switch ($operationToApply) {
  83. case 'sum':
  84. $results[$name] += $valueToSum;
  85. break;
  86. case 'max':
  87. $results[$name] = max($results[$name], $valueToSum);
  88. break;
  89. case 'min':
  90. $results[$name] = min($results[$name], $valueToSum);
  91. break;
  92. default:
  93. throw new Exception("Operation not applicable.");
  94. break;
  95. }
  96. }
  97. }
  98. }
  99. if(!Piwik::isUniqueVisitorsEnabled($this->period->getLabel()))
  100. {
  101. unset($results['nb_uniq_visitors']);
  102. }
  103. foreach($results as $name => $value)
  104. {
  105. if($name == 'nb_uniq_visitors')
  106. {
  107. $value = (float) $this->computeNbUniqVisitors();
  108. }
  109. $this->insertRecord($name, $value);
  110. }
  111. // if asked for only one field to sum
  112. if(count($results) == 1)
  113. {
  114. return $results[$name];
  115. }
  116. // returns the array of records once summed
  117. return $results;
  118. }
  119. /**
  120. * This method will compute the sum of DataTables over the period for the given fields $aRecordName.
  121. * The resulting DataTable will be then added to queue of data to be recorded in the database.
  122. * It will usually be called in a plugin that listens to the hook 'ArchiveProcessing_Period.compute'
  123. *
  124. * For example if $aRecordName = 'UserCountry_country' the method will select all UserCountry_country DataTable for the period
  125. * (eg. the 31 dataTable of the last month), sum them, then record it in the DB
  126. *
  127. *
  128. * This method works on recursive dataTable. For example for the 'Actions' it will select all subtables of all dataTable of all the sub periods
  129. * and get the sum.
  130. *
  131. * It returns an array that gives information about the "final" DataTable. The array gives for every field name, the number of rows in the
  132. * final DataTable (ie. the number of distinct LABEL over the period) (eg. the number of distinct keywords over the last month)
  133. *
  134. * @param string|array Field name(s) of DataTable to select so we can get the sum
  135. * @param array (current_column_name => new_column_name) for columns that must change names when summed (eg. unique visitors go from nb_uniq_visitors to sum_daily_nb_uniq_visitors)
  136. * @param int Max row count of parent datatable to archive
  137. * @param int Max row count of children datatable(s) to archive
  138. * @param string Column name to sort by, before truncating rows (ie. if there are more rows than the specified max row count)
  139. *
  140. * @return array array (
  141. * nameTable1 => number of rows,
  142. * nameTable2 => number of rows,
  143. * )
  144. */
  145. public function archiveDataTable( $aRecordName,
  146. $invalidSummedColumnNameToRenamedName = null,
  147. $maximumRowsInDataTableLevelZero = null,
  148. $maximumRowsInSubDataTable = null,
  149. $columnToSortByBeforeTruncation = null )
  150. {
  151. $this->loadSubPeriods();
  152. if(!is_array($aRecordName))
  153. {
  154. $aRecordName = array($aRecordName);
  155. }
  156. $nameToCount = array();
  157. foreach($aRecordName as $recordName)
  158. {
  159. $table = $this->getRecordDataTableSum($recordName, $invalidSummedColumnNameToRenamedName);
  160. $nameToCount[$recordName]['level0'] = $table->getRowsCount();
  161. $nameToCount[$recordName]['recursive'] = $table->getRowsCountRecursive();
  162. $blob = $table->getSerialized( $maximumRowsInDataTableLevelZero, $maximumRowsInSubDataTable, $columnToSortByBeforeTruncation );
  163. destroy($table);
  164. $this->insertBlobRecord($recordName, $blob);
  165. }
  166. Piwik_DataTable_Manager::getInstance()->deleteAll();
  167. return $nameToCount;
  168. }
  169. /**
  170. * This method selects all DataTables that have the name $name over the period.
  171. * It calls the appropriate methods that sum all these tables together.
  172. * The resulting DataTable is returned.
  173. *
  174. * @param string $name
  175. * @param array columns in the array (old name, new name) to be renamed as the sum operation is not valid on them (eg. nb_uniq_visitors->sum_daily_nb_uniq_visitors)
  176. * @return Piwik_DataTable
  177. */
  178. protected function getRecordDataTableSum( $name, $invalidSummedColumnNameToRenamedName )
  179. {
  180. $table = new Piwik_DataTable();
  181. foreach($this->archives as $archive)
  182. {
  183. $archive->preFetchBlob($name);
  184. $datatableToSum = $archive->getDataTable($name);
  185. $archive->loadSubDataTables($name, $datatableToSum);
  186. $table->addDataTable($datatableToSum);
  187. $archive->freeBlob($name);
  188. }
  189. if(is_null($invalidSummedColumnNameToRenamedName))
  190. {
  191. $invalidSummedColumnNameToRenamedName = self::$invalidSummedColumnNameToRenamedName;
  192. }
  193. foreach($invalidSummedColumnNameToRenamedName as $oldName => $newName)
  194. {
  195. $table->renameColumn($oldName, $newName);
  196. }
  197. return $table;
  198. }
  199. protected function initCompute()
  200. {
  201. parent::initCompute();
  202. }
  203. /**
  204. * Returns the ID of the archived subperiods.
  205. *
  206. * @return array Array of the idArchive of the subperiods
  207. */
  208. protected function loadSubperiodsArchive()
  209. {
  210. $periods = array();
  211. // we first compute every subperiod of the archive
  212. foreach($this->period->getSubperiods() as $period)
  213. {
  214. $archivePeriod = new Piwik_Archive_Single();
  215. $archivePeriod->setSite( $this->site );
  216. $archivePeriod->setPeriod( $period );
  217. $archivePeriod->setSegment( $this->getSegment() );
  218. $archivePeriod->setRequestedReport($this->getRequestedReport());
  219. $periods[] = $archivePeriod;
  220. }
  221. return $periods;
  222. }
  223. /**
  224. * Main method to process logs for a period.
  225. * The only logic done here is computing the number of visits, actions, etc.
  226. *
  227. * All the other reports are computed inside plugins listening to the event 'ArchiveProcessing_Period.compute'.
  228. * See some of the plugins for an example.
  229. */
  230. protected function compute()
  231. {
  232. if(!$this->isThereSomeVisits())
  233. {
  234. return;
  235. }
  236. Piwik_PostEvent('ArchiveProcessing_Period.compute', $this);
  237. }
  238. protected function loadSubPeriods()
  239. {
  240. if(empty($this->archives))
  241. {
  242. $this->archives = $this->loadSubperiodsArchive();
  243. }
  244. }
  245. // Similar logic to Piwik_ArchiveProcessing_Day::isThereSomeVisits()
  246. public function isThereSomeVisits()
  247. {
  248. if(!is_null($this->isThereSomeVisits))
  249. {
  250. return $this->isThereSomeVisits;
  251. }
  252. $this->loadSubPeriods();
  253. if(self::getPluginBeingProcessed($this->getRequestedReport()) == 'VisitsSummary'
  254. || $this->shouldProcessReportsAllPlugins($this->getSegment(), $this->period)
  255. )
  256. {
  257. $toSum = self::getCoreMetrics();
  258. $record = $this->archiveNumericValuesSum($toSum);
  259. $this->archiveNumericValuesMax( 'max_actions' );
  260. $nbVisitsConverted = $record['nb_visits_converted'];
  261. $nbVisits = $record['nb_visits'];
  262. }
  263. else
  264. {
  265. $archive = new Piwik_Archive_Single();
  266. $archive->setSite( $this->site );
  267. $archive->setPeriod( $this->period );
  268. $archive->setSegment( $this->getSegment() );
  269. $nbVisits = $archive->getNumeric('nb_visits');
  270. $nbVisitsConverted = 0;
  271. if($nbVisits > 0)
  272. {
  273. $nbVisitsConverted = $archive->getNumeric('nb_visits_converted');
  274. }
  275. }
  276. $this->setNumberOfVisits($nbVisits);
  277. $this->setNumberOfVisitsConverted($nbVisitsConverted);
  278. $this->isThereSomeVisits = ($nbVisits > 0);
  279. return $this->isThereSomeVisits;
  280. }
  281. /**
  282. * Processes number of unique visitors for the given period
  283. *
  284. * This is the only metric we process from the logs directly,
  285. * since unique visitors cannot be summed like other metrics.
  286. *
  287. * @return int
  288. */
  289. protected function computeNbUniqVisitors()
  290. {
  291. $select = "count(distinct log_visit.idvisitor) as nb_uniq_visitors";
  292. $from = "log_visit";
  293. $where = "log_visit.visit_last_action_time >= ?
  294. AND log_visit.visit_last_action_time <= ?
  295. AND log_visit.idsite = ?";
  296. $bind = array($this->getStartDatetimeUTC(), $this->getEndDatetimeUTC(), $this->idsite);
  297. $query = $this->getSegment()->getSelectQuery($select, $from, $where, $bind);
  298. return Zend_Registry::get('db')->fetchOne($query['sql'], $query['bind']);
  299. }
  300. /**
  301. * Called at the end of the archiving process.
  302. * Does some cleaning job in the database.
  303. */
  304. protected function postCompute()
  305. {
  306. parent::postCompute();
  307. $blobTable = $this->tableArchiveBlob->getTableName();
  308. $numericTable = $this->tableArchiveNumeric->getTableName();
  309. $key = 'lastPurge_' . $blobTable;
  310. $timestamp = Piwik_GetOption($key);
  311. // we shall purge temporary archives after their timeout is finished, plus an extra 2 hours
  312. // in case archiving is disabled and is late to run, we give it this extra time to run and re-process more recent records
  313. $temporaryArchivingTimeout = self::getTodayArchiveTimeToLive();
  314. $purgeEveryNSeconds = $temporaryArchivingTimeout + 2 * 3600;
  315. // we only delete archives if we are able to process them, otherwise, the browser might process reports
  316. // when &segment= is specified (or custom date range) and would below, delete temporary archives that the
  317. // browser is not able to process until next cron run (which could be more than 1 hour away)
  318. if($this->isRequestAuthorizedToArchive()
  319. && (!$timestamp
  320. || $timestamp < time() - $purgeEveryNSeconds))
  321. {
  322. Piwik_SetOption($key, time());
  323. $purgeArchivesOlderThan = Piwik_Date::factory(time() - $purgeEveryNSeconds)->getDateTime();
  324. $result = Piwik_FetchAll("
  325. SELECT idarchive
  326. FROM $numericTable
  327. WHERE name LIKE 'done%'
  328. AND value = ". Piwik_ArchiveProcessing::DONE_OK_TEMPORARY ."
  329. AND ts_archived < ?", array($purgeArchivesOlderThan));
  330. $idArchivesToDelete = array();
  331. if(!empty($result))
  332. {
  333. foreach($result as $row) {
  334. $idArchivesToDelete[] = $row['idarchive'];
  335. }
  336. $query = "DELETE
  337. FROM %s
  338. WHERE idarchive IN (".implode(',',$idArchivesToDelete).")
  339. ";
  340. Piwik_Query(sprintf($query, $blobTable));
  341. Piwik_Query(sprintf($query, $numericTable));
  342. }
  343. Piwik::log("Purging temporary archives: done [ purged archives older than $purgeArchivesOlderThan from $blobTable and $numericTable ] [Deleted IDs: ". implode(',',$idArchivesToDelete)."]");
  344. // Deleting "Custom Date Range" reports after 1 day, since they can be re-processed
  345. // and would take up unecessary space
  346. $yesterday = Piwik_Date::factory('yesterday')->getDateTime();
  347. $query = "DELETE
  348. FROM %s
  349. WHERE period = ?
  350. AND ts_archived < ?";
  351. $bind = array(Piwik::$idPeriods['range'], $yesterday);
  352. Piwik::log("Purging Custom Range archives: done [ purged archives older than $yesterday from $blobTable and $numericTable ]");
  353. Piwik_Query(sprintf($query, $blobTable), $bind);
  354. Piwik_Query(sprintf($query, $numericTable), $bind);
  355. // these tables will be OPTIMIZEd daily in a scheduled task, to claim lost space
  356. }
  357. else
  358. {
  359. Piwik::log("Purging temporary archives: skipped.");
  360. }
  361. if(!isset($this->archives))
  362. {
  363. return;
  364. }
  365. foreach($this->archives as $archive)
  366. {
  367. destroy($archive);
  368. }
  369. $this->archives = array();
  370. }
  371. }