PageRenderTime 55ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/core/ArchiveProcessor.php

https://github.com/CodeYellowBV/piwik
PHP | 495 lines | 237 code | 44 blank | 214 comment | 26 complexity | 7b29f8f15e5ec5f5bb7b4a07fd96776b MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik;
  10. use Exception;
  11. use Piwik\ArchiveProcessor\Parameters;
  12. use Piwik\DataAccess\ArchiveWriter;
  13. use Piwik\DataAccess\LogAggregator;
  14. use Piwik\DataTable\Manager;
  15. use Piwik\DataTable\Map;
  16. use Piwik\DataTable\Row;
  17. use Piwik\Db;
  18. use Piwik\Period;
  19. /**
  20. * Used by {@link Piwik\Plugin\Archiver} instances to insert and aggregate archive data.
  21. *
  22. * ### See also
  23. *
  24. * - **{@link Piwik\Plugin\Archiver}** - to learn how plugins should implement their own analytics
  25. * aggregation logic.
  26. * - **{@link Piwik\DataAccess\LogAggregator}** - to learn how plugins can perform data aggregation
  27. * across Piwik's log tables.
  28. *
  29. * ### Examples
  30. *
  31. * **Inserting numeric data**
  32. *
  33. * // function in an Archiver descendant
  34. * public function aggregateDayReport()
  35. * {
  36. * $archiveProcessor = $this->getProcessor();
  37. *
  38. * $myFancyMetric = // ... calculate the metric value ...
  39. * $archiveProcessor->insertNumericRecord('MyPlugin_myFancyMetric', $myFancyMetric);
  40. * }
  41. *
  42. * **Inserting serialized DataTables**
  43. *
  44. * // function in an Archiver descendant
  45. * public function aggregateDayReport()
  46. * {
  47. * $archiveProcessor = $this->getProcessor();
  48. *
  49. * $maxRowsInTable = Config::getInstance()->General['datatable_archiving_maximum_rows_standard'];
  50. *
  51. * $dataTable = // ... build by aggregating visits ...
  52. * $serializedData = $dataTable->getSerialized($maxRowsInTable, $maxRowsInSubtable = $maxRowsInTable,
  53. * $columnToSortBy = Metrics::INDEX_NB_VISITS);
  54. *
  55. * $archiveProcessor->insertBlobRecord('MyPlugin_myFancyReport', $serializedData);
  56. * }
  57. *
  58. * **Aggregating archive data**
  59. *
  60. * // function in Archiver descendant
  61. * public function aggregateMultipleReports()
  62. * {
  63. * $archiveProcessor = $this->getProcessor();
  64. *
  65. * // aggregate a metric
  66. * $archiveProcessor->aggregateNumericMetrics('MyPlugin_myFancyMetric');
  67. * $archiveProcessor->aggregateNumericMetrics('MyPlugin_mySuperFancyMetric', 'max');
  68. *
  69. * // aggregate a report
  70. * $archiveProcessor->aggregateDataTableRecords('MyPlugin_myFancyReport');
  71. * }
  72. *
  73. */
  class ArchiveProcessor
  {
      /**
       * Writer that receives every numeric and blob record inserted through this processor.
       *
       * @var \Piwik\DataAccess\ArchiveWriter
       */
      protected $archiveWriter;

      /**
       * Log aggregator built in the constructor from the same parameters as this processor.
       *
       * @var \Piwik\DataAccess\LogAggregator
       */
      protected $logAggregator;

      /**
       * Archive over the subperiods; created lazily by {@link getArchive()}.
       *
       * @var Archive|null
       */
      public $archive = null;

      /**
       * @var Parameters
       */
      protected $params;

      /**
       * Number of visits, or `false` as long as {@link setNumberOfVisits()} has not been called.
       *
       * @var int|bool
       */
      protected $numberOfVisits = false;

      /**
       * Number of converted visits, or `false` as long as {@link setNumberOfVisits()} has not
       * been called.
       *
       * @var int|bool
       */
      protected $numberOfVisitsConverted = false;

      /**
       * Array of (column name before => column name renamed) of the columns for which sum operation is invalid.
       * These columns will be renamed as per this mapping.
       *
       * @var array
       */
      protected static $columnsToRenameAfterAggregation = array(
          Metrics::INDEX_NB_UNIQ_VISITORS => Metrics::INDEX_SUM_DAILY_NB_UNIQ_VISITORS
      );

      /**
       * Stores the parameters and writer and creates the log aggregator for those parameters.
       *
       * @param Parameters $params The site(s), period and segment being archived.
       * @param ArchiveWriter $archiveWriter Writer that will receive the inserted records.
       */
      public function __construct(Parameters $params, ArchiveWriter $archiveWriter)
      {
          $this->params = $params;
          $this->logAggregator = new LogAggregator($params);
          $this->archiveWriter = $archiveWriter;
      }

      /**
       * Returns the archive built from the segment, subperiods and site ids of the parameters,
       * creating it on first use.
       *
       * @return Archive
       */
      protected function getArchive()
      {
          if (empty($this->archive)) {
              $subPeriods = $this->params->getSubPeriods();
              $idSites = $this->params->getIdSites();
              $this->archive = Archive::factory($this->params->getSegment(), $subPeriods, $idSites);
          }

          return $this->archive;
      }

      /**
       * Records the visit counts later returned by {@link getNumberOfVisits()} and
       * {@link getNumberOfVisitsConverted()}.
       *
       * @param int $visits
       * @param int $visitsConverted
       */
      public function setNumberOfVisits($visits, $visitsConverted)
      {
          $this->numberOfVisits = $visits;
          $this->numberOfVisitsConverted = $visitsConverted;
      }

      /**
       * Returns the {@link Parameters} object containing the site, period and segment we're archiving
       * data for.
       *
       * @return Parameters
       * @api
       */
      public function getParams()
      {
          return $this->params;
      }

      /**
       * Returns a `{@link Piwik\DataAccess\LogAggregator}` instance for the site, period and segment this
       * ArchiveProcessor will insert archive data for.
       *
       * @return LogAggregator
       * @api
       */
      public function getLogAggregator()
      {
          return $this->logAggregator;
      }

      /**
       * Sums records for every subperiod of the current period and inserts the result as the record
       * for this period.
       *
       * DataTables are summed recursively so subtables will be summed as well, unless the parameters
       * ask to skip the aggregation of subtables (see {@link isAggregateSubTables()}).
       *
       * @param string|array $recordNames Name(s) of the report we are aggregating, eg, `'Referrers_type'`.
       * @param int $maximumRowsInDataTableLevelZero Maximum number of rows allowed in the top level DataTable.
       * @param int $maximumRowsInSubDataTable Maximum number of rows allowed in each subtable.
       * @param string $columnToSortByBeforeTruncation The name of the column to sort by before truncating a DataTable.
       * @param array $columnsAggregationOperation Operations for aggregating columns, see {@link Row::sumRow()}.
       *                                           Taken by reference for interface compatibility; it is not
       *                                           modified here.
       * @param array $columnsToRenameAfterAggregation Columns mapped to new names for columns that must change names
       *                                               when summed because they cannot be summed, eg,
       *                                               `array('nb_uniq_visitors' => 'sum_daily_nb_uniq_visitors')`.
       * @return array Returns the row counts of each aggregated report before truncation, eg,
       *
       *                   array(
       *                       'report1' => array('level0' => $report1->getRowsCount,
       *                                          'recursive' => $report1->getRowsCountRecursive()),
       *                       'report2' => array('level0' => $report2->getRowsCount,
       *                                          'recursive' => $report2->getRowsCountRecursive()),
       *                       ...
       *                   )
       * @api
       */
      public function aggregateDataTableRecords($recordNames,
                                                $maximumRowsInDataTableLevelZero = null,
                                                $maximumRowsInSubDataTable = null,
                                                $columnToSortByBeforeTruncation = null,
                                                &$columnsAggregationOperation = null,
                                                $columnsToRenameAfterAggregation = null)
      {
          if (!is_array($recordNames)) {
              $recordNames = array($recordNames);
          }

          $nameToCount = array();
          foreach ($recordNames as $recordName) {
              // Remember the newest table id so the tables created for this record can be
              // deleted again once its blob has been written.
              $latestUsedTableId = Manager::getInstance()->getMostRecentTableId();

              $table = $this->aggregateDataTableRecord($recordName, $columnsAggregationOperation, $columnsToRenameAfterAggregation);

              $rowsCount = $table->getRowsCount();
              $nameToCount[$recordName]['level0'] = $rowsCount;

              // Without subtable aggregation the recursive count is the top-level count.
              $rowsCountRecursive = $rowsCount;
              if ($this->isAggregateSubTables()) {
                  $rowsCountRecursive = $table->getRowsCountRecursive();
              }
              $nameToCount[$recordName]['recursive'] = $rowsCountRecursive;

              $blob = $table->getSerialized($maximumRowsInDataTableLevelZero, $maximumRowsInSubDataTable, $columnToSortByBeforeTruncation);
              Common::destroy($table);
              $this->insertBlobRecord($recordName, $blob);

              unset($blob);
              Manager::getInstance()->deleteAll($latestUsedTableId);
          }

          return $nameToCount;
      }

      /**
       * Aggregates one or more metrics for every subperiod of the current period and inserts the results
       * as metrics for the current period.
       *
       * @param array|string $columns Array of metric names to aggregate.
       * @param bool|string $operationToApply The operation to apply to the metric. Either `'sum'`, `'max'` or `'min'`.
       *                                      When empty, the operation is guessed per column from its name
       *                                      prefix (`max_`, `min_`, otherwise sum).
       * @return array|int Returns the array of aggregate values. If only one metric was aggregated,
       *                   the aggregate value will be returned as is, not in an array.
       *                   For example, if `array('nb_visits', 'nb_hits')` is supplied for `$columns`,
       *
       *                       array(
       *                           'nb_visits' => 3040,
       *                           'nb_hits' => 405
       *                       )
       *
       *                   could be returned. If `array('nb_visits')` or `'nb_visits'` is used for `$columns`,
       *                   then `3040` would be returned.
       * @api
       */
      public function aggregateNumericMetrics($columns, $operationToApply = false)
      {
          $metrics = $this->getAggregatedNumericMetrics($columns, $operationToApply);

          foreach ($metrics as $column => $value) {
              $this->archiveWriter->insertRecord($column, $value);
          }

          // if asked for only one field to sum
          if (count($metrics) === 1) {
              return reset($metrics);
          }

          // returns the array of records once summed
          return $metrics;
      }

      /**
       * Returns the number of visits passed to {@link setNumberOfVisits()}.
       *
       * @return int
       * @throws Exception if the number of visits has not been set yet.
       */
      public function getNumberOfVisits()
      {
          if ($this->numberOfVisits === false) {
              throw new Exception("visits should have been set here");
          }

          return $this->numberOfVisits;
      }

      /**
       * Returns the number of converted visits passed to {@link setNumberOfVisits()}, or `false`
       * when it has not been set (unlike {@link getNumberOfVisits()}, this getter does not throw).
       *
       * @return int|bool
       */
      public function getNumberOfVisitsConverted()
      {
          return $this->numberOfVisitsConverted;
      }

      /**
       * Caches multiple numeric records in the archive for this processor's site, period
       * and segment.
       *
       * @param array $numericRecords A name-value mapping of numeric values that should be
       *                              archived, eg,
       *
       *                                  array('Referrers_distinctKeywords' => 23, 'Referrers_distinctCampaigns' => 234)
       * @api
       */
      public function insertNumericRecords($numericRecords)
      {
          foreach ($numericRecords as $name => $value) {
              $this->insertNumericRecord($name, $value);
          }
      }

      /**
       * Caches a single numeric record in the archive for this processor's site, period and
       * segment.
       *
       * The value is rounded to two decimals before it is handed to the archive writer.
       *
       * NOTE(review): earlier documentation stated that values equal to `0` are not inserted.
       * This method does not filter them itself; if that still holds it is enforced by the
       * archive writer - confirm.
       *
       * @param string $name The name of the numeric value, eg, `'Referrers_distinctKeywords'`.
       * @param float $value The numeric value.
       * @api
       */
      public function insertNumericRecord($name, $value)
      {
          $value = round($value, 2);
          $this->archiveWriter->insertRecord($name, $value);
      }

      /**
       * Caches one or more blob records in the archive for this processor's site, period
       * and segment.
       *
       * @param string $name The name of the record, eg, 'Referrers_type'.
       * @param string|array $values A blob string or an array of blob strings. If an array
       *                             is used, the first element in the array will be inserted
       *                             with the `$name` name. The others will be inserted with
       *                             `$name . '_' . $index` as the record name (where $index is
       *                             the index of the blob record in `$values`).
       * @api
       */
      public function insertBlobRecord($name, $values)
      {
          $this->archiveWriter->insertBlobRecord($name, $values);
      }

      /**
       * This method selects all DataTables that have the name $name over the period.
       * All these DataTables are then added together, and the resulting DataTable is returned.
       *
       * @param string $name
       * @param array $columnsAggregationOperation Operations for aggregating columns, @see Row::sumRow()
       * @param array $columnsToRenameAfterAggregation columns in the array (old name, new name) to be renamed as the
       *                                               sum operation is not valid on them
       *                                               (eg. nb_uniq_visitors->sum_daily_nb_uniq_visitors)
       * @return DataTable
       */
      protected function aggregateDataTableRecord($name, $columnsAggregationOperation = null, $columnsToRenameAfterAggregation = null)
      {
          if ($this->isAggregateSubTables()) {
              // By default we shall aggregate all sub-tables.
              // Arguments: $idSubTable = null, $depth = null, $addMetadataSubtableId = false
              $dataTable = $this->getArchive()->getDataTableExpanded($name, null, null, false);
          } else {
              // In some cases (eg. Actions plugin when period=range),
              // for better performance we will only aggregate the parent table
              // Arguments: $idSubTable = null
              $dataTable = $this->getArchive()->getDataTable($name, null);
          }

          if ($dataTable instanceof Map) {
              // see http://dev.piwik.org/trac/ticket/4377
              // Rename on each child table first, before the tables are added together.
              foreach ($dataTable->getDataTables() as $table) {
                  $this->renameColumnsAfterAggregation($table, $columnsToRenameAfterAggregation);
              }
          }

          $dataTable = $this->getAggregatedDataTableMap($dataTable, $columnsAggregationOperation);
          $this->renameColumnsAfterAggregation($dataTable, $columnsToRenameAfterAggregation);

          return $dataTable;
      }

      /**
       * Builds the (column name => operation) map for the given columns.
       *
       * @param array $columns Column names.
       * @param bool|string $defaultOperation Operation used for every column; when empty the
       *                                      operation is guessed per column from its name.
       * @return array
       */
      protected function getOperationForColumns($columns, $defaultOperation)
      {
          $operationForColumn = array();
          foreach ($columns as $name) {
              $operation = $defaultOperation;
              if (empty($operation)) {
                  $operation = $this->guessOperationForColumn($name);
              }
              $operationForColumn[$name] = $operation;
          }

          return $operationForColumn;
      }

      /**
       * Replaces the row's `nb_uniq_visitors` column with a value computed from the logs, or
       * deletes the column when unique visitors are not enabled for the current period label.
       *
       * Does nothing when archiving more than one site or when the row has no such column.
       *
       * @param Row $row
       */
      protected function enrichWithUniqueVisitorsMetric(Row $row)
      {
          if (!$this->getParams()->isSingleSite()) {
              // we only compute unique visitors for a single site
              return;
          }

          if ($row->getColumn('nb_uniq_visitors') === false) {
              return;
          }

          $periodLabel = $this->getParams()->getPeriod()->getLabel();
          if (SettingsPiwik::isUniqueVisitorsEnabled($periodLabel)) {
              $uniqueVisitors = (float)$this->computeNbUniqVisitors();
              $row->setColumn('nb_uniq_visitors', $uniqueVisitors);
          } else {
              $row->deleteColumn('nb_uniq_visitors');
          }
      }

      /**
       * Derives the aggregation operation from a column name prefix: `max_` => 'max',
       * `min_` => 'min', anything else => 'sum'.
       *
       * @param string $column
       * @return string
       */
      protected function guessOperationForColumn($column)
      {
          if (strpos($column, 'max_') === 0) {
              return 'max';
          }
          if (strpos($column, 'min_') === 0) {
              return 'min';
          }

          return 'sum';
      }

      /**
       * Processes number of unique visitors for the given period
       *
       * This is the only Period metric (ie. week/month/year/range) that we process from the logs directly,
       * since unique visitors cannot be summed like other metrics.
       *
       * @return int The unique visitor count, or 0 when the query yields no value for it.
       */
      protected function computeNbUniqVisitors()
      {
          $query = $this->getLogAggregator()->queryVisitsByDimension(
              array(),
              false,
              array(),
              array(Metrics::INDEX_NB_UNIQ_VISITORS)
          );
          $data = $query->fetch();

          // fetch() may yield a non-array (no row); avoid reading an offset from it and
          // report zero visitors instead of returning null.
          if (!isset($data[Metrics::INDEX_NB_UNIQ_VISITORS])) {
              return 0;
          }

          return (int)$data[Metrics::INDEX_NB_UNIQ_VISITORS];
      }

      /**
       * If the DataTable is a Map, sums all DataTable in the map and return the DataTable.
       * A plain DataTable is added to a fresh table instead.
       *
       * @param DataTable|DataTable\Map $data
       * @param array $columnsAggregationOperation Per-column operations; when not empty they are stored
       *                                           as metadata on the resulting table before summing.
       * @return DataTable
       */
      protected function getAggregatedDataTableMap($data, $columnsAggregationOperation)
      {
          $table = new DataTable();
          if (!empty($columnsAggregationOperation)) {
              $table->setMetadata(DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnsAggregationOperation);
          }

          if ($data instanceof Map) {
              // as $date => $tableToSum
              $this->aggregatedDataTableMapsAsOne($data, $table);
          } else {
              $table->addDataTable($data, $this->isAggregateSubTables());
          }

          return $table;
      }

      /**
       * Aggregates the DataTable\Map into the destination $aggregated, descending into nested maps.
       *
       * @param Map $map
       * @param DataTable $aggregated
       */
      protected function aggregatedDataTableMapsAsOne(Map $map, DataTable $aggregated)
      {
          foreach ($map->getDataTables() as $tableToAggregate) {
              if ($tableToAggregate instanceof Map) {
                  $this->aggregatedDataTableMapsAsOne($tableToAggregate, $aggregated);
              } else {
                  $aggregated->addDataTable($tableToAggregate, $this->isAggregateSubTables());
              }
          }
      }

      /**
       * Renames columns of the table according to the given (old name => new name) mapping.
       *
       * @param DataTable $table
       * @param array|null $columnsToRenameAfterAggregation Mapping to apply; `null` selects the class default
       *                                                    {@link $columnsToRenameAfterAggregation}.
       */
      protected function renameColumnsAfterAggregation(DataTable $table, $columnsToRenameAfterAggregation = null)
      {
          // Rename columns after aggregation
          if (is_null($columnsToRenameAfterAggregation)) {
              $columnsToRenameAfterAggregation = self::$columnsToRenameAfterAggregation;
          }

          foreach ($columnsToRenameAfterAggregation as $oldName => $newName) {
              $table->renameColumn($oldName, $newName, $this->isAggregateSubTables());
          }
      }

      /**
       * Aggregates the requested numeric metrics over the subperiods and returns them as an array.
       *
       * Every requested column is present in the result; columns without a value are set to 0.
       *
       * @param array|string $columns Metric name(s).
       * @param bool|string $operationToApply Operation for all columns, or empty to guess per column.
       * @return array (metric name => value)
       * @throws Exception if the aggregated table contains more than one row.
       */
      protected function getAggregatedNumericMetrics($columns, $operationToApply)
      {
          if (!is_array($columns)) {
              $columns = array($columns);
          }

          $operationForColumn = $this->getOperationForColumns($columns, $operationToApply);

          $dataTable = $this->getArchive()->getDataTableFromNumeric($columns);
          $results = $this->getAggregatedDataTableMap($dataTable, $operationForColumn);

          if ($results->getRowsCount() > 1) {
              throw new Exception("A DataTable is an unexpected state:" . var_export($results, true));
          }

          $rowMetrics = $results->getFirstRow();
          if ($rowMetrics === false) {
              $rowMetrics = new Row();
          }

          $this->enrichWithUniqueVisitorsMetric($rowMetrics);
          $this->renameColumnsAfterAggregation($results);

          $metrics = $rowMetrics->getColumns();
          foreach ($columns as $name) {
              if (!isset($metrics[$name])) {
                  $metrics[$name] = 0;
              }
          }

          return $metrics;
      }

      /**
       * Tells whether subtables are aggregated, ie. the parameters do not ask to skip them.
       *
       * @return bool
       */
      protected function isAggregateSubTables()
      {
          return !$this->getParams()->isSkipAggregationOfSubTables();
      }
  }