PageRenderTime 51ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/core/Archive.php

https://github.com/CodeYellowBV/piwik
PHP | 807 lines | 319 code | 73 blank | 415 comment | 54 complexity | 5b0b8a6710465f5b2ed08217e6fcf73d MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik;
  10. use Piwik\Archive\Parameters;
  11. use Piwik\ArchiveProcessor\Rules;
  12. use Piwik\DataAccess\ArchiveSelector;
  13. use Piwik\Period\Factory;
  14. /**
  15. * The **Archive** class is used to query cached analytics statistics
  16. * (termed "archive data").
  17. *
  18. * You can use **Archive** instances to get data that was archived for one or more sites,
  19. * for one or more periods and one optional segment.
  20. *
  21. * If archive data is not found, this class will initiate the archiving process. [1](#footnote-1)
  22. *
  23. * **Archive** instances must be created using the {@link build()} factory method;
  24. * they cannot be constructed.
  25. *
  26. * You can search for metrics (such as `nb_visits`) using the {@link getNumeric()} and
  27. * {@link getDataTableFromNumeric()} methods. You can search for
  28. * reports using the {@link getBlob()}, {@link getDataTable()} and {@link getDataTableExpanded()} methods.
  29. *
  30. * If you're creating an API that returns report data, you may want to use the
  31. * {@link getDataTableFromArchive()} helper function.
  32. *
  33. * ### Learn more
  34. *
  35. * Learn more about _archiving_ [here](/guides/all-about-analytics-data).
  36. *
  37. * ### Limitations
  38. *
  39. * - You cannot get data for multiple range periods in a single query.
  40. * - You cannot get data for periods of different types in a single query.
  41. *
  42. * ### Examples
  43. *
  44. * **_Querying metrics for an API method_**
  45. *
  46. * // one site and one period
  47. * $archive = Archive::build($idSite = 1, $period = 'week', $date = '2013-03-08');
  48. * return $archive->getDataTableFromNumeric(array('nb_visits', 'nb_actions'));
  49. *
  50. * // all sites and multiple dates
  51. * $archive = Archive::build($idSite = 'all', $period = 'month', $date = '2013-01-02,2013-03-08');
  52. * return $archive->getDataTableFromNumeric(array('nb_visits', 'nb_actions'));
  53. *
  54. * **_Querying and using metrics immediately_**
  55. *
  56. * // one site and one period
  57. * $archive = Archive::build($idSite = 1, $period = 'week', $date = '2013-03-08');
  58. * $data = $archive->getNumeric(array('nb_visits', 'nb_actions'));
  59. *
  60. * $visits = $data['nb_visits'];
  61. * $actions = $data['nb_actions'];
  62. *
  63. * // ... do something w/ metric data ...
  64. *
  65. * // multiple sites and multiple dates
  66. * $archive = Archive::build($idSite = '1,2,3', $period = 'month', $date = '2013-01-02,2013-03-08');
  67. * $data = $archive->getNumeric('nb_visits');
  68. *
  69. * $janSite1Visits = $data['1']['2013-01-01,2013-01-31']['nb_visits'];
  70. * $febSite1Visits = $data['1']['2013-02-01,2013-02-28']['nb_visits'];
  71. * // ... etc.
  72. *
  73. * **_Querying for reports_**
  74. *
  75. * $archive = Archive::build($idSite = 1, $period = 'week', $date = '2013-03-08');
  76. * $dataTable = $archive->getDataTable('MyPlugin_MyReport');
  77. * // ... manipulate $dataTable ...
  78. * return $dataTable;
  79. *
  80. * **_Querying a report for an API method_**
  81. *
  82. * public function getMyReport($idSite, $period, $date, $segment = false, $expanded = false)
  83. * {
  84. * $dataTable = Archive::getDataTableFromArchive('MyPlugin_MyReport', $idSite, $period, $date, $segment, $expanded);
  85. * $dataTable->queueFilter('ReplaceColumnNames');
  86. * return $dataTable;
  87. * }
  88. *
  89. * **_Querying data for multiple range periods_**
  90. *
  91. * // get data for first range
  92. * $archive = Archive::build($idSite = 1, $period = 'range', $date = '2013-03-08,2013-03-12');
  93. * $dataTable = $archive->getDataTableFromNumeric(array('nb_visits', 'nb_actions'));
  94. *
  95. * // get data for second range
  96. * $archive = Archive::build($idSite = 1, $period = 'range', $date = '2013-03-15,2013-03-20');
  97. * $dataTable = $archive->getDataTableFromNumeric(array('nb_visits', 'nb_actions'));
  98. *
  99. * <a name="footnote-1"></a>
  100. * [1]: The archiving process will not be launched if browser archiving is disabled
  101. * and the current request came from a browser.
  102. *
  103. *
  104. * @api
  105. */
  106. class Archive
  107. {
  108. const REQUEST_ALL_WEBSITES_FLAG = 'all';
  109. const ARCHIVE_ALL_PLUGINS_FLAG = 'all';
  110. const ID_SUBTABLE_LOAD_ALL_SUBTABLES = 'all';
  111. /**
  112. * List of archive IDs for the site, periods and segment we are querying with.
  113. * Archive IDs are indexed by done flag and period, ie:
  114. *
  115. * array(
  116. * 'done.Referrers' => array(
  117. * '2010-01-01' => 1,
  118. * '2010-01-02' => 2,
  119. * ),
  120. * 'done.VisitsSummary' => array(
  121. * '2010-01-01' => 3,
  122. * '2010-01-02' => 4,
  123. * ),
  124. * )
  125. *
  126. * or,
  127. *
  128. * array(
  129. * 'done.all' => array(
  130. * '2010-01-01' => 1,
  131. * '2010-01-02' => 2
  132. * )
  133. * )
  134. *
  135. * @var array
  136. */
  137. private $idarchives = array();
  138. /**
  139. * If set to true, the result of all get functions (ie, getNumeric, getBlob, etc.)
  140. * will be indexed by the site ID, even if we're only querying data for one site.
  141. *
  142. * @var bool
  143. */
  144. private $forceIndexedBySite;
  145. /**
  146. * If set to true, the result of all get functions (ie, getNumeric, getBlob, etc.)
  147. * will be indexed by the period, even if we're only querying data for one period.
  148. *
  149. * @var bool
  150. */
  151. private $forceIndexedByDate;
  152. /**
  153. * @var Parameters
  154. */
  155. private $params;
  /**
   * Stores the query parameters and the result-indexing flags for this instance.
   *
   * The constructor is protected; instances are obtained through {@link build()} or
   * {@link factory()}.
   *
   * @param Parameters $params The sites, periods and segment this instance queries.
   * @param bool $forceIndexedBySite Whether to force index the result of a query by site ID.
   * @param bool $forceIndexedByDate Whether to force index the result of a query by period.
   */
  protected function __construct(
      Parameters $params,
      $forceIndexedBySite = false,
      $forceIndexedByDate = false
  ) {
      $this->forceIndexedByDate = $forceIndexedByDate;
      $this->forceIndexedBySite = $forceIndexedBySite;
      $this->params = $params;
  }
  /**
   * Returns a new Archive instance that will query archive data for the given set of
   * sites and periods, using an optional Segment.
   *
   * This method uses data that is found in query parameters, so the parameters to this
   * function can be string values.
   *
   * If you want to create an Archive instance with an array of Period instances, use
   * {@link Archive::factory()}.
   *
   * @param string|int|array $idSites A single ID (eg, `'1'`), multiple IDs (eg, `'1,2,3'` or `array(1, 2, 3)`),
   *                                  or `'all'`.
   * @param string $period `'day'`, `'week'`, `'month'`, `'year'` or `'range'`
   * @param Date|string $strDate `'YYYY-MM-DD'`, magic keywords (ie, `'today'`; see {@link Date::factory()})
   *                             or date range (ie, `'YYYY-MM-DD,YYYY-MM-DD'`).
   * @param bool|false|string $segment Segment definition or false if no segment should be used. {@link Piwik\Segment}
   * @param bool|false|string $_restrictSitesToLogin Used only when running as a scheduled task.
   * @param bool $skipAggregationOfSubTables Whether the archive, when it is processed, should skip the
   *                                         aggregation of sub-tables (forwarded to {@link factory()}).
   * @return Archive
   */
  public static function build($idSites, $period, $strDate, $segment = false, $_restrictSitesToLogin = false, $skipAggregationOfSubTables = false)
  {
      $websiteIds = Site::getIdSitesFromIdSitesString($idSites, $_restrictSitesToLogin);

      // Evaluated once; the same answer drives both the period construction below
      // and the "index results by period" flag handed to factory().
      $isMultipleDate = Period::isMultiplePeriod($strDate, $period);

      if ($isMultipleDate) {
          $oPeriod = Factory::build($period, $strDate);
          $allPeriods = $oPeriod->getSubperiods();
      } else {
          // a site-specific timezone is only used when exactly one site is queried
          $timezone = count($websiteIds) == 1 ? Site::getTimezoneFor($websiteIds[0]) : false;
          $oPeriod = Factory::makePeriodFromQueryParams($timezone, $period, $strDate);
          $allPeriods = array($oPeriod);
      }

      $segment = new Segment($segment, $websiteIds);

      // Strict comparison: with a loose '==' an integer 0 (or boolean true) compares
      // equal to the string 'all' on PHP < 8 and would wrongly force site indexing.
      $idSiteIsAll = $idSites === self::REQUEST_ALL_WEBSITES_FLAG;

      return self::factory($segment, $allPeriods, $websiteIds, $idSiteIsAll, $isMultipleDate, $skipAggregationOfSubTables);
  }
  /**
   * Creates an Archive instance from already-built objects: a Segment instance and an
   * array of Period instances, rather than the string values accepted by {@link build()}.
   *
   * If you want to create an Archive instance using data found in query parameters,
   * use {@link build()}.
   *
   * @param Segment $segment The segment to use. For no segment, use `new Segment('', $idSites)`.
   * @param array $periods An array of Period instances.
   * @param array $idSites An array of site IDs (eg, `array(1, 2, 3)`).
   * @param bool $idSiteIsAll Whether `'all'` sites are being queried or not. If true, then
   *                          the result of querying functions will be indexed by site, regardless
   *                          of whether `count($idSites) == 1`.
   * @param bool $isMultipleDate Whether multiple dates are being queried or not. If true, then
   *                             the result of querying functions will be indexed by period,
   *                             regardless of whether `count($periods) == 1`.
   * @param bool $skipAggregationOfSubTables Whether the archive should skip aggregation of all sub-tables
   *
   * @return Archive
   */
  public static function factory(Segment $segment, array $periods, array $idSites, $idSiteIsAll = false, $isMultipleDate = false, $skipAggregationOfSubTables = false)
  {
      // results are indexed by site when 'all' was requested or several sites are queried
      $indexBySite = ($idSiteIsAll || count($idSites) > 1) ? true : false;

      // results are indexed by period when several periods are queried or a multiple date was requested
      $indexByDate = (count($periods) > 1 || $isMultipleDate) ? true : false;

      $queryParameters = new Parameters($idSites, $periods, $segment, $skipAggregationOfSubTables);

      return new self($queryParameters, $indexBySite, $indexByDate);
  }
  /**
   * Queries metric ("numeric") archive data and returns it as a plain value or array.
   *
   * If multiple sites were requested in {@link build()} or {@link factory()} the result will
   * be indexed by site ID.
   *
   * If multiple periods were requested in {@link build()} or {@link factory()} the result will
   * be indexed by period.
   *
   * The site ID index is always first, so if multiple sites & periods were requested, the result
   * will be indexed by site ID first, then period.
   *
   * @param string|array $names One or more archive names, eg, `'nb_visits'`, `'Referrers_distinctKeywords'`,
   *                            etc.
   * @return float|array A single float when exactly one metric is queried for one site and one
   *                     period (`0.0` when there is no value for it); otherwise the indexed
   *                     result produced by the data collection.
   */
  public function getNumeric($names)
  {
      $collection = $this->get($names, 'numeric');
      $indices = $this->getResultIndices();
      $values = $collection->getIndexedArray($indices);

      $singleNameRequested = !is_array($names) || count($names) == 1;

      // anything indexed by site/period, or holding several metrics, is returned untouched
      if (!empty($indices) || count($values) > 1 || !$singleNameRequested) {
          return $values;
      }

      // only one metric: return it as a number; the cast turns the `false` that
      // reset() yields for an empty array into 0.0
      return (float)reset($values);
  }
  /**
   * Queries blob archive data and returns it as an indexed array.
   *
   * Reports are stored in blobs as serialized arrays of {@link DataTable\Row} instances, but this
   * data can technically be anything. In other words, you can store whatever you want
   * as archive data blobs.
   *
   * If multiple sites were requested in {@link build()} or {@link factory()} the result will
   * be indexed by site ID.
   *
   * If multiple periods were requested in {@link build()} or {@link factory()} the result will
   * be indexed by period.
   *
   * The site ID index is always first, so if multiple sites & periods were requested, the result
   * will be indexed by site ID first, then period.
   *
   * @param string|array $names One or more archive names, eg, `'Referrers_keywordBySearchEngine'`.
   * @param null|string $idSubtable If we're returning serialized DataTable data, then this refers
   *                                to the subtable ID to return. If set to 'all', all subtables
   *                                of each requested report are returned.
   * @return array An array of appropriately indexed blob data.
   */
  public function getBlob($names, $idSubtable = null)
  {
      return $this->get($names, 'blob', $idSubtable)
                  ->getIndexedArray($this->getResultIndices());
  }
  /**
   * Queries metric ("numeric") archive data and returns it wrapped in DataTable instances.
   *
   * If multiple sites were requested in {@link build()} or {@link factory()} the result will
   * be a {@link DataTable\Map} that is indexed by site ID.
   *
   * If multiple periods were requested in {@link build()} or {@link factory()} the result will
   * be a {@link DataTable\Map} that is indexed by period.
   *
   * The site ID index is always first, so if multiple sites & periods were requested, the result
   * will be a {@link DataTable\Map} indexed by site ID which contains {@link DataTable\Map} instances that are
   * indexed by period.
   *
   * _Note: Every DataTable instance returned will have at most one row in it. The contents of each
   * row will be the requested metrics for the appropriate site and period._
   *
   * @param string|array $names One or more archive names, eg, `'nb_visits'`, `'Referrers_distinctKeywords'`,
   *                            etc.
   * @return DataTable|DataTable\Map A DataTable if multiple sites and periods were not requested.
   *                                 An appropriately indexed {@link DataTable\Map} if otherwise.
   */
  public function getDataTableFromNumeric($names)
  {
      $collection = $this->get($names, 'numeric');
      $indices = $this->getResultIndices();

      return $collection->getDataTable($indices);
  }
  /**
   * Queries one report (blob data) and returns it as DataTable instances.
   *
   * This method will query blob data that is a serialized array of {@link DataTable\Row}'s and
   * unserialize it.
   *
   * If multiple sites were requested in {@link build()} or {@link factory()} the result will
   * be a {@link DataTable\Map} that is indexed by site ID.
   *
   * If multiple periods were requested in {@link build()} or {@link factory()} the result will
   * be a {@link DataTable\Map} that is indexed by period.
   *
   * The site ID index is always first, so if multiple sites & periods were requested, the result
   * will be a {@link DataTable\Map} indexed by site ID which contains {@link DataTable\Map} instances that are
   * indexed by period.
   *
   * @param string $name The name of the record to get. This method can only query one record at a time.
   * @param int|string|null $idSubtable The ID of the subtable to get (if any).
   * @return DataTable|DataTable\Map A DataTable if multiple sites and periods were not requested.
   *                                 An appropriately indexed {@link DataTable\Map} if otherwise.
   */
  public function getDataTable($name, $idSubtable = null)
  {
      $collection = $this->get($name, 'blob', $idSubtable);
      $indices = $this->getResultIndices();

      return $collection->getDataTable($indices);
  }
  /**
   * Queries one report and returns it with all of its subtables loaded.
   *
   * The blob data is always fetched for all subtables; `$idSubtable`, `$depth` and
   * `$addMetadataSubtableId` are handed to the data collection when the expanded table is built.
   *
   * If multiple sites were requested in {@link build()} or {@link factory()} the result will
   * be a {@link DataTable\Map} that is indexed by site ID.
   *
   * If multiple periods were requested in {@link build()} or {@link factory()} the result will
   * be a {@link DataTable\Map} that is indexed by period.
   *
   * The site ID index is always first, so if multiple sites & periods were requested, the result
   * will be a {@link DataTable\Map} indexed by site ID which contains {@link DataTable\Map} instances that are
   * indexed by period.
   *
   * @param string $name The name of the record to get.
   * @param int|string|null $idSubtable The ID of the subtable to get (if any). The subtable will be expanded.
   * @param int|null $depth The maximum number of subtable levels to load. If null, all levels are loaded.
   *                        For example, if `1` is supplied, then the DataTable returned will have its subtables
   *                        loaded. Those subtables, however, will NOT have their subtables loaded.
   * @param bool $addMetadataSubtableId Whether to add the database subtable ID as metadata to each datatable,
   *                                    or not.
   * @return DataTable|DataTable\Map
   */
  public function getDataTableExpanded($name, $idSubtable = null, $depth = null, $addMetadataSubtableId = true)
  {
      return $this->get($name, 'blob', self::ID_SUBTABLE_LOAD_ALL_SUBTABLES)
                  ->getExpandedDataTable($this->getResultIndices(), $idSubtable, $depth, $addMetadataSubtableId);
  }
  /**
   * Maps each requested archive name to the plugin that archives it and returns the
   * distinct plugin names.
   *
   * @param array $archiveNames Archive names, as accepted by {@link getPluginForReport()}.
   * @return array Unique plugin names (first occurrence of each is kept).
   * @throws \Exception Propagated from {@link getPluginForReport()}.
   */
  private function getRequestedPlugins($archiveNames)
  {
      // array_values() keeps the resulting list sequentially indexed, whatever the input keys are
      $pluginPerName = array_map(
          array(__CLASS__, 'getPluginForReport'),
          array_values($archiveNames)
      );

      return array_unique($pluginPerName);
  }
  /**
   * Gives access to the Parameters object this Archive was created with, ie, the
   * description of the sites, periods and segment that are queried.
   *
   * @return Parameters
   */
  public function getParams()
  {
      $queryParameters = $this->params;

      return $queryParameters;
  }
  /**
   * Helper that checks view access, builds an Archive from query parameter data and
   * returns the requested report. API methods can use this method to reduce code redundancy.
   *
   * The `ReplaceSummaryRowLabel` filter is queued on the returned table.
   *
   * @param string $name The name of the report to return.
   * @param int|string|array $idSite @see {@link build()}
   * @param string $period @see {@link build()}
   * @param string $date @see {@link build()}
   * @param string $segment @see {@link build()}
   * @param bool $expanded If true, loads all subtables. See {@link getDataTableExpanded()}
   * @param int|null $idSubtable See {@link getDataTableExpanded()}. `false` is treated like `null`.
   * @param bool $skipAggregationOfSubTables Whether or not we should skip the aggregation of all sub-tables and only aggregate parent DataTable.
   * @param int|null $depth See {@link getDataTableExpanded()}
   * @return DataTable|DataTable\Map See {@link getDataTable()} and
   *                                 {@link getDataTableExpanded()} for more
   *                                 information
   * @throws \Exception If `$skipAggregationOfSubTables` is combined with `$expanded` or `$idSubtable`.
   */
  public static function getDataTableFromArchive($name, $idSite, $period, $date, $segment, $expanded,
                                                 $idSubtable = null, $skipAggregationOfSubTables = false, $depth = null)
  {
      Piwik::checkUserHasViewAccess($idSite);

      // skipping sub-table aggregation makes no sense when sub-tables are being requested
      $subtablesRequested = $expanded || $idSubtable;
      if ($skipAggregationOfSubTables && $subtablesRequested) {
          throw new \Exception("Not expected to skipAggregationOfSubTables when expanded=1 or idSubtable is set.");
      }

      $archive = Archive::build($idSite, $period, $date, $segment, false, $skipAggregationOfSubTables);

      // normalise "no subtable" to null
      $idSubtable = ($idSubtable === false) ? null : $idSubtable;

      $dataTable = $expanded
          ? $archive->getDataTableExpanded($name, $idSubtable, $depth)
          : $archive->getDataTable($name, $idSubtable);

      $dataTable->queueFilter('ReplaceSummaryRowLabel');

      return $dataTable;
  }
  /**
   * Builds the record name of a subtable: the record name, an underscore and the subtable ID.
   *
   * @param string $recordName
   * @param int|string $id
   * @return string
   */
  private function appendIdSubtable($recordName, $id)
  {
      return "{$recordName}_{$id}";
  }
  /**
   * Queries archive tables for data and returns the result.
   *
   * Steps, as implemented below:
   *  - a single name is wrapped in an array;
   *  - when a specific subtable is requested, its ID is appended to every record name;
   *  - archive IDs are resolved through {@link getArchiveIds()}; if there are none, an
   *    empty collection is returned;
   *  - every row found is stored in the collection under its site ID and
   *    `"date1,date2"` period string. Numeric values are formatted, blob values are
   *    uncompressed and their `ts_archived` is recorded as metadata.
   *
   * @param array|string $archiveNames One or more record names.
   * @param string $archiveDataType `'numeric'` selects numeric handling; any other value is
   *                                treated as blob data.
   * @param null|int|string $idSubtable A subtable ID, {@link ID_SUBTABLE_LOAD_ALL_SUBTABLES}
   *                                    to load every subtable, or null for none.
   * @return Archive\DataCollection
   */
  private function get($archiveNames, $archiveDataType, $idSubtable = null)
  {
      if (!is_array($archiveNames)) {
          $archiveNames = array($archiveNames);
      }

      // Strict comparison on purpose: with a loose '==' an integer subtable ID of 0
      // (or boolean true) compares equal to the string 'all' on PHP < 8 and would be
      // mistaken for the "load all subtables" flag.
      $loadAllSubtables = $idSubtable === self::ID_SUBTABLE_LOAD_ALL_SUBTABLES;

      // apply idSubtable
      if ($idSubtable !== null && !$loadAllSubtables) {
          foreach ($archiveNames as &$name) {
              $name = $this->appendIdSubtable($name, $idSubtable);
          }
          // break the reference so that later writes to $name cannot alter the last element
          unset($name);
      }

      $result = new Archive\DataCollection(
          $archiveNames, $archiveDataType, $this->params->getIdSites(), $this->params->getPeriods(), null);

      $archiveIds = $this->getArchiveIds($archiveNames);
      if (empty($archiveIds)) {
          return $result;
      }

      $archiveData = ArchiveSelector::getArchiveData($archiveIds, $archiveNames, $archiveDataType, $loadAllSubtables);
      foreach ($archiveData as $row) {
          // values are grouped by idsite (site ID), date1-date2 (date range), then name (field name)
          $idSite = $row['idsite'];
          $periodStr = $row['date1'] . "," . $row['date2'];

          if ($archiveDataType == 'numeric') {
              $value = $this->formatNumericValue($row['value']);
          } else {
              $value = $this->uncompress($row['value']);
              $result->addMetadata($idSite, $periodStr, 'ts_archived', $row['ts_archived']);
          }

          // the collection hands out its row by reference, so writing here updates the collection
          $resultRow = & $result->get($idSite, $periodStr);
          $resultRow[$row['name']] = $value;
      }

      return $result;
  }
  /**
   * Returns archive IDs for the sites, periods and archive names that are being
   * queried, grouped by date range.
   *
   * The idarchive cache ($this->idarchives) is used when it already has an entry for a
   * plugin's done flag. Otherwise the cache is filled first, either by launching
   * archiving or, when archiving is disabled for this request, by only looking the IDs up.
   *
   * @param array $archiveNames The archive names being queried.
   * @return array Lists of archive IDs keyed by date range string.
   */
  private function getArchiveIds($archiveNames)
  {
      $plugins = $this->getRequestedPlugins($archiveNames);

      // collect the done flags we need and the archive groups that are not cached yet
      // (a done flag present in $this->idarchives means that archive was already handled)
      $requestedDoneFlags = array();
      $groupsToArchive = array();
      foreach ($plugins as $plugin) {
          $doneFlag = $this->getDoneStringForPlugin($plugin);
          $requestedDoneFlags[$doneFlag] = true;

          if (isset($this->idarchives[$doneFlag])) {
              continue;
          }

          $group = $this->getArchiveGroupOfPlugin($plugin);
          // the "all plugins" group is represented by the first requested plugin
          $groupsToArchive[] = ($group == self::ARCHIVE_ALL_PLUGINS_FLAG) ? reset($plugins) : $group;
      }
      $groupsToArchive = array_unique($groupsToArchive);

      // cache id archives for plugins we haven't processed yet
      if (!empty($groupsToArchive)) {
          $archivingDisabled = Rules::isArchivingDisabledFor(
              $this->params->getIdSites(),
              $this->params->getSegment(),
              $this->getPeriodLabel()
          );

          if ($archivingDisabled) {
              $this->cacheArchiveIdsWithoutLaunching($plugins);
          } else {
              $this->cacheArchiveIdsAfterLaunching($groupsToArchive, $plugins);
          }
      }

      // flatten the cached IDs of every requested done flag into one list per date range
      $idArchivesByMonth = array();
      foreach ($requestedDoneFlags as $doneFlag => $unused) {
          if (empty($this->idarchives[$doneFlag])) {
              continue;
          }

          foreach ($this->idarchives[$doneFlag] as $dateRange => $cachedIds) {
              foreach ($cachedIds as $idArchive) {
                  $idArchivesByMonth[$dateRange][] = $idArchive;
              }
          }
      }

      return $idArchivesByMonth;
  }
  /**
   * Gets the IDs of the archives we're querying for and stores them in $this->idarchives.
   * For every period/site combination that can contain data, {@link prepareArchive()} is
   * called for the given archive groups.
   *
   * A combination is skipped (and logged) when the period lies before the site's
   * creation date or after today, using a two day margin on either side.
   *
   * @param array $archiveGroups Archive groups to process. @see getArchiveGroupOfPlugin
   * @param array $plugins List of plugin names to archive.
   */
  private function cacheArchiveIdsAfterLaunching($archiveGroups, $plugins)
  {
      $today = Date::today();

      foreach ($this->params->getPeriods() as $period) {
          $twoDaysBeforePeriod = $period->getDateStart()->subDay(2);
          $twoDaysAfterPeriod = $period->getDateEnd()->addDay(2);

          foreach ($this->params->getIdSites() as $idSite) {
              $site = new Site($idSite);

              if ($twoDaysAfterPeriod->isEarlier($site->getCreationDate())) {
                  // the period ended (margin of two days included) before the website was
                  // created, so we already know there are no stats for it
                  Log::verbose("Archive site %s, %s (%s) skipped, archive is before the website was created.",
                      $idSite, $period->getLabel(), $period->getPrettyString());
              } elseif ($twoDaysBeforePeriod->isLater($today)) {
                  // the period starts in the future, so we know there is no visit yet
                  Log::verbose("Archive site %s, %s (%s) skipped, archive is after today.",
                      $idSite, $period->getLabel(), $period->getPrettyString());
              } else {
                  $this->prepareArchive($archiveGroups, $site, $period);
              }
          }
      }
  }
  /**
   * Gets the IDs of the archives we're querying for and stores them in $this->idarchives.
   * This function will not launch the archiving process (and is thus much, much faster
   * than cacheArchiveIdsAfterLaunching).
   *
   * @param array $plugins List of plugin names from which data is being requested.
   */
  private function cacheArchiveIdsWithoutLaunching($plugins)
  {
      $foundIds = ArchiveSelector::getArchiveIds(
          $this->params->getIdSites(),
          $this->params->getPeriods(),
          $this->params->getSegment(),
          $plugins,
          $this->params->isSkipAggregationOfSubTables()
      );

      // make sure every requested done flag has a cache entry, even if no ID was found for it
      foreach ($plugins as $plugin) {
          $this->initializeArchiveIdCache($this->getDoneStringForPlugin($plugin));
      }

      // append what was found to the cache, keyed by done flag, then date range
      foreach ($foundIds as $doneFlag => $idsByDateRange) {
          foreach ($idsByDateRange as $dateRange => $ids) {
              foreach ($ids as $idArchive) {
                  $this->idarchives[$doneFlag][$dateRange][] = $idArchive;
              }
          }
      }
  }
  /**
   * Returns the done string flag for a plugin, computed by {@link Rules::getDoneStringFlagFor()}
   * from this instance's sites, segment, period label and sub-table aggregation setting.
   *
   * @param string $plugin
   * @return string
   */
  private function getDoneStringForPlugin($plugin)
  {
      $idSites = $this->params->getIdSites();
      $segment = $this->params->getSegment();
      $periodLabel = $this->getPeriodLabel();
      $skipSubTables = $this->params->isSkipAggregationOfSubTables();

      return Rules::getDoneStringFlagFor($idSites, $segment, $periodLabel, $plugin, $skipSubTables);
  }
  /**
   * Returns the label of the first period being queried.
   *
   * @return string
   */
  private function getPeriodLabel()
  {
      $periods = $this->params->getPeriods();
      $firstPeriod = reset($periods);

      return $firstPeriod->getLabel();
  }
  /**
   * Returns an array describing what metadata to use when indexing a query result.
   * For use with DataCollection.
   *
   * The `'site'` entry is present when more than one site is queried or site indexing is
   * forced; the `'period'` entry when more than one period is queried or date indexing is
   * forced. `'site'` always comes first.
   *
   * @return array
   */
  private function getResultIndices()
  {
      $indexBySite = $this->forceIndexedBySite || count($this->params->getIdSites()) > 1;
      $indexByDate = $this->forceIndexedByDate || count($this->params->getPeriods()) > 1;

      $indices = array();
      if ($indexBySite) {
          $indices['site'] = 'idSite';
      }
      if ($indexByDate) {
          $indices['period'] = 'date';
      }

      return $indices;
  }
  /**
   * Converts a raw numeric archive value to a PHP number.
   *
   * @param string|false $value The raw value.
   * @return int|float `0` for `false`, the value cast to float when it contains no dot,
   *                   otherwise the value rounded to 2 decimals.
   */
  private function formatNumericValue($value)
  {
      // no value at all
      if ($value === false) {
          return 0;
      }

      // Without a dot the value is only cast; note this could be an integer bigger than 32 bits.
      // With a dot the value is rounded to 2 decimals.
      $hasDecimalPoint = strpos($value, '.') !== false;

      return $hasDecimalPoint ? round((float)$value, 2) : (float)$value;
  }
  /**
   * Uncompresses a blob value with gzuncompress().
   *
   * Errors are deliberately silenced with `@`; whatever gzuncompress() returns
   * (`false` on failure) is handed back unchanged.
   *
   * @param string $data
   * @return string|false
   */
  private function uncompress($data)
  {
      $uncompressed = @gzuncompress($data);

      return $uncompressed;
  }
  /**
   * Makes sure the archive ID cache ($this->idarchives) has an entry for a particular
   * 'done' flag, creating an empty one if needed. An existing entry is left untouched.
   *
   * It is necessary that each archive ID caching function call this method for each
   * unique 'done' flag it encounters, since the getArchiveIds function determines
   * whether archiving should be launched based on whether $this->idarchives has
   * an entry for a specific 'done' flag.
   *
   * If this function is not called, then periods with no visits will not add
   * entries to the cache. If the archive is used again, SQL will be executed to
   * try and find the archive IDs even though we know there are none.
   *
   * @param string $doneFlag
   */
  private function initializeArchiveIdCache($doneFlag)
  {
      if (isset($this->idarchives[$doneFlag])) {
          return;
      }

      $this->idarchives[$doneFlag] = array();
  }
  /**
   * Returns the archiving group identifier given a plugin.
   *
   * More than one plugin can be called at once when archiving. In such a case
   * we don't want to launch archiving three times for three plugins if doing
   * it once is enough, so getArchiveIds makes sure to get the archive group of
   * all reports.
   *
   * For a range period the group is the plugin itself; for any other period it is
   * the "all plugins" flag.
   *
   * @param string $plugin
   * @return string
   */
  private function getArchiveGroupOfPlugin($plugin)
  {
      $isRangePeriod = $this->getPeriodLabel() == 'range';

      return $isRangePeriod ? $plugin : self::ARCHIVE_ALL_PLUGINS_FLAG;
  }
  /**
   * Returns the name of the plugin that archives a given report.
   *
   * The plugin name is the part of the record name before the first underscore. Three
   * kinds of names are first mapped to a record name owned by a specific plugin:
   * visits metric names, names starting with `Goal_` and names ending in `_returning`.
   *
   * @param string $report Archive data name, eg, `'nb_visits'`, `'UserSettings_...'`, etc.
   * @return string Plugin name.
   * @throws \Exception If a plugin cannot be found or if the plugin for the report isn't
   *                    activated.
   */
  private static function getPluginForReport($report)
  {
      $returningSuffix = '_returning';

      if (in_array($report, Metrics::getVisitsMetricNames())) {
          // Core metrics are always processed in Core, for the requested date/period/segment
          $report = 'VisitsSummary_CoreMetrics';
      } elseif (strncmp($report, 'Goal_', 5) === 0) {
          // Goal_* metrics are processed by the Goals plugin (HACK)
          $report = 'Goals_Metrics';
      } elseif (substr($report, -strlen($returningSuffix)) === $returningSuffix) {
          // HACK
          $report = 'VisitFrequency_Metrics';
      }

      $plugin = substr($report, 0, strpos($report, '_'));

      // the plugin manager is only consulted when a plugin name could be extracted
      $pluginIsUsable = !empty($plugin)
          && \Piwik\Plugin\Manager::getInstance()->isPluginActivated($plugin);

      if (!$pluginIsUsable) {
          throw new \Exception("Error: The report '$report' was requested but it is not available at this stage."
              . " (Plugin '$plugin' is not activated.)");
      }

      return $plugin;
  }
  /**
   * Runs the archive loader for one site and one period, once per archive group, and
   * records every archive ID it returns in $this->idarchives (keyed by done flag,
   * then by the period's range string).
   *
   * A cache entry is created for each group's done flag even when no archive ID is returned.
   *
   * @param array $archiveGroups Archive groups (plugin names) to process.
   * @param Site $site The site to prepare archives for.
   * @param Period $period The period to prepare archives for.
   */
  private function prepareArchive(array $archiveGroups, Site $site, Period $period)
  {
      $loaderParameters = new ArchiveProcessor\Parameters(
          $site,
          $period,
          $this->params->getSegment(),
          $this->params->isSkipAggregationOfSubTables()
      );
      $loader = new ArchiveProcessor\Loader($loaderParameters);
      $rangeKey = $period->getRangeString();

      // process for each plugin as well
      foreach ($archiveGroups as $plugin) {
          $doneFlag = $this->getDoneStringForPlugin($plugin);
          $this->initializeArchiveIdCache($doneFlag);

          $idArchive = $loader->prepareArchive($plugin);
          if (!$idArchive) {
              continue;
          }

          $this->idarchives[$doneFlag][$rangeKey][] = $idArchive;
      }
  }
  734. }