PageRenderTime 49ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/core/DataAccess/ArchiveSelector.php

https://github.com/CodeYellowBV/piwik
PHP | 358 lines | 273 code | 31 blank | 54 comment | 21 complexity | 1083a2925e2725d0e9e0c26a7ab4ba8d MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik\DataAccess;
  10. use Exception;
  11. use Piwik\ArchiveProcessor\Rules;
  12. use Piwik\ArchiveProcessor;
  13. use Piwik\Common;
  14. use Piwik\Date;
  15. use Piwik\Db;
  16. use Piwik\Log;
  17. use Piwik\Period;
  18. use Piwik\Period\Range;
  19. use Piwik\Piwik;
  20. use Piwik\Segment;
  21. /**
  22. * Data Access object used to query archives
  23. *
  24. * A record in the Database for a given report is defined by
  25. * - idarchive = unique ID that is associated to all the data of this archive (idsite+period+date)
  26. * - idsite = the ID of the website
  27. * - date1 = starting day of the period
  28. * - date2 = ending day of the period
  29. * - period = integer that defines the period (day/week/etc.). @see period::getId()
  30. * - ts_archived = timestamp when the archive was processed (UTC)
  31. * - name = the name of the report (ex: uniq_visitors or search_keywords_by_search_engines)
  32. * - value = the actual data (a numeric value, or a blob of compressed serialized data)
  33. *
  34. */
  35. class ArchiveSelector
  36. {
  37. const NB_VISITS_RECORD_LOOKED_UP = "nb_visits";
  38. const NB_VISITS_CONVERTED_RECORD_LOOKED_UP = "nb_visits_converted";
  39. static public function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params, $minDatetimeArchiveProcessedUTC)
  40. {
  41. $dateStart = $params->getPeriod()->getDateStart();
  42. $bindSQL = array($params->getSite()->getId(),
  43. $dateStart->toString('Y-m-d'),
  44. $params->getPeriod()->getDateEnd()->toString('Y-m-d'),
  45. $params->getPeriod()->getId(),
  46. );
  47. $timeStampWhere = '';
  48. if ($minDatetimeArchiveProcessedUTC) {
  49. $timeStampWhere = " AND ts_archived >= ? ";
  50. $bindSQL[] = Date::factory($minDatetimeArchiveProcessedUTC)->getDatetime();
  51. }
  52. $requestedPlugin = $params->getRequestedPlugin();
  53. $segment = $params->getSegment();
  54. $isSkipAggregationOfSubTables = $params->isSkipAggregationOfSubTables();
  55. $plugins = array("VisitsSummary", $requestedPlugin);
  56. $sqlWhereArchiveName = self::getNameCondition($plugins, $segment, $isSkipAggregationOfSubTables);
  57. $sqlQuery = " SELECT idarchive, value, name, date1 as startDate
  58. FROM " . ArchiveTableCreator::getNumericTable($dateStart) . "``
  59. WHERE idsite = ?
  60. AND date1 = ?
  61. AND date2 = ?
  62. AND period = ?
  63. AND ( ($sqlWhereArchiveName)
  64. OR name = '" . self::NB_VISITS_RECORD_LOOKED_UP . "'
  65. OR name = '" . self::NB_VISITS_CONVERTED_RECORD_LOOKED_UP . "')
  66. $timeStampWhere
  67. ORDER BY idarchive DESC";
  68. $results = Db::fetchAll($sqlQuery, $bindSQL);
  69. if (empty($results)) {
  70. return false;
  71. }
  72. $idArchive = self::getMostRecentIdArchiveFromResults($segment, $requestedPlugin, $isSkipAggregationOfSubTables, $results);
  73. $idArchiveVisitsSummary = self::getMostRecentIdArchiveFromResults($segment, "VisitsSummary", $isSkipAggregationOfSubTables, $results);
  74. list($visits, $visitsConverted) = self::getVisitsMetricsFromResults($idArchive, $idArchiveVisitsSummary, $results);
  75. if ($visits === false
  76. && $idArchive === false
  77. ) {
  78. return false;
  79. }
  80. return array($idArchive, $visits, $visitsConverted);
  81. }
  82. protected static function getVisitsMetricsFromResults($idArchive, $idArchiveVisitsSummary, $results)
  83. {
  84. $visits = $visitsConverted = false;
  85. $archiveWithVisitsMetricsWasFound = ($idArchiveVisitsSummary !== false);
  86. if ($archiveWithVisitsMetricsWasFound) {
  87. $visits = $visitsConverted = 0;
  88. }
  89. foreach ($results as $result) {
  90. if (in_array($result['idarchive'], array($idArchive, $idArchiveVisitsSummary))) {
  91. $value = (int)$result['value'];
  92. if (empty($visits)
  93. && $result['name'] == self::NB_VISITS_RECORD_LOOKED_UP
  94. ) {
  95. $visits = $value;
  96. }
  97. if (empty($visitsConverted)
  98. && $result['name'] == self::NB_VISITS_CONVERTED_RECORD_LOOKED_UP
  99. ) {
  100. $visitsConverted = $value;
  101. }
  102. }
  103. }
  104. return array($visits, $visitsConverted);
  105. }
  106. protected static function getMostRecentIdArchiveFromResults(Segment $segment, $requestedPlugin, $isSkipAggregationOfSubTables, $results)
  107. {
  108. $idArchive = false;
  109. $namesRequestedPlugin = Rules::getDoneFlags(array($requestedPlugin), $segment, $isSkipAggregationOfSubTables);
  110. foreach ($results as $result) {
  111. if ($idArchive === false
  112. && in_array($result['name'], $namesRequestedPlugin)
  113. ) {
  114. $idArchive = $result['idarchive'];
  115. break;
  116. }
  117. }
  118. return $idArchive;
  119. }
  120. /**
  121. * Queries and returns archive IDs for a set of sites, periods, and a segment.
  122. *
  123. * @param array $siteIds
  124. * @param array $periods
  125. * @param Segment $segment
  126. * @param array $plugins List of plugin names for which data is being requested.
  127. * @param bool $isSkipAggregationOfSubTables Whether we are selecting an archive that may be partial (no sub-tables)
  128. * @return array Archive IDs are grouped by archive name and period range, ie,
  129. * array(
  130. * 'VisitsSummary.done' => array(
  131. * '2010-01-01' => array(1,2,3)
  132. * )
  133. * )
  134. */
  135. static public function getArchiveIds($siteIds, $periods, $segment, $plugins, $isSkipAggregationOfSubTables = false)
  136. {
  137. $getArchiveIdsSql = "SELECT idsite, name, date1, date2, MAX(idarchive) as idarchive
  138. FROM %s
  139. WHERE %s
  140. AND " . self::getNameCondition($plugins, $segment, $isSkipAggregationOfSubTables) . "
  141. AND idsite IN (" . implode(',', $siteIds) . ")
  142. GROUP BY idsite, date1, date2";
  143. $monthToPeriods = array();
  144. foreach ($periods as $period) {
  145. /** @var Period $period */
  146. $table = ArchiveTableCreator::getNumericTable($period->getDateStart());
  147. $monthToPeriods[$table][] = $period;
  148. }
  149. // for every month within the archive query, select from numeric table
  150. $result = array();
  151. foreach ($monthToPeriods as $table => $periods) {
  152. $firstPeriod = reset($periods);
  153. $bind = array();
  154. if ($firstPeriod instanceof Range) {
  155. $dateCondition = "period = ? AND date1 = ? AND date2 = ?";
  156. $bind[] = $firstPeriod->getId();
  157. $bind[] = $firstPeriod->getDateStart()->toString('Y-m-d');
  158. $bind[] = $firstPeriod->getDateEnd()->toString('Y-m-d');
  159. } else {
  160. // we assume there is no range date in $periods
  161. $dateCondition = '(';
  162. foreach ($periods as $period) {
  163. if (strlen($dateCondition) > 1) {
  164. $dateCondition .= ' OR ';
  165. }
  166. $dateCondition .= "(period = ? AND date1 = ? AND date2 = ?)";
  167. $bind[] = $period->getId();
  168. $bind[] = $period->getDateStart()->toString('Y-m-d');
  169. $bind[] = $period->getDateEnd()->toString('Y-m-d');
  170. }
  171. $dateCondition .= ')';
  172. }
  173. $sql = sprintf($getArchiveIdsSql, $table, $dateCondition);
  174. // get the archive IDs
  175. foreach (Db::fetchAll($sql, $bind) as $row) {
  176. $archiveName = $row['name'];
  177. //FIXMEA duplicate with Archive.php
  178. $dateStr = $row['date1'] . "," . $row['date2'];
  179. $result[$archiveName][$dateStr][] = $row['idarchive'];
  180. }
  181. }
  182. return $result;
  183. }
  184. /**
  185. * Queries and returns archive data using a set of archive IDs.
  186. *
  187. * @param array $archiveIds The IDs of the archives to get data from.
  188. * @param array $recordNames The names of the data to retrieve (ie, nb_visits, nb_actions, etc.)
  189. * @param string $archiveDataType The archive data type (either, 'blob' or 'numeric').
  190. * @param bool $loadAllSubtables Whether to pre-load all subtables
  191. * @throws Exception
  192. * @return array
  193. */
  194. static public function getArchiveData($archiveIds, $recordNames, $archiveDataType, $loadAllSubtables)
  195. {
  196. // create the SQL to select archive data
  197. $inNames = Common::getSqlStringFieldsArray($recordNames);
  198. if ($loadAllSubtables) {
  199. $name = reset($recordNames);
  200. // select blobs w/ name like "$name_[0-9]+" w/o using RLIKE
  201. $nameEnd = strlen($name) + 2;
  202. $whereNameIs = "(name = ?
  203. OR (name LIKE ?
  204. AND SUBSTRING(name, $nameEnd, 1) >= '0'
  205. AND SUBSTRING(name, $nameEnd, 1) <= '9') )";
  206. $bind = array($name, $name . '%');
  207. } else {
  208. $whereNameIs = "name IN ($inNames)";
  209. $bind = array_values($recordNames);
  210. }
  211. $getValuesSql = "SELECT value, name, idsite, date1, date2, ts_archived
  212. FROM %s
  213. WHERE idarchive IN (%s)
  214. AND " . $whereNameIs;
  215. // get data from every table we're querying
  216. $rows = array();
  217. foreach ($archiveIds as $period => $ids) {
  218. if (empty($ids)) {
  219. throw new Exception("Unexpected: id archive not found for period '$period' '");
  220. }
  221. // $period = "2009-01-04,2009-01-04",
  222. $date = Date::factory(substr($period, 0, 10));
  223. if ($archiveDataType == 'numeric') {
  224. $table = ArchiveTableCreator::getNumericTable($date);
  225. } else {
  226. $table = ArchiveTableCreator::getBlobTable($date);
  227. }
  228. $sql = sprintf($getValuesSql, $table, implode(',', $ids));
  229. $dataRows = Db::fetchAll($sql, $bind);
  230. foreach ($dataRows as $row) {
  231. $rows[] = $row;
  232. }
  233. }
  234. return $rows;
  235. }
  236. /**
  237. * Returns the SQL condition used to find successfully completed archives that
  238. * this instance is querying for.
  239. *
  240. * @param array $plugins
  241. * @param Segment $segment
  242. * @param bool $isSkipAggregationOfSubTables
  243. * @return string
  244. */
  245. static private function getNameCondition(array $plugins, Segment $segment, $isSkipAggregationOfSubTables)
  246. {
  247. // the flags used to tell how the archiving process for a specific archive was completed,
  248. // if it was completed
  249. $doneFlags = Rules::getDoneFlags($plugins, $segment, $isSkipAggregationOfSubTables);
  250. $allDoneFlags = "'" . implode("','", $doneFlags) . "'";
  251. // create the SQL to find archives that are DONE
  252. return "((name IN ($allDoneFlags)) AND " .
  253. " (value = '" . ArchiveWriter::DONE_OK . "' OR " .
  254. " value = '" . ArchiveWriter::DONE_OK_TEMPORARY . "'))";
  255. }
  256. static public function purgeOutdatedArchives(Date $dateStart)
  257. {
  258. $purgeArchivesOlderThan = Rules::shouldPurgeOutdatedArchives($dateStart);
  259. if (!$purgeArchivesOlderThan) {
  260. return;
  261. }
  262. $idArchivesToDelete = self::getTemporaryArchiveIdsOlderThan($dateStart, $purgeArchivesOlderThan);
  263. if (!empty($idArchivesToDelete)) {
  264. self::deleteArchiveIds($dateStart, $idArchivesToDelete);
  265. }
  266. self::deleteArchivesWithPeriodRange($dateStart);
  267. Log::debug("Purging temporary archives: done [ purged archives older than %s in %s ] [Deleted IDs: %s]",
  268. $purgeArchivesOlderThan, $dateStart->toString("Y-m"), implode(',', $idArchivesToDelete));
  269. }
  270. /*
  271. * Deleting "Custom Date Range" reports after 1 day, since they can be re-processed and would take up un-necessary space
  272. */
  273. protected static function deleteArchivesWithPeriodRange(Date $date)
  274. {
  275. $query = "DELETE FROM %s WHERE period = ? AND ts_archived < ?";
  276. $yesterday = Date::factory('yesterday')->getDateTime();
  277. $bind = array(Piwik::$idPeriods['range'], $yesterday);
  278. $numericTable = ArchiveTableCreator::getNumericTable($date);
  279. Db::query(sprintf($query, $numericTable), $bind);
  280. Log::debug("Purging Custom Range archives: done [ purged archives older than %s from %s / blob ]", $yesterday, $numericTable);
  281. try {
  282. Db::query(sprintf($query, ArchiveTableCreator::getBlobTable($date)), $bind);
  283. } catch (Exception $e) {
  284. // Individual blob tables could be missing
  285. }
  286. }
  287. protected static function deleteArchiveIds(Date $date, $idArchivesToDelete)
  288. {
  289. $query = "DELETE FROM %s WHERE idarchive IN (" . implode(',', $idArchivesToDelete) . ")";
  290. Db::query(sprintf($query, ArchiveTableCreator::getNumericTable($date)));
  291. try {
  292. Db::query(sprintf($query, ArchiveTableCreator::getBlobTable($date)));
  293. } catch (Exception $e) {
  294. // Individual blob tables could be missing
  295. }
  296. }
  297. protected static function getTemporaryArchiveIdsOlderThan(Date $date, $purgeArchivesOlderThan)
  298. {
  299. $query = "SELECT idarchive
  300. FROM " . ArchiveTableCreator::getNumericTable($date) . "
  301. WHERE name LIKE 'done%'
  302. AND (( value = " . ArchiveWriter::DONE_OK_TEMPORARY . "
  303. AND ts_archived < ?)
  304. OR value = " . ArchiveWriter::DONE_ERROR . ")";
  305. $result = Db::fetchAll($query, array($purgeArchivesOlderThan));
  306. $idArchivesToDelete = array();
  307. if (!empty($result)) {
  308. foreach ($result as $row) {
  309. $idArchivesToDelete[] = $row['idarchive'];
  310. }
  311. }
  312. return $idArchivesToDelete;
  313. }
  314. }