PageRenderTime 45ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/plugins/PrivacyManager/ReportsPurger.php

https://github.com/CodeYellowBV/piwik
PHP | 393 lines | 212 code | 54 blank | 127 comment | 34 complexity | 5564b292e001aa6278983a933a46d2f3 MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik\Plugins\PrivacyManager;
  10. use Piwik\Common;
  11. use Piwik\DataAccess\ArchiveTableCreator;
  12. use Piwik\Date;
  13. use Piwik\Db;
  14. use Piwik\DbHelper;
  15. use Piwik\Piwik;
  /**
   * Purges archived reports and metrics that are considered old.
   *
   * An archive table is considered old when the year/month encoded in its name is not
   * later than "today minus (1 + $deleteReportsOlderThan) months". What is removed from
   * those tables is controlled by the "keep" settings passed to the constructor.
   */
  class ReportsPurger
  {
      // constant used in database purging estimate to signify a table should be dropped
      const DROP_TABLE = -1;

      /**
       * The max set of rows each table scan select should query at one time.
       */
      public static $selectSegmentSize = 100000;

      /**
       * The number of months after which report/metric data is considered old.
       */
      private $deleteReportsOlderThan;

      /**
       * Whether to keep basic metrics or not.
       */
      private $keepBasicMetrics;

      /**
       * Array of period types. Reports for these periods will not be purged.
       */
      private $reportPeriodsToKeep;

      /**
       * Whether to keep reports for segments or not.
       */
      private $keepSegmentReports;

      /**
       * The maximum number of rows to delete per DELETE query.
       */
      private $maxRowsToDeletePerQuery;

      /**
       * List of metrics that should be kept when purging. If $keepBasicMetrics is true,
       * these metrics will be saved.
       */
      private $metricsToKeep;

      /**
       * Array that maps a year and month ('2012_01') with lists of archive IDs for segmented
       * archives. Null until findSegmentArchives() has populated it.
       */
      private $segmentArchiveIds = null;

      /**
       * Constructor.
       *
       * @param int $deleteReportsOlderThan The number of months after which report/metric data
       *                                    is considered old.
       * @param bool $keepBasicMetrics Whether to keep basic metrics or not.
       * @param array $reportPeriodsToKeep Array of period types. Reports for these periods will not
       *                                   be purged.
       * @param bool $keepSegmentReports Whether to keep reports for segments or not.
       * @param array $metricsToKeep List of metrics that should be kept. if $keepBasicMetrics
       *                             is true, these metrics will be saved.
       * @param int $maxRowsToDeletePerQuery The maximum number of rows to delete per DELETE query.
       */
      public function __construct($deleteReportsOlderThan, $keepBasicMetrics, $reportPeriodsToKeep,
                                  $keepSegmentReports, $metricsToKeep, $maxRowsToDeletePerQuery)
      {
          $this->deleteReportsOlderThan  = (int) $deleteReportsOlderThan;
          $this->keepBasicMetrics        = (bool) $keepBasicMetrics;
          $this->reportPeriodsToKeep     = $reportPeriodsToKeep;
          $this->keepSegmentReports      = (bool) $keepSegmentReports;
          $this->metricsToKeep           = $metricsToKeep;
          $this->maxRowsToDeletePerQuery = (int) $maxRowsToDeletePerQuery;
      }

      /**
       * Purges old report/metric data.
       *
       * Rows are removed through Db::deleteAllRows(), which is given
       * $maxRowsToDeletePerQuery as its row limit; this method does not drop tables itself.
       *
       * Blob tables: rows matching getBlobTableWhereExpr() are deleted. When
       * $reportPeriodsToKeep is empty that expression is empty and no WHERE clause is
       * passed, so the delete is unrestricted.
       *
       * Numeric tables: rows whose name starts with 'done' are never deleted. If
       * $keepBasicMetrics is true and $metricsToKeep is not empty, rows for those metrics
       * are kept as well. The period/segment expression used for blob tables is applied
       * here too.
       *
       * @param bool $optimize If tables should be optimized after rows are deleted. Normally,
       *                       this is handled by a scheduled task.
       */
      public function purgeData($optimize = false)
      {
          list($oldNumericTables, $oldBlobTables) = $this->getArchiveTablesToPurge();

          // process blob tables first, since archive status is stored in the numeric archives
          if (!empty($oldBlobTables)) {
              foreach ($oldBlobTables as $table) {
                  $where = $this->getBlobTableWhereExpr($oldNumericTables, $table);
                  if (!empty($where)) {
                      $where = "WHERE $where";
                  }

                  Db::deleteAllRows($table, $where, "idarchive ASC", $this->maxRowsToDeletePerQuery);
              }

              if ($optimize) {
                  Db::optimizeTables($oldBlobTables);
              }
          }

          // discard the cached segment archive IDs so the numeric pass looks them up again
          $this->segmentArchiveIds = null;

          if (!empty($oldNumericTables)) {
              foreach ($oldNumericTables as $table) {
                  $conditions = array("name NOT LIKE 'done%'");
                  $bind       = array();

                  if ($this->keepBasicMetrics && !empty($this->metricsToKeep)) {
                      $metricFields = Common::getSqlStringFieldsArray($this->metricsToKeep);
                      $bind         = $this->metricsToKeep;
                      $conditions[] = sprintf("name NOT IN (%s)", $metricFields);
                  }

                  $keepWhere = $this->getBlobTableWhereExpr($oldNumericTables, $table);
                  if (!empty($keepWhere)) {
                      $conditions[] = $keepWhere;
                  }

                  $where = 'WHERE ' . implode(' AND ', $conditions);

                  Db::deleteAllRows($table, $where, "idarchive ASC", $this->maxRowsToDeletePerQuery, $bind);
              }

              if ($optimize) {
                  Db::optimizeTables($oldNumericTables);
              }
          }
      }

      /**
       * Returns an array describing what data would be purged if purging were invoked.
       *
       * This function returns an array that maps table names with the number of rows
       * that will be deleted. If a table name is mapped with self::DROP_TABLE, the table
       * is reported as one to be dropped. Tables with nothing to delete are omitted.
       *
       * @return array
       */
      public function getPurgeEstimate()
      {
          $result = array();

          // get archive tables that will be purged
          list($oldNumericTables, $oldBlobTables) = $this->getArchiveTablesToPurge();

          // process blob tables first, since archive status is stored in the numeric archives
          if (empty($this->reportPeriodsToKeep) && !$this->keepSegmentReports) {
              // not keeping any reports, so drop all tables
              foreach ($oldBlobTables as $table) {
                  $result[$table] = self::DROP_TABLE;
              }
          } else {
              // figure out which rows will be deleted
              foreach ($oldBlobTables as $table) {
                  $rowCount = $this->getBlobTableDeleteCount($oldNumericTables, $table);
                  if ($rowCount > 0) {
                      $result[$table] = $rowCount;
                  }
              }
          }

          // deal w/ numeric tables
          if ($this->keepBasicMetrics) {
              // figure out which rows will be deleted
              foreach ($oldNumericTables as $table) {
                  $rowCount = $this->getNumericTableDeleteCount($table);
                  if ($rowCount > 0) {
                      $result[$table] = $rowCount;
                  }
              }
          } else {
              // not keeping any metrics, so drop the entire table
              foreach ($oldNumericTables as $table) {
                  $result[$table] = self::DROP_TABLE;
              }
          }

          return $result;
      }

      /**
       * Utility function that finds every archive table whose reports are considered
       * old.
       *
       * @return array An array of two arrays. The first holds the numeric archive table
       *               names, and the second holds the blob archive table names.
       */
      private function getArchiveTablesToPurge()
      {
          // get month for which reports as old or older than, should be deleted
          // reports whose creation date <= this month will be deleted
          // (NOTE: we ignore how far we are in the current month)
          $toRemoveDate = Date::factory('today')->subMonth(1 + $this->deleteReportsOlderThan);

          // find all archive tables that are older than N months
          $oldNumericTables = array();
          $oldBlobTables    = array();

          foreach (DbHelper::getTablesInstalled() as $table) {
              $type = ArchiveTableCreator::getTypeFromTableName($table);
              if ($type === false) {
                  // a false type means the table was not recognised as an archive table
                  continue;
              }

              $date = ArchiveTableCreator::getDateFromTableName($table);
              list($year, $month) = explode('_', $date);

              if (!self::shouldReportBePurged($year, $month, $toRemoveDate)) {
                  continue;
              }

              if ($type == ArchiveTableCreator::NUMERIC_TABLE) {
                  $oldNumericTables[] = $table;
              } else {
                  $oldBlobTables[] = $table;
              }
          }

          return array($oldNumericTables, $oldBlobTables);
      }

      /**
       * Returns true if a report with the given year & month should be purged or not.
       *
       * A report is purged when its year/month is the same as, or earlier than, the
       * year/month of $toRemoveDate.
       *
       * @param int $reportDateYear The year of the report in question.
       * @param int $reportDateMonth The month of the report in question.
       * @param Date $toRemoveDate The date a report must be older than in order to be purged.
       * @return bool
       */
      public static function shouldReportBePurged($reportDateYear, $reportDateMonth, $toRemoveDate)
      {
          $toRemoveYear  = (int) $toRemoveDate->toString('Y');
          $toRemoveMonth = (int) $toRemoveDate->toString('m');

          return $reportDateYear < $toRemoveYear
              || ($reportDateYear == $toRemoveYear && $reportDateMonth <= $toRemoveMonth);
      }

      /**
       * Counts the rows of a numeric archive table that are neither 'done%' flags nor
       * one of the metrics in $metricsToKeep.
       *
       * The metric names are passed as bound parameters (same approach as purgeData())
       * instead of being concatenated into the SQL string.
       *
       * NOTE(review): unlike purgeData(), this count does not apply the period/segment
       * expression from getBlobTableWhereExpr() - confirm whether that is intended.
       *
       * @param string $table The numeric archive table name.
       * @return int|float The sum of the per-segment counts.
       */
      private function getNumericTableDeleteCount($table)
      {
          $maxIdArchive = Db::fetchOne("SELECT MAX(idarchive) FROM $table");

          $conditions = array();
          $bind       = array();

          if (!empty($this->metricsToKeep)) {
              $bind         = array_values($this->metricsToKeep);
              $conditions[] = sprintf("name NOT IN (%s)", Common::getSqlStringFieldsArray($bind));
          }

          $conditions[] = "name NOT LIKE 'done%'";
          // the idarchive range placeholders must come last: the range values are
          // appended after $bind for each scanned segment
          $conditions[] = "idarchive >= ?";
          $conditions[] = "idarchive < ?";

          $sql = "SELECT COUNT(*)
                    FROM $table
                   WHERE " . implode(' AND ', $conditions);

          $segments = Db::segmentedFetchOne($sql, 0, $maxIdArchive, self::$selectSegmentSize, $bind);
          return array_sum($segments);
      }

      /**
       * Counts the rows of a blob archive table that purgeData() would delete.
       *
       * @param array $oldNumericTables The numeric archive tables being purged.
       * @param string $table The blob archive table name.
       * @return int|float The sum of the per-segment counts.
       */
      private function getBlobTableDeleteCount($oldNumericTables, $table)
      {
          $maxIdArchive = Db::fetchOne("SELECT MAX(idarchive) FROM $table");

          $conditions = array();

          // the purge expression is empty when no report periods are kept; adding it
          // unconditionally would produce the invalid SQL "WHERE  AND idarchive >= ?"
          $purgeExpr = $this->getBlobTableWhereExpr($oldNumericTables, $table);
          if (!empty($purgeExpr)) {
              $conditions[] = $purgeExpr;
          }

          $conditions[] = "idarchive >= ?";
          $conditions[] = "idarchive < ?";

          $sql = "SELECT COUNT(*)
                    FROM $table
                   WHERE " . implode(' AND ', $conditions);

          $segments = Db::segmentedFetchOne($sql, 0, $maxIdArchive, self::$selectSegmentSize);
          return array_sum($segments);
      }

      /**
       * Returns SQL WHERE expression used to find reports that should be purged.
       *
       * @param array $oldNumericTables The numeric archive tables being purged; queried for
       *                                segment archive IDs when segments are not kept.
       * @param string $table The archive table the expression is built for.
       * @return string A parenthesised expression, or an empty string if no report
       *                periods are being kept.
       */
      private function getBlobTableWhereExpr($oldNumericTables, $table)
      {
          if (empty($this->reportPeriodsToKeep)) {
              // not keeping any reports
              return "";
          }

          // the values are interpolated into SQL, so force them to integers
          $periods = array_map('intval', $this->reportPeriodsToKeep);
          $where   = "period NOT IN (" . implode(',', $periods) . ")";

          // if not keeping segments make sure segments w/ kept periods are also deleted
          if (!$this->keepSegmentReports) {
              $this->findSegmentArchives($oldNumericTables);

              $dateFromTable = ArchiveTableCreator::getDateFromTableName($table);
              if (!empty($this->segmentArchiveIds[$dateFromTable])) {
                  $archiveIds = array_map('intval', $this->segmentArchiveIds[$dateFromTable]);
                  $where .= " OR idarchive IN (" . implode(',', $archiveIds) . ")";
              }
          }

          return "($where)";
      }

      /**
       * When segment reports are not being kept, we need to know which archives are
       * for segments so they can be purged even if their period is kept. This info is
       * only in the numeric tables, so we must query them.
       *
       * Rows whose name matches 'done_%.%' are treated as marking a segment archive.
       * The result is cached in $segmentArchiveIds, keyed by the table's date; nothing
       * is done if the cache is already populated or no numeric tables are given.
       *
       * @param array $numericTables The numeric archive tables to scan.
       */
      private function findSegmentArchives($numericTables)
      {
          if (!is_null($this->segmentArchiveIds) || empty($numericTables)) {
              return;
          }

          $this->segmentArchiveIds = array();

          foreach ($numericTables as $table) {
              $tableDate    = ArchiveTableCreator::getDateFromTableName($table);
              $maxIdArchive = Db::fetchOne("SELECT MAX(idarchive) FROM $table");

              $sql = "SELECT idarchive
                        FROM $table
                       WHERE name != 'done'
                         AND name LIKE 'done_%.%'
                         AND idarchive >= ?
                         AND idarchive < ?";

              $this->segmentArchiveIds[$tableDate] = array();
              foreach (Db::segmentedFetchAll($sql, 0, $maxIdArchive, self::$selectSegmentSize) as $row) {
                  $this->segmentArchiveIds[$tableDate][] = $row['idarchive'];
              }
          }
      }

      /**
       * Utility function. Creates a new instance of ReportsPurger with the supplied array
       * of settings.
       *
       * $settings must contain the following keys:
       * -'delete_reports_older_than': The number of months after which reports/metrics are
       *                               considered old.
       * -'delete_reports_keep_basic_metrics': 1 if basic metrics should be kept, 0 if otherwise.
       * -'delete_reports_keep_day_reports': 1 if daily reports should be kept, 0 if otherwise.
       * -'delete_reports_keep_week_reports': 1 if weekly reports should be kept, 0 if otherwise.
       * -'delete_reports_keep_month_reports': 1 if monthly reports should be kept, 0 if otherwise.
       * -'delete_reports_keep_year_reports': 1 if yearly reports should be kept, 0 if otherwise.
       * -'delete_reports_keep_range_reports': 1 if range reports should be kept, 0 if otherwise.
       * -'delete_reports_keep_segment_reports': 1 if reports for segments should be kept, 0 if otherwise.
       * -'delete_logs_max_rows_per_query': Maximum number of rows to delete in one DELETE query.
       *
       * @param array $settings The settings described above.
       * @param array $metricsToKeep List of metrics that should be kept when purging.
       * @return ReportsPurger
       */
      public static function make($settings, $metricsToKeep)
      {
          return new self(
              $settings['delete_reports_older_than'],
              $settings['delete_reports_keep_basic_metrics'] == 1,
              self::getReportPeriodsToKeep($settings),
              $settings['delete_reports_keep_segment_reports'] == 1,
              $metricsToKeep,
              $settings['delete_logs_max_rows_per_query']
          );
      }

      /**
       * Utility function that returns an array period values based on the 'delete_reports_keep_*'
       * settings. The period values returned are the integer values stored in the DB.
       *
       * A period whose 'delete_reports_keep_*' setting is missing is treated as not kept.
       *
       * @param array $settings The settings to use.
       * @return array An array of period values that should be kept when purging old data.
       */
      private static function getReportPeriodsToKeep($settings)
      {
          $keepReportPeriods = array();

          foreach (Piwik::$idPeriods as $strPeriod => $intPeriod) {
              $optionName = "delete_reports_keep_{$strPeriod}_reports";

              if (isset($settings[$optionName]) && $settings[$optionName] == 1) {
                  $keepReportPeriods[] = $intPeriod;
              }
          }

          return $keepReportPeriods;
      }
  }