PageRenderTime 98ms CodeModel.GetById 32ms RepoModel.GetById 3ms app.codeStats 0ms

/analytics/core/Archive/ArchivePurger.php

https://gitlab.com/yasminmostfa/thomas-site
PHP | 272 lines | 135 code | 39 blank | 98 comment | 9 complexity | 038dc88337663e38f0d529a85f8371d2 MD5 | raw file
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik\Archive;
  10. use Piwik\ArchiveProcessor\Rules;
  11. use Piwik\Config;
  12. use Piwik\Container\StaticContainer;
  13. use Piwik\DataAccess\ArchiveTableCreator;
  14. use Piwik\DataAccess\Model;
  15. use Piwik\Date;
  16. use Piwik\Piwik;
  17. use Psr\Log\LoggerInterface;
  18. use Psr\Log\LogLevel;
  19. /**
  20. * Service that purges temporary, error-ed, invalid and custom range archives from archive tables.
  21. *
  22. * Temporary archives are purged if they were archived before a specific time. The time is dependent
  23. * on whether browser triggered archiving is enabled or not.
  24. *
  25. * Error-ed archives are purged w/o constraint.
  26. *
  27. * Invalid archives are purged if a new, valid, archive exists w/ the same site, date, period combination.
  28. * Archives are marked as invalid via Piwik\Archive\ArchiveInvalidator.
  29. */
  30. class ArchivePurger
  31. {
  32. /**
  33. * @var Model
  34. */
  35. private $model;
  36. /**
  37. * Date threshold for purging custom range archives. Archives that are older than this date
  38. * are purged unconditionally from the requested archive table.
  39. *
  40. * @var Date
  41. */
  42. private $purgeCustomRangesOlderThan;
  43. /**
  44. * Date to use for 'yesterday'. Exists so tests can override this value.
  45. *
  46. * @var Date
  47. */
  48. private $yesterday;
  49. /**
  50. * Date to use for 'today'. Exists so tests can override this value.
  51. *
  52. * @var $today
  53. */
  54. private $today;
  55. /**
  56. * Date to use for 'now'. Exists so tests can override this value.
  57. *
  58. * @var int
  59. */
  60. private $now;
  61. /**
  62. * @var LoggerInterface
  63. */
  64. private $logger;
  65. public function __construct(Model $model = null, Date $purgeCustomRangesOlderThan = null, LoggerInterface $logger = null)
  66. {
  67. $this->model = $model ?: new Model();
  68. $this->purgeCustomRangesOlderThan = $purgeCustomRangesOlderThan ?: self::getDefaultCustomRangeToPurgeAgeThreshold();
  69. $this->yesterday = Date::factory('yesterday');
  70. $this->today = Date::factory('today');
  71. $this->now = time();
  72. $this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
  73. }
  74. /**
  75. * Purge all invalidate archives for whom there are newer, valid archives from the archive
  76. * table that stores data for `$date`.
  77. *
  78. * @param Date $date The date identifying the archive table.
  79. * @return int The total number of archive rows deleted (from both the blog & numeric tables).
  80. */
  81. public function purgeInvalidatedArchivesFrom(Date $date)
  82. {
  83. $numericTable = ArchiveTableCreator::getNumericTable($date);
  84. // we don't want to do an INNER JOIN on every row in a archive table that can potentially have tens to hundreds of thousands of rows,
  85. // so we first look for sites w/ invalidated archives, and use this as a constraint in getInvalidatedArchiveIdsSafeToDelete() below.
  86. // the constraint will hit an INDEX and speed up the inner join that happens in getInvalidatedArchiveIdsSafeToDelete().
  87. $idSites = $this->model->getSitesWithInvalidatedArchive($numericTable);
  88. if (empty($idSites)) {
  89. $this->logger->debug("No sites with invalidated archives found in {table}.", array('table' => $numericTable));
  90. return 0;
  91. }
  92. $archiveIds = $this->model->getInvalidatedArchiveIdsSafeToDelete($numericTable, $idSites);
  93. if (empty($archiveIds)) {
  94. $this->logger->debug("No invalidated archives found in {table} with newer, valid archives.", array('table' => $numericTable));
  95. return 0;
  96. }
  97. $this->logger->info("Found {countArchiveIds} invalidated archives safe to delete in {table}.", array(
  98. 'table' => $numericTable, 'countArchiveIds' => count($archiveIds)
  99. ));
  100. $deletedRowCount = $this->deleteArchiveIds($date, $archiveIds);
  101. $this->logger->debug("Deleted {count} rows in {table} and its associated blob table.", array(
  102. 'table' => $numericTable, 'count' => $deletedRowCount
  103. ));
  104. return $deletedRowCount;
  105. }
  106. /**
  107. * Removes the outdated archives for the given month.
  108. * (meaning they are marked with a done flag of ArchiveWriter::DONE_OK_TEMPORARY or ArchiveWriter::DONE_ERROR)
  109. *
  110. * @param Date $dateStart Only the month will be used
  111. * @return int Returns the total number of rows deleted.
  112. */
  113. public function purgeOutdatedArchives(Date $dateStart)
  114. {
  115. $purgeArchivesOlderThan = $this->getOldestTemporaryArchiveToKeepThreshold();
  116. $deletedRowCount = 0;
  117. $idArchivesToDelete = $this->getOutdatedArchiveIds($dateStart, $purgeArchivesOlderThan);
  118. if (!empty($idArchivesToDelete)) {
  119. $deletedRowCount = $this->deleteArchiveIds($dateStart, $idArchivesToDelete);
  120. $this->logger->info("Deleted {count} rows in archive tables (numeric + blob) for {date}.", array(
  121. 'count' => $deletedRowCount,
  122. 'date' => $dateStart
  123. ));
  124. } else {
  125. $this->logger->debug("No outdated archives found in archive numeric table for {date}.", array('date' => $dateStart));
  126. }
  127. $this->logger->debug("Purging temporary archives: done [ purged archives older than {date} in {yearMonth} ] [Deleted IDs: {deletedIds}]", array(
  128. 'date' => $purgeArchivesOlderThan,
  129. 'yearMonth' => $dateStart->toString('Y-m'),
  130. 'deletedIds' => implode(',', $idArchivesToDelete)
  131. ));
  132. return $deletedRowCount;
  133. }
  134. protected function getOutdatedArchiveIds(Date $date, $purgeArchivesOlderThan)
  135. {
  136. $archiveTable = ArchiveTableCreator::getNumericTable($date);
  137. $result = $this->model->getTemporaryArchivesOlderThan($archiveTable, $purgeArchivesOlderThan);
  138. $idArchivesToDelete = array();
  139. if (!empty($result)) {
  140. foreach ($result as $row) {
  141. $idArchivesToDelete[] = $row['idarchive'];
  142. }
  143. }
  144. return $idArchivesToDelete;
  145. }
  146. /**
  147. * Deleting "Custom Date Range" reports after 1 day, since they can be re-processed and would take up un-necessary space.
  148. *
  149. * @param $date Date
  150. * @return int The total number of rows deleted from both the numeric & blob table.
  151. */
  152. public function purgeArchivesWithPeriodRange(Date $date)
  153. {
  154. $numericTable = ArchiveTableCreator::getNumericTable($date);
  155. $blobTable = ArchiveTableCreator::getBlobTable($date);
  156. $deletedCount = $this->model->deleteArchivesWithPeriod(
  157. $numericTable, $blobTable, Piwik::$idPeriods['range'], $this->purgeCustomRangesOlderThan);
  158. $level = $deletedCount == 0 ? LogLevel::DEBUG : LogLevel::INFO;
  159. $this->logger->log($level, "Purged {count} range archive rows from {numericTable} & {blobTable}.", array(
  160. 'count' => $deletedCount,
  161. 'numericTable' => $numericTable,
  162. 'blobTable' => $blobTable
  163. ));
  164. $this->logger->debug(" [ purged archives older than {threshold} ]", array('threshold' => $this->purgeCustomRangesOlderThan));
  165. return $deletedCount;
  166. }
  167. /**
  168. * Deletes by batches Archive IDs in the specified month,
  169. *
  170. * @param Date $date
  171. * @param $idArchivesToDelete
  172. * @return int Number of rows deleted from both numeric + blob table.
  173. */
  174. protected function deleteArchiveIds(Date $date, $idArchivesToDelete)
  175. {
  176. $batches = array_chunk($idArchivesToDelete, 1000);
  177. $numericTable = ArchiveTableCreator::getNumericTable($date);
  178. $blobTable = ArchiveTableCreator::getBlobTable($date);
  179. $deletedCount = 0;
  180. foreach ($batches as $idsToDelete) {
  181. $deletedCount += $this->model->deleteArchiveIds($numericTable, $blobTable, $idsToDelete);
  182. }
  183. return $deletedCount;
  184. }
  185. /**
  186. * Returns a timestamp indicating outdated archives older than this timestamp (processed before) can be purged.
  187. *
  188. * @return int|bool Outdated archives older than this timestamp should be purged
  189. */
  190. protected function getOldestTemporaryArchiveToKeepThreshold()
  191. {
  192. $temporaryArchivingTimeout = Rules::getTodayArchiveTimeToLive();
  193. if (Rules::isBrowserTriggerEnabled()) {
  194. // If Browser Archiving is enabled, it is likely there are many more temporary archives
  195. // We delete more often which is safe, since reports are re-processed on demand
  196. return Date::factory($this->now - 2 * $temporaryArchivingTimeout)->getDateTime();
  197. }
  198. // If cron core:archive command is building the reports, we should keep all temporary reports from today
  199. return $this->yesterday->getDateTime();
  200. }
  201. private static function getDefaultCustomRangeToPurgeAgeThreshold()
  202. {
  203. $daysRangesValid = Config::getInstance()->General['purge_date_range_archives_after_X_days'];
  204. return Date::factory('today')->subDay($daysRangesValid)->getDateTime();
  205. }
  206. /**
  207. * For tests.
  208. *
  209. * @param Date $yesterday
  210. */
  211. public function setYesterdayDate(Date $yesterday)
  212. {
  213. $this->yesterday = $yesterday;
  214. }
  215. /**
  216. * For tests.
  217. *
  218. * @param Date $today
  219. */
  220. public function setTodayDate(Date $today)
  221. {
  222. $this->today = $today;
  223. }
  224. /**
  225. * For tests.
  226. *
  227. * @param int $now
  228. */
  229. public function setNow($now)
  230. {
  231. $this->now = $now;
  232. }
  233. }