/plugins/PrivacyManager/LogDataPurger.php

https://github.com/CodeYellowBV/piwik · PHP · 334 lines · 202 code · 46 blank · 86 comment · 15 complexity · 61b5d8b6999ff061b7c41e62c682fbf2 MD5 · raw file

  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik\Plugins\PrivacyManager;
  10. use Piwik\Common;
  11. use Piwik\Date;
  12. use Piwik\Db;
  13. use Piwik\Log;
  14. use Piwik\Piwik;
  15. /**
  16. * Purges the log_visit, log_conversion and related tables of old visit data.
  17. */
  18. class LogDataPurger
  19. {
  20. const TEMP_TABLE_NAME = 'tmp_log_actions_to_keep';
  21. /**
  22. * The max set of rows each table scan select should query at one time.
  23. */
  24. public static $selectSegmentSize = 100000;
  25. /**
  26. * The number of days after which log entries are considered old.
  27. */
  28. private $deleteLogsOlderThan;
  29. /**
  30. * The number of rows to delete per DELETE query.
  31. */
  32. private $maxRowsToDeletePerQuery;
  33. /**
  34. * Constructor.
  35. *
  36. * @param int $deleteLogsOlderThan The number of days after which log entires are considered old.
  37. * Visits and related data whose age is greater than this number
  38. * will be purged.
  39. * @param int $maxRowsToDeletePerQuery The maximum number of rows to delete in one query. Used to
  40. * make sure log tables aren't locked for too long.
  41. */
  42. public function __construct($deleteLogsOlderThan, $maxRowsToDeletePerQuery)
  43. {
  44. $this->deleteLogsOlderThan = $deleteLogsOlderThan;
  45. $this->maxRowsToDeletePerQuery = $maxRowsToDeletePerQuery;
  46. }
  47. /**
  48. * Purges old data from the following tables:
  49. * - log_visit
  50. * - log_link_visit_action
  51. * - log_conversion
  52. * - log_conversion_item
  53. * - log_action
  54. */
  55. public function purgeData()
  56. {
  57. $maxIdVisit = $this->getDeleteIdVisitOffset();
  58. // break if no ID was found (nothing to delete for given period)
  59. if (empty($maxIdVisit)) {
  60. return;
  61. }
  62. $logTables = self::getDeleteTableLogTables();
  63. // delete data from log tables
  64. $where = "WHERE idvisit <= ?";
  65. foreach ($logTables as $logTable) {
  66. // deleting from log_action must be handled differently, so we do it later
  67. if ($logTable != Common::prefixTable('log_action')) {
  68. Db::deleteAllRows($logTable, $where, "idvisit ASC", $this->maxRowsToDeletePerQuery, array($maxIdVisit));
  69. }
  70. }
  71. // delete unused actions from the log_action table (but only if we can lock tables)
  72. if (Db::isLockPrivilegeGranted()) {
  73. $this->purgeUnusedLogActions();
  74. } else {
  75. $logMessage = get_class($this) . ": LOCK TABLES privilege not granted; skipping unused actions purge";
  76. Log::warning($logMessage);
  77. }
  78. // optimize table overhead after deletion
  79. Db::optimizeTables($logTables);
  80. }
  81. /**
  82. * Returns an array describing what data would be purged if purging were invoked.
  83. *
  84. * This function returns an array that maps table names with the number of rows
  85. * that will be deleted.
  86. *
  87. * @return array
  88. */
  89. public function getPurgeEstimate()
  90. {
  91. $result = array();
  92. // deal w/ log tables that will be purged
  93. $maxIdVisit = $this->getDeleteIdVisitOffset();
  94. if (!empty($maxIdVisit)) {
  95. foreach ($this->getDeleteTableLogTables() as $table) {
  96. // getting an estimate for log_action is not supported since it can take too long
  97. if ($table != Common::prefixTable('log_action')) {
  98. $rowCount = $this->getLogTableDeleteCount($table, $maxIdVisit);
  99. if ($rowCount > 0) {
  100. $result[$table] = $rowCount;
  101. }
  102. }
  103. }
  104. }
  105. return $result;
  106. }
  107. /**
  108. * Safely delete all unused log_action rows.
  109. */
  110. private function purgeUnusedLogActions()
  111. {
  112. $this->createTempTable();
  113. // get current max ID in log tables w/ idaction references.
  114. $maxIds = $this->getMaxIdsInLogTables();
  115. // do large insert (inserting everything before maxIds) w/o locking tables...
  116. $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = true);
  117. // ... then do small insert w/ locked tables to minimize the amount of time tables are locked.
  118. $this->lockLogTables();
  119. $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = false);
  120. // delete before unlocking tables so there's no chance a new log row that references an
  121. // unused action will be inserted.
  122. $this->deleteUnusedActions();
  123. Db::unlockAllTables();
  124. }
  125. /**
  126. * get highest idVisit to delete rows from
  127. * @return string
  128. */
  129. private function getDeleteIdVisitOffset()
  130. {
  131. $logVisit = Common::prefixTable("log_visit");
  132. // get max idvisit
  133. $maxIdVisit = Db::fetchOne("SELECT MAX(idvisit) FROM $logVisit");
  134. if (empty($maxIdVisit)) {
  135. return false;
  136. }
  137. // select highest idvisit to delete from
  138. $dateStart = Date::factory("today")->subDay($this->deleteLogsOlderThan);
  139. $sql = "SELECT idvisit
  140. FROM $logVisit
  141. WHERE '" . $dateStart->toString('Y-m-d H:i:s') . "' > visit_last_action_time
  142. AND idvisit <= ?
  143. AND idvisit > ?
  144. ORDER BY idvisit DESC
  145. LIMIT 1";
  146. return Db::segmentedFetchFirst($sql, $maxIdVisit, 0, -self::$selectSegmentSize);
  147. }
  148. private function getLogTableDeleteCount($table, $maxIdVisit)
  149. {
  150. $sql = "SELECT COUNT(*) FROM $table WHERE idvisit <= ?";
  151. return (int)Db::fetchOne($sql, array($maxIdVisit));
  152. }
  153. private function createTempTable()
  154. {
  155. $sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::TEMP_TABLE_NAME) . " (
  156. idaction INT(11),
  157. PRIMARY KEY (idaction)
  158. )";
  159. Db::query($sql);
  160. }
  161. private function getMaxIdsInLogTables()
  162. {
  163. $tables = array('log_conversion', 'log_link_visit_action', 'log_visit', 'log_conversion_item');
  164. $idColumns = $this->getTableIdColumns();
  165. $result = array();
  166. foreach ($tables as $table) {
  167. $idCol = $idColumns[$table];
  168. $result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
  169. }
  170. return $result;
  171. }
  172. private function insertActionsToKeep($maxIds, $olderThan = true)
  173. {
  174. $tempTableName = Common::prefixTable(self::TEMP_TABLE_NAME);
  175. $idColumns = $this->getTableIdColumns();
  176. foreach ($this->getIdActionColumns() as $table => $columns) {
  177. $idCol = $idColumns[$table];
  178. foreach ($columns as $col) {
  179. $select = "SELECT $col FROM " . Common::prefixTable($table) . " WHERE $idCol >= ? AND $idCol < ?";
  180. $sql = "INSERT IGNORE INTO $tempTableName $select";
  181. if ($olderThan) {
  182. $start = 0;
  183. $finish = $maxIds[$table];
  184. } else {
  185. $start = $maxIds[$table];
  186. $finish = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
  187. }
  188. Db::segmentedQuery($sql, $start, $finish, self::$selectSegmentSize);
  189. }
  190. }
  191. // allow code to be executed after data is inserted. for concurrency testing purposes.
  192. if ($olderThan) {
  193. /**
  194. * @ignore
  195. */
  196. Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.olderThan");
  197. } else {
  198. /**
  199. * @ignore
  200. */
  201. Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.newerThan");
  202. }
  203. }
  204. private function lockLogTables()
  205. {
  206. Db::lockTables(
  207. $readLocks = Common::prefixTables('log_conversion',
  208. 'log_link_visit_action',
  209. 'log_visit',
  210. 'log_conversion_item'),
  211. $writeLocks = Common::prefixTables('log_action')
  212. );
  213. }
  214. private function deleteUnusedActions()
  215. {
  216. list($logActionTable, $tempTableName) = Common::prefixTables("log_action", self::TEMP_TABLE_NAME);
  217. $deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable
  218. FROM $logActionTable
  219. LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction
  220. WHERE tmp.idaction IS NULL";
  221. Db::query($deleteSql);
  222. }
  223. private function getIdActionColumns()
  224. {
  225. return array(
  226. 'log_link_visit_action' => array('idaction_url',
  227. 'idaction_url_ref',
  228. 'idaction_name',
  229. 'idaction_name_ref',
  230. 'idaction_event_category',
  231. 'idaction_event_action'
  232. ),
  233. 'log_conversion' => array('idaction_url'),
  234. 'log_visit' => array('visit_exit_idaction_url',
  235. 'visit_exit_idaction_name',
  236. 'visit_entry_idaction_url',
  237. 'visit_entry_idaction_name'),
  238. 'log_conversion_item' => array('idaction_sku',
  239. 'idaction_name',
  240. 'idaction_category',
  241. 'idaction_category2',
  242. 'idaction_category3',
  243. 'idaction_category4',
  244. 'idaction_category5')
  245. );
  246. }
  247. private function getTableIdColumns()
  248. {
  249. return array(
  250. 'log_link_visit_action' => 'idlink_va',
  251. 'log_conversion' => 'idvisit',
  252. 'log_visit' => 'idvisit',
  253. 'log_conversion_item' => 'idvisit'
  254. );
  255. }
  256. // let's hardcode, since these are not dynamically created tables
  257. public static function getDeleteTableLogTables()
  258. {
  259. $result = Common::prefixTables('log_conversion',
  260. 'log_link_visit_action',
  261. 'log_visit',
  262. 'log_conversion_item');
  263. if (Db::isLockPrivilegeGranted()) {
  264. $result[] = Common::prefixTable('log_action');
  265. }
  266. return $result;
  267. }
  268. /**
  269. * Utility function. Creates a new instance of LogDataPurger with the supplied array
  270. * of settings.
  271. *
  272. * $settings must contain values for the following keys:
  273. * - 'delete_logs_older_than': The number of days after which log entries are considered
  274. * old.
  275. * - 'delete_logs_max_rows_per_query': Max number of rows to DELETE in one query.
  276. *
  277. * @param array $settings Array of settings
  278. * @param bool $useRealTable
  279. * @return \Piwik\Plugins\PrivacyManager\LogDataPurger
  280. */
  281. public static function make($settings, $useRealTable = false)
  282. {
  283. return new LogDataPurger(
  284. $settings['delete_logs_older_than'],
  285. $settings['delete_logs_max_rows_per_query']
  286. );
  287. }
  288. }