PageRenderTime 53ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 1ms

/core/CronArchive.php

https://github.com/CodeYellowBV/piwik
PHP | 1215 lines | 885 code | 136 blank | 194 comment | 114 complexity | a6a25d82096a7f1eac421b2583db7e4d MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik;
  10. use Exception;
  11. use Piwik\ArchiveProcessor\Rules;
  12. use Piwik\CronArchive\FixedSiteIds;
  13. use Piwik\CronArchive\SharedSiteIds;
  14. use Piwik\Plugins\CoreAdminHome\API as APICoreAdminHome;
  15. use Piwik\Plugins\SitesManager\API as APISitesManager;
  16. /**
  17. * ./console core:archive runs as a cron and is a useful tool for general maintenance,
  18. * and pre-process reports for a Fast dashboard rendering.
  19. */
  20. class CronArchive
  21. {
  /**
   * Piwik URL override: when set before init() it is used instead of --url=
   *
   * @var string|bool
   */
  public static $url = false;

  /** Max parallel requests for a same site's segments */
  const MAX_CONCURRENT_API_REQUESTS = 3;

  /** Default of force-timeout-for-periods (1 hour) */
  const SECONDS_DELAY_BETWEEN_PERIOD_ARCHIVES = 3600;

  /** Default of force-all-periods (7 days) */
  const ARCHIVE_SITES_WITH_TRAFFIC_SINCE = 604800;

  /**
   * By default the last 52 days and months are processed.
   * Overwritten by the number of days since the last archiving ran until completion.
   */
  const DEFAULT_DATE_LAST = 52;

  /** Weeks are not used in yearly archives, so all possible weeks are processed */
  const DEFAULT_DATE_LAST_WEEKS = 260;

  const DEFAULT_DATE_LAST_YEARS = 7;

  /** Flag appended to API requests so the API knows the archive cron is calling */
  const APPEND_TO_API_REQUEST = '&trigger=archivephp';

  /** Name of the option recording the timestamp of the last completed run */
  const OPTION_ARCHIVING_FINISHED_TS = 'LastCompletedFullArchiving';

  /** Name of the option recording the starting timestamp of a run */
  const OPTION_ARCHIVING_STARTED_TS = 'LastFullArchivingStartTime';

  /** Only the first N characters of an error message are kept */
  const TRUNCATE_ERROR_MESSAGE_SUMMARY = 6000;

  /** Archiving is triggered on all websites with traffic in the last N seconds */
  private $shouldArchiveOnlySitesWithTrafficSince;

  /** By default the current week/month/year is processed at most once an hour */
  private $processPeriodsMaximumEverySeconds;

  private $todayArchiveTimeToLive;
  private $websiteDayHasFinishedSinceLastRun = array();
  private $idSitesInvalidatedOldReports = array();
  private $shouldArchiveSpecifiedSites = array();
  private $shouldSkipSpecifiedSites = array();

  /**
   * Queue of websites to archive, assigned in init().
   *
   * @var SharedSiteIds|FixedSiteIds
   */
  private $websites = array();

  private $allWebsites = array();
  private $segments = array();
  private $piwikUrl = false;
  private $token_auth = false;
  private $visitsToday = 0;
  private $requests = 0;
  private $output = '';
  private $archiveAndRespectTTL = true;
  private $shouldArchiveAllSites = false;
  private $shouldStartProfiler = false;
  private $acceptInvalidSSLCertificate = false;
  private $lastSuccessRunTimestamp = false;
  private $errors = array();
  private $isCoreInited = false;

  /** Text logged in the final "done:" line when no error was recorded */
  const NO_ERROR = 'no error';

  /** When true, "&testmode=1" is appended to the URLs issued by request() */
  public $testmode = false;
  /**
   * Builds the name of the option that stores the time core:archive was last
   * executed for the given website and period.
   *
   * @param int $idSite
   * @param string $period
   * @return string
   */
  public static function lastRunKey($idSite, $period)
  {
      return sprintf('lastRunArchive%s_%s', $period, $idSite);
  }
  /**
   * Entry point of the cron archiver: initializes it, archives the websites,
   * runs the scheduled tasks and finally reports the outcome of the run.
   */
  public function main()
  {
      $this->init();

      $this->run();
      $this->runScheduledTasks();

      $this->end();
  }
  /**
   * Prepares the archiver: logging, Piwik URL, core, authentication, run
   * parameters and the queue of websites to process.
   *
   * The initialization steps rely on each other, so their order must be kept.
   */
  public function init()
  {
      // Note: the order of these calls matters.
      $this->initLog();
      $this->initPiwikHost();
      $this->initCore();
      $this->initTokenAuth();
      $this->initCheckCli();
      $this->initStateFromParameters();
      Piwik::setUserHasSuperUserAccess(true);

      $this->logInitInfo();
      $this->checkPiwikUrlIsValid();
      $this->logArchiveTimeoutInfo();

      // Record when this archiving run started.
      Option::set(self::OPTION_ARCHIVING_STARTED_TS, time());

      $this->segments = $this->initSegmentsToArchive();
      $this->allWebsites = APISitesManager::getInstance()->getAllSitesId();

      $siteIds = $this->initWebsiteIds();
      $this->filterWebsiteIds($siteIds);

      // A shared queue is only used when no explicit site selection was requested
      // and SharedSiteIds reports itself as supported.
      $useSharedQueue = empty($this->shouldArchiveSpecifiedSites)
          && empty($this->shouldArchiveAllSites)
          && SharedSiteIds::isSupported();

      if ($useSharedQueue) {
          $this->websites = new SharedSiteIds($siteIds);
          $queuedSiteIds = $this->websites->getInitialSiteIds();
          if ($queuedSiteIds != $siteIds) {
              $this->log('Will ignore websites and help finish a previous started queue instead. IDs: ' . implode(', ', $queuedSiteIds));
          }
      } else {
          $this->websites = new FixedSiteIds($siteIds);
      }

      if ($this->shouldStartProfiler) {
          \Piwik\Profiler::setupProfilerXHProf(true);
          $this->log("XHProf profiling is enabled.");
      }

      /**
       * This event is triggered after a CronArchive instance is initialized.
       *
       * @param array $websiteIds The list of website IDs this CronArchive instance is processing.
       *                          This will be the entire list of IDs regardless of whether some have
       *                          already been processed.
       */
      Piwik::postEvent('CronArchive.init.finish', array($this->websites->getInitialSiteIds()));
  }
  /**
   * Runs only the scheduled tasks, after the minimal initialization they need
   * (Piwik URL, logging, core, token_auth and URL validation).
   */
  public function runScheduledTasksInTrackerMode()
  {
      // Initialization, in the order required by this mode.
      $this->initPiwikHost();
      $this->initLog();
      $this->initCore();
      $this->initTokenAuth();

      $this->logInitInfo();
      $this->checkPiwikUrlIsValid();

      $this->runScheduledTasks();
  }
  // Counters of the current run; they are reported in the summary logged by run().
  private $websitesWithVisitsSinceLastRun = 0;
  private $skippedPeriodsArchivesWebsite  = 0;
  private $skippedDayArchivesWebsites     = 0;
  private $skipped                        = 0;
  private $processed                      = 0;
  private $archivedPeriodsArchivesWebsite = 0;
  /**
   * Main function: archives every website handed out by the site queue,
   * then logs a summary of the run.
   */
  public function run()
  {
      $timer = new Timer;

      $this->logSection("START");
      $this->log("Starting Piwik reports archiving...");

      do {
          $idSite = $this->websites->getNextSiteId();
          if ($idSite === null) {
              // the queue is exhausted
              break;
          }

          flush();
          $requestsBefore = $this->requests;

          if ($idSite <= 0) {
              continue;
          }

          if (in_array($idSite, $this->shouldSkipSpecifiedSites)) {
              $this->log("Skipped website id $idSite, found in --skip-idsites ");
              $this->skipped++;
              continue;
          }

          /**
           * This event is triggered before the cron archiving process starts archiving data for a single
           * site.
           *
           * @param int $idSite The ID of the site we're archiving data for.
           */
          Piwik::postEvent('CronArchive.archiveSingleSite.start', array($idSite));

          $completed = $this->archiveSingleSite($idSite, $requestsBefore);

          /**
           * This event is triggered immediately after the cron archiving process finishes archiving data
           * for a single site.
           *
           * @param int $idSite The ID of the site we're archiving data for.
           * @param bool $completed The value returned by archiveSingleSite() for this site.
           */
          Piwik::postEvent('CronArchive.archiveSingleSite.finish', array($idSite, $completed));
      } while (!empty($idSite));

      $this->log("Done archiving!");

      $this->logSection("SUMMARY");
      $this->log("Total visits for today across archived websites: " . $this->visitsToday);

      $this->skipped = count($this->allWebsites) - $this->websitesWithVisitsSinceLastRun;

      $this->log("Archived today's reports for {$this->websitesWithVisitsSinceLastRun} websites");
      $this->log("Archived week/month/year for {$this->archivedPeriodsArchivesWebsite} websites");
      $this->log("Skipped {$this->skipped} websites: no new visit since the last script execution");
      $this->log("Skipped {$this->skippedDayArchivesWebsites} websites day archiving: existing daily reports are less than {$this->todayArchiveTimeToLive} seconds old");
      $this->log("Skipped {$this->skippedPeriodsArchivesWebsite} websites week/month/year archiving: existing periods reports are less than {$this->processPeriodsMaximumEverySeconds} seconds old");
      $this->log("Total API requests: {$this->requests}");

      // DONE: done/total, visits, wtoday, wperiods, reqs, time, errors[count]: first eg.
      $numSites = $this->websites->getNumSites();
      $percent = "";
      if ($numSites != 0) {
          $percent = " " . round($this->processed * 100 / $numSites, 0) . "%";
      }

      if (empty($this->errors)) {
          $errorSummary = self::NO_ERROR;
      } else {
          $errorSummary = count($this->errors) . " errors.";
      }

      $this->log(sprintf(
          "done: %s/%s%s, %s vtoday, %s wtoday, %s wperiods, %s req, %s ms, %s",
          $this->processed,
          $numSites,
          $percent,
          $this->visitsToday,
          $this->websitesWithVisitsSinceLastRun,
          $this->archivedPeriodsArchivesWebsite,
          $this->requests,
          round($timer->getTimeMs()),
          $errorSummary
      ));
      $this->log($timer->__toString());
  }
  /**
   * End of the script: stores the completion timestamp when the run had no
   * error, otherwise lists every recorded error and reports a fatal error.
   */
  public function end()
  {
      if (!empty($this->errors)) {
          $this->logSection("SUMMARY OF ERRORS");
          foreach ($this->errors as $error) {
              // do not logError since errors are already in stderr
              $this->log("Error: " . $error);
          }

          $this->logFatalError(
              count($this->errors) . " total errors during this script execution, please investigate and try and fix these errors."
          );
          return;
      }

      // No error -> record that the script ran successfully until completion
      Option::set(self::OPTION_ARCHIVING_FINISHED_TS, time());
  }
  /**
   * Records the given message as an error, then stops the script
   * with exit status 1.
   *
   * @param string $m The error message.
   */
  public function logFatalError($m)
  {
      $this->logError($m);

      exit(1);
  }
  /**
   * Triggers the scheduled tasks through the CoreAdminHome.runScheduledTasks
   * API method and logs its output, unless --disable-scheduled-tasks was given.
   */
  public function runScheduledTasks()
  {
      $this->logSection("SCHEDULED TASKS");

      $tasksDisabled = $this->getParameterFromCli('--disable-scheduled-tasks');
      if ($tasksDisabled) {
          $this->log("Scheduled tasks are disabled with --disable-scheduled-tasks");
          return;
      }

      $this->log("Starting Scheduled tasks... ");

      $tasksUrl = "?module=API&method=CoreAdminHome.runScheduledTasks&format=csv&convertToUnicode=0&token_auth=" . $this->token_auth;
      $tasksOutput = $this->request($tasksUrl);

      // Replace the CSV renderer's "no data" marker by a friendlier message.
      if ($tasksOutput == \Piwik\DataTable\Renderer\Csv::NO_DATA_AVAILABLE) {
          $tasksOutput = " No task to run";
      }

      $this->log($tasksOutput);
      $this->log("done");
      $this->logSection("");
  }
  /**
   * Archives the reports of one website: the "day" period first, then the
   * week/month/year periods when they are due.
   *
   * @param int $idSite ID of the website to archive.
   * @param int $requestsBefore Value of the API request counter before this website was started;
   *                            used to log how many requests were spent on it.
   * @return bool True when processing reached the week/month/year archiving step,
   *              false when the website (or its periods archiving) was skipped
   *              or the API response for the day period was empty or invalid.
   */
  private function archiveSingleSite($idSite, $requestsBefore)
  {
      $timerWebsite = new Timer;

      $lastTimestampWebsiteProcessedPeriods = $lastTimestampWebsiteProcessedDay = false;
      if ($this->archiveAndRespectTTL) {
          // bypass the option cache so the stored timestamps are read again
          Option::clearCachedOption($this->lastRunKey($idSite, "periods"));
          $lastTimestampWebsiteProcessedPeriods = Option::get($this->lastRunKey($idSite, "periods"));

          Option::clearCachedOption($this->lastRunKey($idSite, "day"));
          $lastTimestampWebsiteProcessedDay = Option::get($this->lastRunKey($idSite, "day"));
      }

      $this->updateIdSitesInvalidatedOldReports();

      // For period other than days, we only re-process the reports at most
      // 1) every $processPeriodsMaximumEverySeconds
      $secondsSinceLastExecution = time() - $lastTimestampWebsiteProcessedPeriods;

      // if timeout is more than 10 min, we account for a 5 min processing time
      // and therefore allow the trigger 5 min earlier
      if ($this->processPeriodsMaximumEverySeconds > 10 * 60) {
          $secondsSinceLastExecution += 5 * 60;
      }

      $shouldArchivePeriods = $secondsSinceLastExecution > $this->processPeriodsMaximumEverySeconds;
      if (empty($lastTimestampWebsiteProcessedPeriods)) {
          // 2) OR always if script never executed for this website before
          $shouldArchivePeriods = true;
      }

      // (*) If the website is archived because it is a new day in its timezone
      // We make sure all periods are archived, even if there is 0 visit today
      $dayHasEndedMustReprocess = in_array($idSite, $this->websiteDayHasFinishedSinceLastRun);
      if ($dayHasEndedMustReprocess) {
          $shouldArchivePeriods = true;
      }

      // (*) If there was some old reports invalidated for this website
      // we make sure all these old reports are triggered at least once
      $websiteIsOldDataInvalidate = in_array($idSite, $this->idSitesInvalidatedOldReports);
      if ($websiteIsOldDataInvalidate) {
          $shouldArchivePeriods = true;
      }

      $websiteIdIsForced = in_array($idSite, $this->shouldArchiveSpecifiedSites);
      if ($websiteIdIsForced) {
          $shouldArchivePeriods = true;
      }

      // Test if we should process this website at all
      $elapsedSinceLastArchiving = time() - $lastTimestampWebsiteProcessedDay;

      // Skip this day archive if the existing archive is more recent than the TTL
      $existingArchiveIsValid = ($elapsedSinceLastArchiving < $this->todayArchiveTimeToLive);
      $skipDayArchive = $existingArchiveIsValid;

      // Invalidate old website forces the archiving for this site
      $skipDayArchive = $skipDayArchive && !$websiteIsOldDataInvalidate;

      // Also reprocess when day has ended since last run
      if ($dayHasEndedMustReprocess
          // it might have reprocessed for that day by another cron
          && !$this->hasBeenProcessedSinceMidnight($idSite, $lastTimestampWebsiteProcessedDay)
          && !$existingArchiveIsValid
      ) {
          $skipDayArchive = false;
      }

      if ($websiteIdIsForced) {
          $skipDayArchive = false;
      }

      if ($skipDayArchive) {
          $this->log("Skipped website id $idSite, already done "
              . \Piwik\MetricsFormatter::getPrettyTimeFromSeconds($elapsedSinceLastArchiving, true, false)
              . " ago, " . $timerWebsite->__toString());
          $this->skippedDayArchivesWebsites++;
          $this->skipped++;
          return false;
      }

      // Fake that the request is already done, so that other core:archive commands
      // running do not grab the same website from the queue
      Option::set($this->lastRunKey($idSite, "day"), time());

      // Remove this website from the list of websites to be invalidated
      // since it's now just about to being re-processed, makes sure another running cron archiving process
      // does not archive the same idSite
      if ($websiteIsOldDataInvalidate) {
          $this->setSiteIsArchived($idSite);
      }

      // when some data was purged from this website
      // we make sure we query all previous days/weeks/months
      $processDaysSince = $lastTimestampWebsiteProcessedDay;
      if ($websiteIsOldDataInvalidate
          // when --force-all-websites option,
          // also forces to archive last52 days to be safe
          || $this->shouldArchiveAllSites
      ) {
          $processDaysSince = false;
      }

      $timer = new Timer;
      $dateLast = $this->getApiDateLastParameter($idSite, "day", $processDaysSince);
      $url = $this->getVisitsRequestUrl($idSite, "day", $dateLast);
      $content = $this->request($url);
      // the response is a serialized PHP value; a failed unserialize() yields false
      $response = @unserialize($content);

      $visitsToday = $this->getVisitsLastPeriodFromApiResponse($response);
      $visitsLastDays = $this->getVisitsFromApiResponse($response);

      if (empty($content)
          || !is_array($response)
          || count($response) == 0
      ) {
          // cancel the successful run flag
          Option::set($this->lastRunKey($idSite, "day"), 0);

          $this->log("WARNING: Empty or invalid response '$content' for website id $idSite, " . $timerWebsite->__toString() . ", skipping");
          $this->skipped++;
          return false;
      }

      $this->requests++;
      $this->processed++;

      // If there is no visit today and we don't need to process this website, we can skip remaining archives
      if ($visitsToday == 0
          && !$shouldArchivePeriods
      ) {
          $this->log("Skipped website id $idSite, no visit today, " . $timerWebsite->__toString());
          $this->skipped++;
          return false;
      }

      if ($visitsLastDays == 0
          && !$shouldArchivePeriods
          && $this->shouldArchiveAllSites
      ) {
          $this->log("Skipped website id $idSite, no visits in the last " . $dateLast . " days, " . $timerWebsite->__toString());
          $this->skipped++;
          return false;
      }

      $this->visitsToday += $visitsToday;
      $this->websitesWithVisitsSinceLastRun++;
      $this->archiveVisitsAndSegments($idSite, "day", $lastTimestampWebsiteProcessedDay);
      $this->logArchivedWebsite($idSite, "day", $dateLast, $visitsLastDays, $visitsToday, $timer);

      if (!$shouldArchivePeriods) {
          $this->log("Skipped website id $idSite periods processing, already done "
              . \Piwik\MetricsFormatter::getPrettyTimeFromSeconds($elapsedSinceLastArchiving, true, false)
              . " ago, " . $timerWebsite->__toString());
          // Only the week/month/year archiving is skipped here (the day was archived above),
          // so this counts towards the "periods skipped" figure of the run summary.
          $this->skippedPeriodsArchivesWebsite++;
          $this->skipped++;
          return false;
      }

      $success = true;
      foreach (array('week', 'month', 'year') as $period) {
          $success = $this->archiveVisitsAndSegments($idSite, $period, $lastTimestampWebsiteProcessedPeriods)
              && $success;
      }

      // Record successful run of this website's periods archiving
      if ($success) {
          Option::set($this->lastRunKey($idSite, "periods"), time());
      }

      $this->archivedPeriodsArchivesWebsite++;

      $requestsWebsite = $this->requests - $requestsBefore;
      Log::info("Archived website id = $idSite, "
          . $requestsWebsite . " API requests, "
          . $timerWebsite->__toString()
          . " [" . $this->websites->getNumProcessedWebsites() . "/"
          . $this->websites->getNumSites()
          . " done]");

      return true;
  }
  /**
   * Sets the host taken from the Piwik URL, resets the Config object and
   * checks the local config file is found.
   *
   * @param string $piwikUrl URL of the Piwik installation.
   * @throws Exception When the local configuration file cannot be found; the original
   *                   exception is attached as the previous exception.
   */
  protected function initConfigObject($piwikUrl)
  {
      // HOST is required for the Config object
      $parsed = parse_url($piwikUrl);

      // parse_url() returns false for seriously malformed URLs and omits 'host' when the URL
      // has none; pass null in both cases instead of reading an undefined index.
      $host = (is_array($parsed) && isset($parsed['host'])) ? $parsed['host'] : null;
      Url::setHost($host);

      Config::getInstance()->clear();

      try {
          Config::getInstance()->checkLocalConfigFound();
      } catch (Exception $e) {
          // keep the original exception as "previous" so its message and trace are not lost
          throw new Exception(
              "The configuration file for Piwik could not be found. " .
              "Please check that config/config.ini.php is readable by the user " .
              get_current_user(),
              0,
              $e
          );
      }
  }
  /**
   * Builds the query string of the API.get request used to process the
   * reports of $idSite for the last $dateLast periods of type $period.
   *
   * @param int $idSite
   * @param string $period
   * @param int|string $dateLast Appended after "date=last".
   * @return string Query string starting with "?", to be appended to the Piwik URL.
   */
  private function getVisitsRequestUrl($idSite, $period, $dateLast)
  {
      return sprintf(
          '?module=API&method=API.get&idSite=%s&period=%s&date=last%s&format=php&token_auth=%s',
          $idSite,
          $period,
          $dateLast,
          $this->token_auth
      );
  }
  /**
   * Fetches the segments known to be pre-processed for every website and
   * logs them when there are any.
   *
   * @return array The segments, or an empty array when none is configured.
   */
  private function initSegmentsToArchive()
  {
      $knownSegments = \Piwik\SettingsPiwik::getKnownSegmentsToArchive();

      if (!empty($knownSegments)) {
          $this->log("- Will pre-process " . count($knownSegments) . " Segments for each website and each period: " . implode(", ", $knownSegments));
          return $knownSegments;
      }

      return array();
  }
  /**
   * Returns the segments to pre-process for one website: the segments shared
   * by all websites merged with the ones known for this website, without duplicates.
   *
   * @param int $idSite
   * @return array
   */
  private function getSegmentsForSite($idSite)
  {
      $siteSegments = \Piwik\SettingsPiwik::getKnownSegmentsToArchiveForSite($idSite);

      if (!empty($siteSegments)) {
          $this->log("Will pre-process the following " . count($siteSegments) . " Segments for this website (id = $idSite): " . implode(", ", $siteSegments));
      }

      return array_unique(array_merge($this->segments, $siteSegments));
  }
  /**
   * Triggers the API requests for website $idSite and the given $period:
   * one request per segment pre-processed for this website, plus the request
   * without segment for periods other than "day" (the day request without
   * segment is issued by the caller). Requests are sent through CliMulti.
   *
   * @param int $idSite
   * @param string $period
   * @param int|bool $lastTimestampWebsiteProcessed
   * @return bool True on success, false if some request failed
   */
  private function archiveVisitsAndSegments($idSite, $period, $lastTimestampWebsiteProcessed)
  {
      $timer = new Timer();

      $dateLast = $this->getApiDateLastParameter($idSite, $period, $lastTimestampWebsiteProcessed);
      $noSegmentUrl = $this->piwikUrl
          . $this->getVisitsRequestUrl($idSite, $period, $dateLast)
          . self::APPEND_TO_API_REQUEST;

      $urls = array();

      // the request without segment was already processed by the caller for "day"
      if ($period != "day") {
          $urls[] = $noSegmentUrl;
          $this->requests++;
      }

      foreach ($this->getSegmentsForSite($idSite) as $segment) {
          $urls[] = $noSegmentUrl . '&segment=' . urlencode($segment);
          $this->requests++;
      }

      $cliMulti = new CliMulti();
      $cliMulti->setAcceptInvalidSSLCertificate($this->acceptInvalidSSLCertificate);
      $cliMulti->setConcurrentProcessesLimit(self::MAX_CONCURRENT_API_REQUESTS);
      $responses = $cliMulti->request($urls);

      $success = true;
      $visitsInLastPeriods = 0;
      $visitsLastPeriod = 0;

      foreach ($urls as $index => $requestUrl) {
          $content = array_key_exists($index, $responses) ? $responses[$index] : null;

          // once a request has failed, the following responses are no longer checked
          $success = $success && $this->checkResponse($content, $requestUrl);

          if ($success && $requestUrl === $noSegmentUrl) {
              $stats = @unserialize($content);
              if (!is_array($stats)) {
                  $this->logError("Error unserializing the following response from $requestUrl: " . $content);
              }

              $visitsInLastPeriods = $this->getVisitsFromApiResponse($stats);
              $visitsLastPeriod = $this->getVisitsLastPeriodFromApiResponse($stats);
          }
      }

      // the daily archive has already been logged by the caller
      if ($period != "day") {
          $this->logArchivedWebsite($idSite, $period, $dateLast, $visitsInLastPeriods, $visitsLastPeriod, $timer);
      }

      return $success;
  }
  /**
   * Logs a separator line, followed by the section title when one is given.
   *
   * @param string $title
   */
  private function logSection($title = "")
  {
      $this->log("---------------------------");

      if (empty($title)) {
          return;
      }

      $this->log($title);
  }
  /**
   * Appends the message to the collected output and logs it at INFO level;
   * when logging throws, the message is printed instead.
   *
   * @param string $m
   */
  public function log($m)
  {
      $line = $m . "\n";
      $this->output .= $line;

      try {
          Log::info($m);
      } catch (Exception $e) {
          print($line);
      }
  }
  /**
   * Records an error for the final summary and logs it at ERROR level.
   *
   * The message is cut to TRUNCATE_ERROR_MESSAGE_SUMMARY characters unless the
   * PIWIK_ARCHIVE_NO_TRUNCATE constant is defined; newlines and tabs are
   * replaced by spaces.
   *
   * @param string $m
   */
  public function logError($m)
  {
      $message = defined('PIWIK_ARCHIVE_NO_TRUNCATE')
          ? $m
          : substr($m, 0, self::TRUNCATE_ERROR_MESSAGE_SUMMARY);

      $message = str_replace(array("\n", "\t"), " ", $message);

      $this->errors[] = $message;
      Log::error($message);
  }
  /**
   * Logs an error describing the invalid response received for an API request.
   *
   * @param string $url The requested URL.
   * @param mixed $response The response received, if any.
   * @return bool Always false, so callers can return the result directly.
   */
  private function logNetworkError($url, $response)
  {
      if (empty($response)) {
          $details = "The response was empty. This usually means a server error. This solution to this error is generally to increase the value of 'memory_limit' in your php.ini file. Please check your Web server Error Log file for more details.";
      } else {
          $details = "Response was '$response'";
      }

      $this->logError("Got invalid response from API request: $url. " . $details);

      return false;
  }
  /**
   * Issues a request to $url, relative to the Piwik URL.
   *
   * @param string $url Query string appended to the Piwik URL.
   * @return string|bool The response, or false when the request threw or the
   *                     response did not pass checkResponse().
   */
  private function request($url)
  {
      $fullUrl = $this->piwikUrl . $url . self::APPEND_TO_API_REQUEST;

      if ($this->shouldStartProfiler) {
          $fullUrl .= "&xhprof=2";
      }

      if ($this->testmode) {
          $fullUrl .= "&testmode=1";
      }

      try {
          $cliMulti = new CliMulti();
          $cliMulti->setAcceptInvalidSSLCertificate($this->acceptInvalidSSLCertificate);
          $responses = $cliMulti->request(array($fullUrl));

          // a single URL was requested, so only the first response matters
          $response = empty($responses) ? null : array_shift($responses);
      } catch (Exception $e) {
          return $this->logNetworkError($fullUrl, $e->getMessage());
      }

      return $this->checkResponse($response, $fullUrl) ? $response : false;
  }
  /**
   * Checks an API response: it is invalid when it is empty or contains the
   * word "error" (case-insensitive) anywhere, including at its very beginning.
   *
   * @param mixed $response The response received for the request.
   * @param string $url The requested URL, used in the error message.
   * @return bool True when the response looks valid; false (after logging a
   *              network error) otherwise.
   */
  private function checkResponse($response, $url)
  {
      // stripos() returns the int offset 0 when the response starts with "error";
      // compare against false explicitly so that this case is detected as well.
      if (empty($response)
          || stripos($response, 'error') !== false
      ) {
          return $this->logNetworkError($url, $response);
      }

      return true;
  }
  /**
   * Configures Piwik\Log so messages are written to the screen, with a log
   * level of at least INFO.
   */
  private function initLog()
  {
      $config = Config::getInstance();

      /**
       * Access a property that is not overridden by TestingEnvironment before accessing log, as the
       * log section is used in TestingEnvironment. Otherwise access to magic __get('log') fails in
       * TestingEnvironment as it tries to access it already here with __get('log').
       * $config->log ==> __get('log') ==> Config.createConfigInstance ==> nested __get('log') ==> returns null
       */
      $initConfigToPreventErrorWhenAccessingLog = $config->mail;

      $logConfig = $config->log;
      $logConfig['log_only_when_debug_parameter'] = 0;
      $logConfig[\Piwik\Log::LOG_WRITERS_CONFIG_OPTION] = array("screen");

      if (!empty($_GET['forcelogtoscreen'])) {
          Log::getInstance()->addLogWriter('screen');
      }

      // Make sure we log at least INFO (if logger is set to DEBUG or VERBOSE then keep it)
      $levelKey = \Piwik\Log::LOG_LEVEL_CONFIG_OPTION;
      $configuredLevel = isset($logConfig[$levelKey]) ? $logConfig[$levelKey] : null;
      $isMoreVerboseThanInfo = ($configuredLevel == 'VERBOSE' || $configuredLevel == 'DEBUG');

      if (!$isMoreVerboseThanInfo) {
          $logConfig[$levelKey] = 'INFO';
          Log::getInstance()->setLogLevel(Log::INFO);
      }

      $config->log = $logConfig;
  }
  /**
   * Script does run on http:// ONLY if the SU token is specified.
   *
   * In CLI mode nothing is checked. Otherwise the token_auth request parameter
   * must be 32 characters long and identical to the Super User token_auth,
   * else the script stops with an explanatory message.
   */
  private function initCheckCli()
  {
      if (Common::isPhpCliMode()) {
          return;
      }

      $token_auth = Common::getRequestVar('token_auth', '', 'string');

      // Strict comparison on purpose: with a loose comparison two numeric-looking strings
      // (e.g. "0e1234..." and "0e5678...") are compared as numbers and may be considered equal.
      if ($token_auth !== $this->token_auth
          || strlen($token_auth) != 32
      ) {
          die('<b>You must specify the Super User token_auth as a parameter to this script, eg. <code>?token_auth=XYZ</code> if you wish to run this script through the browser. </b><br>
              However it is recommended to run it <a href="http://piwik.org/docs/setup-auto-archiving/">via cron in the command line</a>, since it can take a long time to run.<br/>
              In a shell, execute for example the following to trigger archiving on the local Piwik server:<br/>
              <code>$ /path/to/php /path/to/piwik/console core:archive --url=http://your-website.org/path/to/piwik/</code>');
      }
  }
  /**
   * Initializes Piwik through the front controller and flags the core as
   * initialized on success.
   *
   * @throws Exception When the initialization fails; the original exception is
   *                   attached as the previous exception.
   */
  private function initCore()
  {
      try {
          FrontController::getInstance()->init();
          $this->isCoreInited = true;
      } catch (Exception $e) {
          // chain the original exception so that its type and stack trace stay available
          throw new Exception("ERROR: During Piwik init, Message: " . $e->getMessage(), 0, $e);
      }
  }
  /**
   * Tells whether initCore() completed successfully.
   *
   * @return bool
   */
  public function isCoreInited()
  {
      return $this->isCoreInited;
  }
  /**
   * Initializes the run settings from the command line parameters and from
   * the timestamp of the last successful run.
   */
  private function initStateFromParameters()
  {
      $this->todayArchiveTimeToLive = Rules::getTodayArchiveTimeToLive();
      $this->acceptInvalidSSLCertificate = $this->getParameterFromCli("accept-invalid-ssl-certificate");
      $this->processPeriodsMaximumEverySeconds = $this->getDelayBetweenPeriodsArchives();
      $this->shouldArchiveAllSites = (bool) $this->getParameterFromCli("force-all-websites");
      $this->shouldStartProfiler = (bool) $this->getParameterFromCli("xhprof");

      $forcedIdSites = $this->getParameterFromCli("force-idsites", true);
      $skippedIdSites = $this->getParameterFromCli("skip-idsites", true);
      $this->shouldArchiveSpecifiedSites = \Piwik\Site::getIdSitesFromIdSitesString($forcedIdSites);
      $this->shouldSkipSpecifiedSites = \Piwik\Site::getIdSitesFromIdSitesString($skippedIdSites);

      $this->lastSuccessRunTimestamp = Option::get(self::OPTION_ARCHIVING_FINISHED_TS);

      $trafficSince = $this->isShouldArchiveAllSitesWithTrafficSince();

      if ($trafficSince === false) {
          // force-all-periods is not set: use the default on the first run,
          // otherwise the time elapsed since the previous successful run
          $trafficSince = empty($this->lastSuccessRunTimestamp)
              ? self::ARCHIVE_SITES_WITH_TRAFFIC_SINCE
              : time() - $this->lastSuccessRunTimestamp;
      } else {
          // force-all-periods is set
          $this->archiveAndRespectTTL = false;

          if ($trafficSince === true) {
              // force-all-periods without value
              $trafficSince = self::ARCHIVE_SITES_WITH_TRAFFIC_SINCE;
          }
      }

      $this->shouldArchiveOnlySitesWithTrafficSince = $trafficSince;
  }
  /**
   * Restricts the given list to websites that exist, then lets plugins
   * modify it through the CronArchive.filterWebsiteIds event.
   *
   * @param array $websiteIds The list of website IDs, modified in place.
   */
  public function filterWebsiteIds(&$websiteIds)
  {
      // Drop the IDs of websites that do not exist
      $existingIds = array_intersect($websiteIds, $this->allWebsites);
      $websiteIds = $existingIds;

      /**
       * Triggered by the **core:archive** console command so plugins can modify the list of
       * websites that the archiving process will be launched for.
       *
       * Plugins can use this hook to add websites to archive, remove websites to archive, or change
       * the order in which websites will be archived.
       *
       * @param array $websiteIds The list of website IDs to launch the archiving process for.
       */
      Piwik::postEvent('CronArchive.filterWebsiteIds', array(&$websiteIds));
  }
  /**
   * Returns the list of sites to loop over and archive: the forced sites
   * (--force-idsites), else all sites (--force-all-websites), else the sites
   * with visits since the last run, the sites to invalidate and the sites in
   * a timezone where a new day started.
   *
   * @return array
   */
  public function initWebsiteIds()
  {
      $forcedCount = count($this->shouldArchiveSpecifiedSites);
      if ($forcedCount > 0) {
          $this->log("- Will process " . $forcedCount . " websites (--force-idsites)");
          return $this->shouldArchiveSpecifiedSites;
      }

      if ($this->shouldArchiveAllSites) {
          $this->log("- Will process all " . count($this->allWebsites) . " websites");
          return $this->allWebsites;
      }

      $withVisits = $this->addWebsiteIdsWithVisitsSinceLastRun();
      $toInvalidate = $this->getWebsiteIdsToInvalidate();
      $candidates = array_merge($withVisits, $toInvalidate);

      $withNewDay = $this->addWebsiteIdsInTimezoneWithNewDay($candidates);

      return array_unique(array_merge($candidates, $withNewDay));
  }
  /**
   * Loads the token_auth of the Super User that registered first
   * and keeps it for the API requests.
   */
  private function initTokenAuth()
  {
      $userTable = Common::prefixTable("user");

      $superUser = Db::get()->fetchRow("SELECT login, token_auth
          FROM " . $userTable . "
          WHERE superuser_access = 1
          ORDER BY date_registered ASC");

      $this->token_auth = $superUser['token_auth'];
  }
  /**
   * Determines the Piwik URL used for the API requests, initializes the
   * Config object for its host and stores the URL ending with index.php.
   */
  private function initPiwikHost()
  {
      if (Common::isPhpCliMode()) {
          // If core:archive command run as CLI/shell we require the piwik url to be set
          $piwikUrl = $this->getParameterFromCli("url", true);
          if (!$piwikUrl) {
              $this->logFatalErrorUrlExpected();
          }

          if (!\Piwik\UrlHelper::isLookLikeUrl($piwikUrl)) {
              // try adding http:// in case it's missing
              $piwikUrl = "http://" . $piwikUrl;
          }

          if (!\Piwik\UrlHelper::isLookLikeUrl($piwikUrl)) {
              $this->logFatalErrorUrlExpected();
          }

          // ensure there is a trailing slash
          $lastCharacter = $piwikUrl[strlen($piwikUrl) - 1];
          if ($lastCharacter != '/' && !Common::stringEndsWith($piwikUrl, 'index.php')) {
              $piwikUrl .= '/';
          }
      } elseif (!empty(self::$url)) {
          // web cron with an explicitly configured URL
          $piwikUrl = self::$url;
      } else {
          // web cron: use the current hostname+path, eg. example.org/piwik/
          $piwikUrl = SettingsPiwik::getPiwikUrl();
      }

      $this->initConfigObject($piwikUrl);

      if (Config::getInstance()->General['force_ssl'] == 1) {
          $piwikUrl = str_replace('http://', 'https://', $piwikUrl);
      }

      if (!Common::stringEndsWith($piwikUrl, 'index.php')) {
          $piwikUrl .= 'index.php';
      }

      $this->piwikUrl = $piwikUrl;
  }
  /**
   * Returns whether the requested parameter is defined in the command line arguments.
   * If $valuePossible is true, then a value is possibly set for this parameter,
   * ie. --force-timeout-for-periods=3600 would return "3600".
   *
   * The parameter is looked up in $_SERVER['argv'] as "--name", "-name" and "name".
   * An argument matches only when it is exactly one of these spellings or one of
   * them immediately followed by "=". A plain prefix match is not enough, so
   * asking for "foo" does not match "--foo-bar".
   *
   * Outside CLI mode this always returns false. For the "url" parameter,
   * self::$url takes precedence over the command line when it is set.
   *
   * @param string $parameter Parameter name
   * @param bool $valuePossible Whether a "=value" suffix should be returned
   * @return bool|string true if set without a (requested) value, the value as a
   *                     string if one was given and requested, false otherwise
   */
  public static function getParameterFromCli($parameter, $valuePossible = false)
  {
      if (!Common::isPhpCliMode()) {
          return false;
      }

      if ($parameter == 'url' && self::$url) {
          return self::$url;
      }

      if (empty($_SERVER['argv'])) {
          return false;
      }

      $spellings = array(
          "--$parameter",
          "-$parameter",
          (string) $parameter
      );

      foreach ($spellings as $spelling) {
          foreach ($_SERVER['argv'] as $arg) {
              $isBareFlag = ($arg === $spelling);
              $hasValue   = (strpos($arg, $spelling . '=') === 0);

              if (!$isBareFlag && !$hasValue) {
                  continue;
              }

              if ($valuePossible && $hasValue) {
                  $value = substr($arg, strlen($spelling) + 1);
                  // On older PHP versions substr() yields false for "--name=",
                  // in which case the parameter counts as a value-less flag.
                  if ($value !== false) {
                      return $value;
                  }
              }

              return true;
          }
      }

      return false;
  }
  /**
   * Refreshes $this->idSitesInvalidatedOldReports with the current result of
   * APICoreAdminHome::getWebsiteIdsToInvalidate().
   */
  private function updateIdSitesInvalidatedOldReports()
  {
      $invalidatedSiteIds = APICoreAdminHome::getWebsiteIdsToInvalidate();

      $this->idSitesInvalidatedOldReports = $invalidatedSiteIds;
  }
  /**
   * Returns all websites that had reports in the past which were invalidated recently
   * (see API CoreAdminHome.invalidateArchivedReports), eg. when using the Python
   * log import script.
   *
   * Refreshes $this->idSitesInvalidatedOldReports first and logs the selected IDs
   * when there is at least one.
   *
   * @return array
   */
  private function getWebsiteIdsToInvalidate()
  {
      $this->updateIdSitesInvalidatedOldReports();

      $invalidatedCount = count($this->idSitesInvalidatedOldReports);
      if ($invalidatedCount > 0) {
          $idList = ", IDs: " . implode(", ", $this->idSitesInvalidatedOldReports);
          $message = "- Will process " . $invalidatedCount
              . " other websites because some old data reports have been invalidated (eg. using the Log Import script) "
              . $idList;
          $this->log($message);
      }

      return $this->idSitesInvalidatedOldReports;
  }
  /**
   * Returns the IDs of all sites that had visits within the last
   * $this->shouldArchiveOnlySitesWithTrafficSince seconds, and logs them.
   *
   * @return array Site IDs as returned by SitesManager's getSitesIdWithVisits()
   */
  private function addWebsiteIdsWithVisitsSinceLastRun()
  {
      $sinceTimestamp = time() - $this->shouldArchiveOnlySitesWithTrafficSince;
      $sitesIdWithVisits = APISitesManager::getInstance()->getSitesIdWithVisits($sinceTimestamp);

      $idList = "";
      if (!empty($sitesIdWithVisits)) {
          $idList = ", IDs: " . implode(", ", $sitesIdWithVisits);
      }

      $prettySeconds = \Piwik\MetricsFormatter::getPrettyTimeFromSeconds(
          $this->shouldArchiveOnlySitesWithTrafficSince,
          true,
          false
      );

      $sitesCount = count($sitesIdWithVisits);
      $this->log("- Will process " . $sitesCount . " websites with new visits since " . $prettySeconds . " " . $idList);

      return $sitesIdWithVisits;
  }
  /**
   * Returns the list of timezones in which the reference timestamp
   * (now minus $this->shouldArchiveOnlySitesWithTrafficSince seconds) falls on a
   * different day than "now" in that same timezone.
   *
   * The candidate timezones are those returned by SitesManager's
   * getUniqueSiteTimezones().
   *
   * @return array
   */
  private function getTimezonesHavingNewDay()
  {
      $timestamp = time() - $this->shouldArchiveOnlySitesWithTrafficSince;
      $uniqueTimezones = APISitesManager::getInstance()->getUniqueSiteTimezones();

      $timezoneToProcess = array();
      // Iterate by value: the timezones are only read, so no reference is needed.
      foreach ($uniqueTimezones as $timezone) {
          $processedDateInTz = Date::factory((int)$timestamp, $timezone);
          $currentDateInTz = Date::factory('now', $timezone);

          if ($processedDateInTz->toString() != $currentDateInTz->toString()) {
              $timezoneToProcess[] = $timezone;
          }
      }

      return $timezoneToProcess;
  }
  /**
   * Tells whether the given "last processed" timestamp is at or after today's
   * midnight in the website's timezone.
   *
   * @param int $idSite
   * @param int|false $lastTimestampWebsiteProcessedDay Timestamp of the last processing,
   *                                                    or false; false is reported as true
   * @return bool
   */
  private function hasBeenProcessedSinceMidnight($idSite, $lastTimestampWebsiteProcessedDay)
  {
      if ($lastTimestampWebsiteProcessedDay === false) {
          return true;
      }

      $siteTimezone = Site::getTimezoneFor($idSite);

      $todayMidnight = Date::factory('now', $siteTimezone)->setTime('00:00:00');
      $lastProcessed = Date::factory((int) $lastTimestampWebsiteProcessedDay, $siteTimezone);

      $lastProcessedTimestamp = $lastProcessed->getTimestamp();
      $midnightTimestamp = $todayMidnight->getTimestamp();

      return $lastProcessedTimestamp >= $midnightTimestamp;
  }
  /**
   * Returns the websites located in timezones where today is a new day
   * (compared to the last time archiving was executed), excluding the websites
   * already present in $websiteIds.
   *
   * The result is also stored in $this->websiteDayHasFinishedSinceLastRun and
   * logged when it is not empty.
   *
   * @param array $websiteIds Website IDs that are already selected
   * @return array Website IDs
   */
  private function addWebsiteIdsInTimezoneWithNewDay($websiteIds)
  {
      $timezonesWithNewDay = $this->getTimezonesHavingNewDay();
      $idsInThoseTimezones = APISitesManager::getInstance()->getSitesIdFromTimezones($timezonesWithNewDay);

      // Keep only the sites that were not selected for another reason.
      $additionalIds = array_diff($idsInThoseTimezones, $websiteIds);
      $this->websiteDayHasFinishedSinceLastRun = $additionalIds;

      $additionalCount = count($additionalIds);
      if ($additionalCount > 0) {
          $idList = ", IDs: " . implode(", ", $additionalIds);
          $message = "- Will process " . $additionalCount
              . " other websites because the last time they were archived was on a different day (in the website's timezone) "
              . $idList;
          $this->log($message);
      }

      return $additionalIds;
  }
  /**
   * Tests that the configured Piwik URL is a valid Piwik endpoint by requesting
   * API.getDefaultMetricTranslations in serialized form; logs a fatal error when
   * the request fails or the response does not unserialize to an array.
   */
  private function checkPiwikUrlIsValid()
  {
      $response = $this->request("?module=API&method=API.getDefaultMetricTranslations&format=original&serialize=1");

      // NOTE(review): unserialize() runs on an HTTP response body; this is only
      // safe as long as the configured URL points to a trusted Piwik server.
      $responseUnserialized = @unserialize($response);

      $looksLikePiwik = ($response !== false) && is_array($responseUnserialized);
      if (!$looksLikePiwik) {
          $this->logFatalError("The Piwik URL {$this->piwikUrl} does not seem to be pointing to a Piwik server. Response was '$response'.");
      }
  }
  /**
   * Logs the "INIT" section: the Piwik URL in use and the running Piwik version.
   */
  private function logInitInfo()
  {
      $this->logSection("INIT");

      $installedAt = "Piwik is installed at: {$this->piwikUrl}";
      $runningAs = "Running Piwik " . Version::VERSION . " as Super User";

      $this->log($installedAt);
      $this->log($runningAs);
  }
  /**
   * Logs the "NOTES" section: a hint about browser-triggered archiving (when it is
   * enabled), the refresh intervals in use, and how long ago archiving last
   * completed without error (when known).
   */
  private function logArchiveTimeoutInfo()
  {
      $this->logSection("NOTES");

      // Recommend to disable browser archiving when using this script
      $browserTriggerEnabled = Rules::isBrowserTriggerEnabled();
      if ($browserTriggerEnabled) {
          $this->log("- If you execute this script at least once per hour (or more often) in a crontab, you may disable 'Browser trigger archiving' in Piwik UI > Settings > General Settings. ");
          $this->log(" See the doc at: http://piwik.org/docs/setup-auto-archiving/");
      }

      $todayMessage = "- Reports for today will be processed at most every " . $this->todayArchiveTimeToLive
          . " seconds. You can change this value in Piwik UI > Settings > General Settings.";
      $this->log($todayMessage);

      $periodsMessage = "- Reports for the current week/month/year will be refreshed at most every "
          . $this->processPeriodsMaximumEverySeconds . " seconds.";
      $this->log($periodsMessage);

      // Nothing more to report when no successful run was recorded
      if ($this->lastSuccessRunTimestamp === false) {
          return;
      }

      $secondsSinceLastSuccess = time() - $this->lastSuccessRunTimestamp;
      $prettyElapsed = \Piwik\MetricsFormatter::getPrettyTimeFromSeconds($secondsSinceLastSuccess, true, false);
      $this->log("- Archiving was last executed without error " . $prettyElapsed . " ago");
  }
  /**
   * Returns the delay in seconds, that should be enforced, between calling archiving for Periods Archives.
   * It can be set by --force-timeout-for-periods=X
   *
   * When the option is absent, empty or given without a value, the default
   * self::SECONDS_DELAY_BETWEEN_PERIOD_ARCHIVES is used. A requested value that
   * is not greater than $this->todayArchiveTimeToLive is raised to that value
   * and a warning is logged.
   *
   * The command line value is a string; it is converted to an integer before
   * being compared and returned, so the comparison does not depend on PHP's
   * string/number comparison rules and the return type matches the contract.
   *
   * @return int
   */
  private function getDelayBetweenPeriodsArchives()
  {
      $forceTimeoutPeriod = $this->getParameterFromCli("force-timeout-for-periods", true);
      if (empty($forceTimeoutPeriod) || $forceTimeoutPeriod === true) {
          return self::SECONDS_DELAY_BETWEEN_PERIOD_ARCHIVES;
      }

      $requestedDelay = (int) $forceTimeoutPeriod;

      // Ensure the cache for periods is at least as high as cache for today
      if ($requestedDelay > $this->todayArchiveTimeToLive) {
          return $requestedDelay;
      }

      // The raw command line value is logged so the user sees what was typed.
      $this->log("WARNING: Automatically increasing --force-timeout-for-periods from $forceTimeoutPeriod to "
          . $this->todayArchiveTimeToLive
          . " to match the cache timeout for Today's report specified in Piwik UI > Settings > General Settings");

      return $this->todayArchiveTimeToLive;
  }
  /**
   * Interprets the --force-all-periods command line option.
   *
   * @return bool|int false when the option is absent or empty, the value as an
   *                  integer when it is numeric and greater than 1, true otherwise
   */
  private function isShouldArchiveAllSitesWithTrafficSince()
  {
      $optionValue = $this->getParameterFromCli("force-all-periods", true);

      if (empty($optionValue)) {
          return false;
      }

      $isUsableNumber = is_numeric($optionValue) && $optionValue > 1;

      return $isUsableNumber ? (int) $optionValue : true;
  }
  /**
   * Removes the given site from the stored list of websites with invalidated
   * reports, if it is in that list, and saves the updated list in the
   * APICoreAdminHome::OPTION_INVALIDATED_IDSITES option.
   *
   * @param int $idSite
   */
  protected function setSiteIsArchived($idSite)
  {
      $invalidatedSiteIds = APICoreAdminHome::getWebsiteIdsToInvalidate();
      if (!count($invalidatedSiteIds)) {
          return;
      }

      // Loose comparison on purpose: IDs may be stored as strings or integers.
      $position = array_search($idSite, $invalidatedSiteIds);
      if ($position === false) {
          return;
      }

      unset($invalidatedSiteIds[$position]);
      Option::set(APICoreAdminHome::OPTION_INVALIDATED_IDSITES, serialize($invalidatedSiteIds));
  }
  /**
   * Logs the fatal error shown when the 'url' argument is missing or invalid
   * (passes false as the second argument of logFatalError()).
   */
  private function logFatalErrorUrlExpected()
  {
      $message = "./console core:archive expects the argument 'url' to be set to your Piwik URL, for example: --url=http://example.org/piwik/ "
          . "\n--help for more information";

      $this->logFatalError($message, false);
  }
  /**
   * Returns the number of visits of the last period contained in an API response.
   *
   * @param array $stats Metrics per period; the last element is the most recent period
   * @return int|mixed 0 when $stats is empty or when its last element has no
   *                   (or an empty) 'nb_visits' entry, otherwise that 'nb_visits' value
   */
  private function getVisitsLastPeriodFromApiResponse($stats)
  {
      if (empty($stats)) {
          return 0;
      }

      $today = end($stats);

      // A period without visits may come back without 'nb_visits';
      // treat it as zero instead of reading an undefined index.
      if (empty($today['nb_visits'])) {
          return 0;
      }

      return $today['nb_visits'];
  }
  /**
   * Sums the 'nb_visits' metric over all periods contained in an API response.
   * Periods without (or with an empty) 'nb_visits' entry are ignored.
   *
   * @param array $stats Metrics per period
   * @return int|mixed 0 when $stats is empty, otherwise the sum of 'nb_visits'
   */
  private function getVisitsFromApiResponse($stats)
  {
      if (empty($stats)) {
          return 0;
      }

      $totalVisits = 0;
      foreach ($stats as $periodMetrics) {
          if (!empty($periodMetrics['nb_visits'])) {
              $totalVisits += $periodMetrics['nb_visits'];
          }
      }

      return $totalVisits;
  }
  /**
   * Computes the N to use for the "lastN" date parameter of archiving API requests.
   *
   * N is the number of days elapsed since the website was last processed (or since
   * its creation date when no timestamp is given), plus 2, capped by a per-period
   * maximum. A value passed with --force-date-last-n on the command line overrides
   * the computed value.
   *
   * @param int $idSite
   * @param string $period Compared against 'year' and 'week'; other values use the default cap
   * @param int|false $lastTimestampWebsiteProcessed
   * @return float|int|string|true The computed number (float from floor(), or the int cap),
   *                               or the raw command line value when forced
   */
  private function getApiDateLastParameter($idSite, $period, $lastTimestampWebsiteProcessed = false)
  {
      switch ($period) {
          case 'year':
              $dateLastMax = self::DEFAULT_DATE_LAST_YEARS;
              break;
          case 'week':
              $dateLastMax = self::DEFAULT_DATE_LAST_WEEKS;
              break;
          default:
              $dateLastMax = self::DEFAULT_DATE_LAST;
              break;
      }

      if (empty($lastTimestampWebsiteProcessed)) {
          $creationDate = \Piwik\Site::getCreationDateFor($idSite);
          $lastTimestampWebsiteProcessed = strtotime($creationDate);
      }

      // Enforcing last2 at minimum to work around timing issues and ensure we make most archives available
      $secondsElapsed = time() - $lastTimestampWebsiteProcessed;
      $daysElapsed = floor($secondsElapsed / 86400) + 2;
      $dateLast = ($daysElapsed > $dateLastMax) ? $dateLastMax : $daysElapsed;

      $dateLastForced = $this->getParameterFromCli('--force-date-last-n', true);

      return empty($dateLastForced) ? $dateLast : $dateLastForced;
  }
  /**
   * Logs a one-line summary for a website/period that has just been archived.
   *
   * @param int $idSite
   * @param string $period Period name; 'day' is worded as "today", others as "this <period>"
   * @param int|float|string $dateLast Number of last periods that were requested
   * @param int $visitsInLastPeriods Logged after an (int) cast
   * @param int $visitsToday Logged after an (int) cast
   * @param Timer $timer Its string representation is appended to the message
   */
  private function logArchivedWebsite($idSite, $period, $dateLast, $visitsInLastPeriods, $visitsToday, Timer $timer)
  {
      $thisPeriod = "this " . $period;
      if ($period == "day") {
          $thisPeriod = "today";
      }

      $siteAndPeriod = "Archived website id = $idSite, period = $period, ";
      $visitsInRange = (int)$visitsInLastPeriods . " visits in last " . $dateLast . " " . $period . "s, ";
      $visitsCurrent = (int)$visitsToday . " visits " . $thisPeriod . ", ";

      $this->log($siteAndPeriod . $visitsInRange . $visitsCurrent . $timer->__toString());
  }
  1046. }