PageRenderTime 48ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/plugins/UserCountry/GeoIPAutoUpdater.php

https://github.com/CodeYellowBV/piwik
PHP | 682 lines | 394 code | 93 blank | 195 comment | 59 complexity | 29002ca398a946ab4ca050c16fced670 MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik\Plugins\UserCountry;
  10. require_once PIWIK_INCLUDE_PATH . "/core/ScheduledTask.php"; // for the tracker which doesn't include this file
  11. use Exception;
  12. use Piwik\Common;
  13. use Piwik\Date;
  14. use Piwik\Http;
  15. use Piwik\Log;
  16. use Piwik\Option;
  17. use Piwik\Piwik;
  18. use Piwik\Plugins\UserCountry\LocationProvider\GeoIp\Php;
  19. use Piwik\Plugins\UserCountry\LocationProvider\GeoIp;
  20. use Piwik\Plugins\UserCountry\LocationProvider;
  21. use Piwik\ScheduledTask;
  22. use Piwik\ScheduledTaskTimetable;
  23. use Piwik\ScheduledTime\Monthly;
  24. use Piwik\ScheduledTime\Weekly;
  25. use Piwik\TaskScheduler;
  26. use Piwik\Unzip;
  27. /**
  28. * Used to automatically update installed GeoIP databases, and manages the updater's
  29. * scheduled task.
  30. */
  31. class GeoIPAutoUpdater extends ScheduledTask
  32. {
  33. const SCHEDULE_PERIOD_MONTHLY = 'month';
  34. const SCHEDULE_PERIOD_WEEKLY = 'week';
  35. const SCHEDULE_PERIOD_OPTION_NAME = 'geoip.updater_period';
  36. const LOC_URL_OPTION_NAME = 'geoip.loc_db_url';
  37. const ISP_URL_OPTION_NAME = 'geoip.isp_db_url';
  38. const ORG_URL_OPTION_NAME = 'geoip.org_db_url';
  39. const LAST_RUN_TIME_OPTION_NAME = 'geoip.updater_last_run_time';
  40. private static $urlOptions = array(
  41. 'loc' => self::LOC_URL_OPTION_NAME,
  42. 'isp' => self::ISP_URL_OPTION_NAME,
  43. 'org' => self::ORG_URL_OPTION_NAME,
  44. );
  45. /**
  46. * PHP Error caught through a custom error handler while trying to use a downloaded
  47. * GeoIP database. See catchGeoIPError for more info.
  48. *
  49. * @var array
  50. */
  51. private static $unzipPhpError = null;
  /**
   * Creates the updater task, scheduled according to the configured update period.
   *
   * The schedule is weekly when the stored period equals SCHEDULE_PERIOD_WEEKLY and
   * monthly in every other case. The task invokes {@see update()} on this instance
   * and is registered with the lowest priority.
   */
  public function __construct()
  {
      // Per the original note: GeoIP updates are published on Tuesdays, so new
      // databases are fetched on Wednesday (day 3).
      if (self::getSchedulePeriod() == self::SCHEDULE_PERIOD_WEEKLY) {
          $runSchedule = new Weekly();
          $runSchedule->setDay(3);
      } else {
          // monthly is both the explicit and the fallback schedule
          $runSchedule = new Monthly();
          $runSchedule->setDayOfWeek(3, 0);
      }

      parent::__construct($this, 'update', null, $runSchedule, ScheduledTask::LOWEST_PRIORITY);
  }
  /**
   * Tries to download fresh location, ISP and organization GeoIP databases and
   * swap them in for the installed ones.
   *
   * The run time is recorded first. Every database type that has a non-empty URL
   * option is then downloaded, in the order given by self::$urlOptions. The installed
   * databases are re-checked afterwards whether or not the download succeeded.
   *
   * @throws Exception Re-thrown (after being logged) when any download or extraction fails.
   */
  public function update()
  {
      try {
          Option::set(self::LAST_RUN_TIME_OPTION_NAME, Date::factory('today')->getTimestamp());

          foreach (self::$urlOptions as $dbType => $optionName) {
              $dbUrl = Option::get($optionName);
              if (!empty($dbUrl)) {
                  $this->downloadFile($dbType, $dbUrl);
              }
          }
      } catch (Exception $failure) {
          // message will already be prefixed w/ 'GeoIPAutoUpdater: '
          Log::error($failure);

          $this->performRedundantDbChecks();
          throw $failure;
      }

      $this->performRedundantDbChecks();
  }
  /**
   * Downloads a GeoIP database archive, extracts the .dat file and overwrites the existing
   * old database.
   *
   * Any failure (HTTP error, unexpected return value, extraction/validation error) is
   * reported by throwing an Exception whose message is prefixed with 'GeoIPAutoUpdater: '.
   * The original exception, when there is one, is attached as the previous exception so
   * its stack trace is not lost.
   *
   * @param string $dbType Key into GeoIp::$dbNames (update() passes 'loc', 'isp' or 'org').
   * @param string $url URL to the database to download. The archive type is determined
   *                    from this URL.
   * @throws Exception If the archive type is unsupported, or the download or extraction fails.
   */
  protected function downloadFile($dbType, $url)
  {
      $ext = self::getGeoIPUrlExtension($url);

      // NOTE: using the first item in $dbNames[$dbType] makes sure GeoLiteCity will be renamed to GeoIPCity
      $zippedFilename = GeoIp::$dbNames[$dbType][0] . '.' . $ext;
      $zippedOutputPath = GeoIp::getPathForGeoIpDatabase($zippedFilename);

      $url = self::removeDateFromUrl($url);

      // download zipped file to misc dir (timeout: 3600, no custom user agent)
      try {
          $success = Http::sendHttpRequest($url, 3600, null, $zippedOutputPath);
      } catch (Exception $ex) {
          throw new Exception("GeoIPAutoUpdater: failed to download '$url' to "
              . "'$zippedOutputPath': " . $ex->getMessage(), 0, $ex);
      }

      if ($success !== true) {
          throw new Exception("GeoIPAutoUpdater: failed to download '$url' to "
              . "'$zippedOutputPath'! (Unknown error)");
      }

      Log::info("GeoIPAutoUpdater: successfully downloaded '%s'", $url);

      try {
          // the archive is deleted once it has been extracted
          self::unzipDownloadedFile($zippedOutputPath, true);
      } catch (Exception $ex) {
          throw new Exception("GeoIPAutoUpdater: failed to unzip '$zippedOutputPath' after "
              . "downloading " . "'$url': " . $ex->getMessage(), 0, $ex);
      }

      Log::info("GeoIPAutoUpdater: successfully updated GeoIP database '%s'", $url);
  }
  /**
   * Unzips a downloaded GeoIP database. Only unzips .gz & .tar.gz files.
   *
   * The archive is extracted to "<name>.dat.new", the result is tested by running a
   * geolocation lookup against it, and only then is it renamed over "<name>.dat".
   * If the test fails, both the extracted file and the archive are deleted
   * (regardless of $unlink) and the exception is re-thrown.
   *
   * @param string $path Path to zipped file.
   * @param bool $unlink Whether to unlink archive or not after a successful update.
   * @throws Exception If the archive type is unsupported, the archive cannot be read or
   *                   extracted, the output file cannot be written, or the extracted file
   *                   is not a usable GeoIP database.
   */
  public static function unzipDownloadedFile($path, $unlink = false)
  {
      $parts = explode('.', basename($path));
      $filenameStart = $parts[0];

      $dbFilename = $filenameStart . '.dat';
      $tempFilename = $filenameStart . '.dat.new';
      $outputPath = GeoIp::getPathForGeoIpDatabase($tempFilename);

      // extract file
      if (substr($path, -7, 7) == '.tar.gz') {
          // find the .dat file in the tar archive
          $unzip = Unzip::factory('tar.gz', $path);
          $content = $unzip->listContent();

          if (empty($content)) {
              throw new Exception(Piwik::translate('UserCountry_CannotListContent',
                  array("'$path'", $unzip->errorInfo())));
          }

          // if several entries share the expected basename, the last one wins
          $datFile = null;
          foreach ($content as $info) {
              $archivedPath = $info['filename'];
              if (basename($archivedPath) === $dbFilename) {
                  $datFile = $archivedPath;
              }
          }

          if ($datFile === null) {
              throw new Exception(Piwik::translate('UserCountry_CannotFindGeoIPDatabaseInArchive',
                  array($dbFilename, "'$path'")));
          }

          // extract JUST the .dat file
          $unzipped = $unzip->extractInString($datFile);

          if (empty($unzipped)) {
              throw new Exception(Piwik::translate('UserCountry_CannotUnzipDatFile',
                  array("'$path'", $unzip->errorInfo())));
          }

          // write unzipped to file; fail loudly instead of calling fwrite() on false
          $fd = fopen($outputPath, 'wb');
          if ($fd === false) {
              throw new Exception("GeoIPAutoUpdater: cannot open '$outputPath' for writing.");
          }
          fwrite($fd, $unzipped);
          fclose($fd);
      } else if (substr($path, -3, 3) == '.gz') {
          $unzip = Unzip::factory('gz', $path);
          $success = $unzip->extract($outputPath);

          if ($success !== true) {
              throw new Exception(Piwik::translate('UserCountry_CannotUnzipDatFile',
                  array("'$path'", $unzip->errorInfo())));
          }
      } else {
          // explode() takes the delimiter first, and end() needs a real variable
          $nameAndExt = explode('.', basename($path), 2);
          $ext = end($nameAndExt);
          throw new Exception(Piwik::translate('UserCountry_UnsupportedArchiveType', "'$ext'"));
      }

      try {
          // test that the new archive is a valid GeoIP database
          $dbType = GeoIp::getGeoIPDatabaseTypeFromFilename($dbFilename);
          if ($dbType === false) { // sanity check
              throw new Exception("Unexpected GeoIP archive file name '$path'.");
          }

          // provider that only knows about the freshly extracted file
          $customDbNames = array(
              'loc' => array(),
              'isp' => array(),
              'org' => array()
          );
          $customDbNames[$dbType] = array($tempFilename);

          $phpProvider = new Php($customDbNames);

          $location = self::getTestLocationCatchPhpErrors($phpProvider);

          if (empty($location)
              || self::$unzipPhpError !== null
          ) {
              if (self::$unzipPhpError !== null) {
                  list($errno, $errstr, $errfile, $errline) = self::$unzipPhpError;
                  Log::info("GeoIPAutoUpdater: Encountered PHP error when testing newly downloaded" .
                      " GeoIP database: %s: %s on line %s of %s.", $errno, $errstr, $errline, $errfile);
              }

              throw new Exception(Piwik::translate('UserCountry_ThisUrlIsNotAValidGeoIPDB'));
          }

          // delete the existing GeoIP database (if any) and rename the downloaded file
          $oldDbFile = GeoIp::getPathForGeoIpDatabase($dbFilename);
          if (file_exists($oldDbFile)) {
              unlink($oldDbFile);
          }

          $tempFile = GeoIp::getPathForGeoIpDatabase($tempFilename);
          rename($tempFile, $oldDbFile);

          // delete original archive
          if ($unlink) {
              unlink($path);
          }
      } catch (Exception $ex) {
          // remove downloaded files
          if (file_exists($outputPath)) {
              unlink($outputPath);
          }
          unlink($path);

          throw $ex;
      }
  }
  /**
   * Reads the updater settings from the current request and stores them.
   *
   * Request parameters read: 'loc_db', 'isp_db', 'org_db' (database URLs) and 'period'.
   * Each defaults to false when absent. The result is handed to setUpdaterOptions().
   */
  public static function setUpdaterOptionsFromUrl()
  {
      // option key => request parameter name
      $requestParams = array(
          'loc'    => 'loc_db',
          'isp'    => 'isp_db',
          'org'    => 'org_db',
          'period' => 'period',
      );

      $options = array();
      foreach ($requestParams as $optionKey => $paramName) {
          $options[$optionKey] = Common::getRequestVar($paramName, false, 'string');
      }

      // URLs should not be sanitized
      foreach (self::$urlOptions as $urlKey => $unusedOptionName) {
          $options[$urlKey] = Common::unsanitizeInputValue($options[$urlKey]);
      }

      self::setUpdaterOptions($options);
  }
  /**
   * Stores the updater settings found in $options.
   *
   * Recognised elements of $options:
   *   'loc'    - URL for location database.
   *   'isp'    - URL for ISP database.
   *   'org'    - URL for Organization database.
   *   'period' - self::SCHEDULE_PERIOD_WEEKLY ('week') or self::SCHEDULE_PERIOD_MONTHLY
   *              ('month'). When to run the updates.
   *
   * URL elements that are not set are skipped. A &date=... parameter is stripped from
   * each URL before it is saved. When a non-empty period is given, it is validated,
   * saved, and the updater task is rescheduled.
   *
   * @param array $options
   * @throws Exception If 'period' is non-empty and not one of the two supported values.
   */
  public static function setUpdaterOptions($options)
  {
      // set url options
      foreach (self::$urlOptions as $optionKey => $optionName) {
          if (isset($options[$optionKey])) {
              Option::set($optionName, self::removeDateFromUrl($options[$optionKey]));
          }
      }

      // set period option
      if (empty($options['period'])) {
          return;
      }

      $period = $options['period'];
      $isKnownPeriod = $period == self::SCHEDULE_PERIOD_MONTHLY
          || $period == self::SCHEDULE_PERIOD_WEEKLY;

      if (!$isKnownPeriod) {
          $supported = "'" . self::SCHEDULE_PERIOD_MONTHLY . "', '" . self::SCHEDULE_PERIOD_WEEKLY . "'";
          throw new Exception(Piwik::translate(
              'UserCountry_InvalidGeoIPUpdatePeriod',
              array("'$period'", $supported)
          ));
      }

      Option::set(self::SCHEDULE_PERIOD_OPTION_NAME, $period);
      TaskScheduler::rescheduleTask(new GeoIPAutoUpdater());
  }
  /**
   * Tells whether an update URL option exists for at least one database type.
   *
   * An option counts as existing when Option::get() returns anything other than
   * false for it.
   *
   * @return bool
   */
  public static function isUpdaterSetup()
  {
      foreach (self::$urlOptions as $optionName) {
          if (Option::get($optionName) !== false) {
              return true;
          }
      }

      return false;
  }
  /**
   * Collects the stored update URL of every GeoIP database type.
   *
   * @return array Keys are the database types of self::$urlOptions ('loc', 'isp', 'org');
   *               values are whatever Option::get() returns for the matching option.
   */
  public static function getConfiguredUrls()
  {
      $urlsByType = array();

      foreach (self::$urlOptions as $dbType => $optionName) {
          $urlsByType[$dbType] = Option::get($optionName);
      }

      return $urlsByType;
  }
  /**
   * Returns the configured URL (if any) for a type of database.
   *
   * @param string $key 'loc', 'isp' or 'org'
   * @throws Exception If $key is not a known database type.
   * @return string|false
   */
  public static function getConfiguredUrl($key)
  {
      $optionName = isset(self::$urlOptions[$key]) ? self::$urlOptions[$key] : '';

      if (empty($optionName)) {
          throw new Exception("Invalid key $key");
      }

      return Option::get($optionName);
  }
  /**
   * Runs a GeoIP database update right away, using a freshly created updater.
   */
  public static function performUpdate()
  {
      $updater = new self();
      $updater->update();
  }
  /**
   * Returns the stored update period, falling back to self::SCHEDULE_PERIOD_MONTHLY
   * ('month') when no period option has been stored.
   *
   * @return string
   */
  public static function getSchedulePeriod()
  {
      $storedPeriod = Option::get(self::SCHEDULE_PERIOD_OPTION_NAME);

      return $storedPeriod === false ? self::SCHEDULE_PERIOD_MONTHLY : $storedPeriod;
  }
  /**
   * Lists the database types that have an update URL configured but for which no
   * database file can be located.
   *
   * @return array List of type keys (ie, 'loc', 'isp' or 'org').
   */
  public static function getMissingDatabases()
  {
      $missingTypes = array();

      foreach (self::getConfiguredUrls() as $dbType => $updateUrl) {
          if (empty($updateUrl)) {
              continue; // nothing to update from, so nothing can be "missing"
          }

          // a URL exists but no file of this type was found => the database is missing
          if (GeoIp::getPathToGeoIpDatabase(GeoIp::$dbNames[$dbType]) === false) {
              $missingTypes[] = $dbType;
          }
      }

      return $missingTypes;
  }
  /**
   * Determines the archive extension of a GeoIP update URL.
   *
   * A "suffix=" query parameter (special to MaxMind URLs) takes precedence. Otherwise
   * the extension is everything after the first dot of the URL's basename, or the whole
   * basename when it contains no dot.
   *
   * @param string $url
   * @return string The extension, without a leading dot.
   * @throws \Exception If the extension is not a supported archive type.
   */
  public static function getGeoIPUrlExtension($url)
  {
      $suffixMatch = array();

      if (preg_match('/suffix=([^&]+)/', $url, $suffixMatch)) {
          // check for &suffix= query param that is special to MaxMind URLs
          $ext = $suffixMatch[1];
      } else {
          // use basename of url; with a single piece, end() yields that piece itself
          $nameAndExt = explode('.', basename($url), 2);
          $ext = end($nameAndExt);
      }

      self::checkForSupportedArchiveType($ext);

      return $ext;
  }
  /**
   * Rejects archive types that cannot be unzipped, so they are never downloaded
   * in the first place.
   *
   * Accepted extensions: 'tar.gz', 'gz' and 'dat.gz'.
   *
   * @param string $ext The URL file's extension.
   * @throws \Exception If $ext is not one of the accepted extensions.
   */
  private static function checkForSupportedArchiveType($ext)
  {
      switch ($ext) {
          case 'tar.gz':
          case 'gz':
          case 'dat.gz':
              return;
      }

      throw new \Exception(Piwik::translate('UserCountry_UnsupportedArchiveType', "'$ext'"));
  }
  /**
   * Tests a location provider using a test IP address and catches PHP errors
   * (ie, notices) if they occur. PHP error information is held in self::$unzipPhpError,
   * which is reset to null before the test.
   *
   * The temporary error handler is always removed again, even when the provider
   * throws an exception.
   *
   * @param LocationProvider $provider The provider to test.
   * @return array|false $location The result of geolocation. False if no location
   *                               can be found.
   * @throws Exception Any exception thrown by the provider is re-thrown unchanged.
   */
  private static function getTestLocationCatchPhpErrors($provider)
  {
      // note: in most cases where this will fail, the error will usually be a PHP error/notice.
      // in order to delete the files in such a case (which can be caused by a man-in-the-middle attack)
      // we need to catch them, so we set a new error handler.
      self::$unzipPhpError = null;
      set_error_handler(array('Piwik\Plugins\UserCountry\GeoIPAutoUpdater', 'catchGeoIPError'));

      try {
          $location = $provider->getLocation(array('ip' => GeoIp::TEST_IP));
      } catch (Exception $ex) {
          // do not leave our handler installed for the rest of the request
          restore_error_handler();
          throw $ex;
      }

      restore_error_handler();

      return $location;
  }
  /**
   * Safety net run after every update attempt: geolocation is tried against each
   * database type on its own, and a database that triggers a PHP error is renamed
   * so that tracking keeps working.
   *
   * Databases are renamed to ${original}.broken, replacing any file already there.
   *
   * Note: method is protected for testability.
   */
  protected function performRedundantDbChecks()
  {
      foreach (GeoIp::$dbNames as $type => $namesForType) {
          // create provider that only uses the DB type we're testing
          $singleTypeNames = array(
              'loc' => array(),
              'isp' => array(),
              'org' => array()
          );
          $singleTypeNames[$type] = $namesForType;

          self::getTestLocationCatchPhpErrors(new Php($singleTypeNames));

          if (self::$unzipPhpError === null) {
              continue; // this database works
          }

          list($errno, $errstr, $errfile, $errline) = self::$unzipPhpError;
          Log::warning("GeoIPAutoUpdater: Encountered PHP error when performing redundant tests on GeoIP "
              . "%s database: %s: %s on line %s of %s.", $type, $errno, $errstr, $errline, $errfile);

          // get the current filename for the DB and an available new one to rename it to
          list($brokenDbPath, $renamedPath) = $this->getOldAndNewPathsForBrokenDb($namesForType);

          if ($brokenDbPath === false || $renamedPath === false) {
              continue; // no file to move out of the way
          }

          // rename the DB so tracking will not fail
          if (file_exists($renamedPath)) {
              unlink($renamedPath);
          }
          rename($brokenDbPath, $renamedPath);
      }
  }
  /**
   * Locates a GeoIP database and derives the name it should get if it is broken.
   *
   * @param array $possibleDbNames The possible names of the database.
   * @return array Two elements: the path of the existing database (false when none
   *               was found) and that path with ".broken" appended (false when the
   *               first element is false).
   */
  private function getOldAndNewPathsForBrokenDb($possibleDbNames)
  {
      $currentPath = GeoIp::getPathToGeoIpDatabase($possibleDbNames);
      $brokenPath = $currentPath === false ? false : $currentPath . ".broken";

      return array($currentPath, $brokenPath);
  }
  /**
   * Error handler installed (via set_error_handler) while a GeoIP database is being
   * tested.
   *
   * A downloaded file is verified simply by trying to use it; a bad file usually
   * surfaces as a PHP error rather than an exception. This handler records the
   * error in self::$unzipPhpError as array($errno, $errstr, $errfile, $errline),
   * overwriting any error recorded earlier.
   *
   * @param int $errno
   * @param string $errstr
   * @param string $errfile
   * @param int $errline
   */
  public static function catchGeoIPError($errno, $errstr, $errfile, $errline)
  {
      $capturedError = array($errno, $errstr, $errfile, $errline);

      self::$unzipPhpError = $capturedError;
  }
  /**
   * Returns when the auto updater last ran, based on the stored run-time option.
   *
   * @return Date|false False when no run time has been stored yet.
   */
  public static function getLastRunTime()
  {
      $storedTimestamp = Option::get(self::LAST_RUN_TIME_OPTION_NAME);

      if ($storedTimestamp === false) {
          return false;
      }

      return Date::factory((int)$storedTimestamp);
  }
  /**
   * Removes the date=... query parameter if present in the URL. This query parameter
   * is in MaxMind URLs by default and will force the download of an old database.
   *
   * The parameter is removed wherever it sits in the query string: after another
   * parameter ("&date=..."), as the first of several ("?date=...&x=1" becomes "?x=1"),
   * or as the only one ("?date=..." is dropped together with the "?").
   *
   * @param string $url
   * @return string
   */
  private static function removeDateFromUrl($url)
  {
      // patterns are applied in order, each on the result of the previous one
      $patterns = array(
          "/&date=[^&#]*/",    // date follows another parameter
          "/\\?date=[^&#]*&/", // date is first and more parameters follow
          "/\\?date=[^&#]*/",  // date is the only parameter
      );
      $replacements = array('', '?', '');

      return preg_replace($patterns, $replacements, $url);
  }
  /**
   * Looks up, in the scheduled task timetable, when the auto updater is due next.
   *
   * @return Date|false
   */
  public static function getNextRunTime()
  {
      $updater = new self();
      $taskTimetable = new ScheduledTaskTimetable();

      return $taskTimetable->getScheduledTaskTime($updater->getName());
  }
  /**
   * See {@link Piwik\ScheduledTime::getRescheduledTime()}.
   *
   * Returns the current time instead of the regular next run time when at least
   * one GeoIP database is out of date, so the updater runs as soon as possible.
   */
  public function getRescheduledTime()
  {
      $regularNextRun = parent::getRescheduledTime();

      return $this->isAtLeastOneGeoIpDbOutOfDate($regularNextRun)
          ? time()
          : $regularNextRun;
  }
  /**
   * Checks whether any GeoIP database that has an update URL is older than the
   * previous scheduled run (taken at 00:00:00 of that day), or is missing altogether.
   *
   * Database types without an update URL are ignored. A type with a URL but no
   * database file on disk counts as out of date; it is detected explicitly rather
   * than by calling filemtime() on a non-path value.
   *
   * @param mixed $rescheduledTime The next scheduled run time, as accepted by Date::factory().
   * @return bool
   * @throws Exception If the stored update period is unknown (see getPreviousScheduledTime()).
   */
  private function isAtLeastOneGeoIpDbOutOfDate($rescheduledTime)
  {
      $previousScheduledRuntime = $this->getPreviousScheduledTime($rescheduledTime)->setTime("00:00:00")->getTimestamp();

      foreach (GeoIp::$dbNames as $type => $dbNames) {
          // a type we hold no URL option for can never be updated by this class
          if (!isset(self::$urlOptions[$type])) {
              continue;
          }

          $dbUrl = Option::get(self::$urlOptions[$type]);
          if (empty($dbUrl)) {
              continue;
          }

          $dbPath = GeoIp::getPathToGeoIpDatabase($dbNames);

          // URL configured but no file on disk: fetch it as soon as possible
          if ($dbPath === false) {
              return true;
          }

          // the GeoIP DB file's last modified time is before the time the updater should
          // have been previously run => **the file is out of date**
          if (filemtime($dbPath) < $previousScheduledRuntime) {
              return true;
          }
      }

      return false;
  }
  /**
   * Computes the run time one period (a week or a month, depending on the stored
   * update period) before the given rescheduled time.
   *
   * @param mixed $rescheduledTime The next scheduled run time, as accepted by Date::factory().
   * @return Date
   * @throws Exception If the stored update period is neither weekly nor monthly.
   */
  private function getPreviousScheduledTime($rescheduledTime)
  {
      $updaterPeriod = self::getSchedulePeriod();

      if ($updaterPeriod == self::SCHEDULE_PERIOD_WEEKLY) {
          return Date::factory($rescheduledTime)->subWeek(1);
      }

      if ($updaterPeriod == self::SCHEDULE_PERIOD_MONTHLY) {
          return Date::factory($rescheduledTime)->subMonth(1);
      }

      // Exception's second argument is an integer code, not a format argument, so the
      // period has to be placed into the message itself.
      throw new Exception("Unknown GeoIP updater period found in database: $updaterPeriod");
  }
  589. }