PageRenderTime 41ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/kernel/classes/ezstaticcache.php

http://github.com/ezsystems/ezpublish
PHP | 637 lines | 370 code | 64 blank | 203 comment | 42 complexity | 735447c163f5d066fc4543c2259f6032 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1
  1. <?php
  /**
   * File containing the eZStaticCache class.
   *
   * @copyright Copyright (C) eZ Systems AS. All rights reserved.
   * @license For full copyright and license information view LICENSE file distributed with this source code.
   * @version //autogentag//
   * @package kernel
   */
  /**
   * The eZStaticCache class manages the static cache system.
   *
   * This class can be used to generate static cache files usable
   * by the static cache system.
   *
   * Generating static cache is done by instantiating the class and then
   * calling generateCache(). For example:
   *
   * <code>
   * $staticCache = new eZStaticCache();
   * $staticCache->generateCache();
   * </code>
   *
   * To generate the URLs that must always be updated, call generateAlwaysUpdatedCache().
   *
   * @package kernel
   */
  class eZStaticCache implements ezpStaticCache
  {
      /**
       * User-Agent string passed to eZHTTPTool::getDataByURL() when fetching pages.
       */
      const USER_AGENT = 'eZ Publish static cache generator';

      /**
       * Actions recorded by addAction() and processed by executeActions().
       * Each entry is array( string $action, array $parameters ).
       *
       * @var array
       */
      private static $actionList = array();

      /**
       * The name of the host to fetch HTML data from.
       *
       * @deprecated deprecated since version 4.4, site.ini.[SiteSettings].SiteURL is used instead
       * @var string
       */
      private $hostName;

      /**
       * The base path for the directory where static files are placed.
       *
       * @var string
       */
      private $staticStorageDir;

      /**
       * The maximum depth of URLs that will be cached.
       *
       * @var int
       */
      private $maxCacheDepth;

      /**
       * Array of URLs to cache.
       *
       * @var array(int=>string)
       */
      private $cachedURLArray = array();

      /**
       * An array with siteaccess names that will be cached.
       *
       * @var array(int=>string)
       */
      private $cachedSiteAccesses = array();

      /**
       * An array with URLs that are always to be updated.
       *
       * @var array(int=>string)
       */
      private $alwaysUpdate;

      /**
       * Initialises the static cache object with settings from staticcache.ini.
       */
      public function __construct()
      {
          $ini = eZINI::instance( 'staticcache.ini' );
          $this->hostName = $ini->variable( 'CacheSettings', 'HostName' );
          $this->staticStorageDir = $ini->variable( 'CacheSettings', 'StaticStorageDir' );
          $this->maxCacheDepth = $ini->variable( 'CacheSettings', 'MaxCacheDepth' );
          $this->cachedURLArray = $ini->variable( 'CacheSettings', 'CachedURLArray' );
          $this->cachedSiteAccesses = $ini->variable( 'CacheSettings', 'CachedSiteAccesses' );
          $this->alwaysUpdate = $ini->variable( 'CacheSettings', 'AlwaysUpdateArray' );
      }

      /**
       * Getter method for {@link eZStaticCache::$hostName}
       *
       * @deprecated deprecated since version 4.4
       * @return string The currently configured host-name.
       */
      public function hostName()
      {
          return $this->hostName;
      }

      /**
       * Getter method for {@link eZStaticCache::$staticStorageDir}
       *
       * @return string The currently configured storage directory for the static cache.
       */
      public function storageDirectory()
      {
          return $this->staticStorageDir;
      }

      /**
       * Getter method for {@link eZStaticCache::$maxCacheDepth}
       *
       * @return int The maximum depth in the url which will be cached.
       */
      public function maxCacheDepth()
      {
          return $this->maxCacheDepth;
      }

      /**
       * Getter method for {@link eZStaticCache::$cachedSiteAccesses}
       *
       * @return array An array with site-access names that should be cached.
       */
      public function cachedSiteAccesses()
      {
          return $this->cachedSiteAccesses;
      }

      /**
       * Getter method for {@link eZStaticCache::$cachedURLArray}
       *
       * @return array An array with URLs that are to be cached statically, the URLs may contain wildcards.
       */
      public function cachedURLArray()
      {
          return $this->cachedURLArray;
      }

      /**
       * Getter method for {@link eZStaticCache::$alwaysUpdate}
       *
       * These URLs are configured with AlwaysUpdateArray in staticcache.ini.
       *
       * @see eZStaticCache::generateAlwaysUpdatedCache()
       * @return array An array with URLs that are always to be updated.
       */
      public function alwaysUpdateURLArray()
      {
          return $this->alwaysUpdate;
      }

      /**
       * Generates the caches for all URLs that must always be generated.
       *
       * Existing cache files are never skipped here: storeCache() is called
       * with $skipUnlink set to false.
       *
       * @param bool $quiet If true then the function will not output anything.
       * @param eZCLI|false $cli The eZCLI object or false if no output can be done.
       * @param bool $delay If true the work is queued with addAction() instead of being done immediately.
       */
      public function generateAlwaysUpdatedCache( $quiet = false, $cli = false, $delay = true )
      {
          $verbose = ( !$quiet and $cli );
          foreach ( $this->alwaysUpdate as $uri )
          {
              if ( $verbose )
                  $cli->output( "caching: $uri ", false );
              $this->storeCache( $uri, $this->staticStorageDir, array(), false, $delay );
              if ( $verbose )
                  $cli->output( "done" );
          }
      }

      /**
       * Generates caches for all the urls of nodes in $nodeList.
       *
       * The associative array must have one of these entries:
       * - node_id - ID of the node
       * - path_identification_string - The path_identification_string from the node table, is used to fetch the node ID if node_id is missing.
       *
       * Entries for which no node can be resolved are logged and skipped.
       *
       * @param array $nodeList An array with node entries, each entry is either the node ID or an associative array.
       */
      public function generateNodeListCache( $nodeList )
      {
          foreach ( $nodeList as $uri )
          {
              if ( !is_array( $uri ) )
              {
                  $nodeID = (int)$uri;
              }
              else if ( isset( $uri['node_id'] ) )
              {
                  $nodeID = (int)$uri['node_id'];
              }
              else
              {
                  // var_export() needs its second argument to return the dump instead of printing it.
                  eZDebug::writeError( "node_id is not set for uri entry " . var_export( $uri, true ) . ", will need to perform extra query to get node_id" );
                  $node = isset( $uri['path_identification_string'] )
                      ? eZContentObjectTreeNode::fetchByURLPath( $uri['path_identification_string'] )
                      : null;
                  if ( !is_object( $node ) )
                  {
                      // Nothing to cache for this entry; keep processing the remaining ones.
                      eZDebug::writeError( "Could not resolve a node for uri entry " . var_export( $uri, true ) . ", skipping it", 'Static Cache' );
                      continue;
                  }
                  $nodeID = (int)$node->attribute( 'node_id' );
              }

              $elements = eZURLAliasML::fetchByAction( 'eznode', $nodeID, true, true, true );
              foreach ( $elements as $element )
              {
                  $this->cacheURL( '/' . $element->getPath() );
              }
          }
      }

      /**
       * Generates the static cache from the configured INI settings.
       *
       * URLs without a wildcard are cached directly. URLs containing '*' are
       * expanded through eZURLAliasML, level by level, until no more children
       * are returned.
       *
       * @param bool $force If true then it will create all static caches even if it is not outdated.
       * @param bool $quiet If true then the function will not output anything.
       * @param eZCLI|false $cli The eZCLI object or false if no output can be done.
       * @param bool $delay If true the work is queued with addAction() instead of being done immediately.
       */
      public function generateCache( $force = false, $quiet = false, $cli = false, $delay = true )
      {
          $verbose = ( !$quiet and $cli );

          // This contains parent elements which must be checked to find new urls and put them in $generateList
          // Each entry contains:
          // - url - Url of parent
          // - glob - A glob string to filter direct children based on name
          // - org_url - The original url which was requested
          // - parent_id - The element ID of the parent (optional)
          // The parent_id will be used to quickly fetch the children, if not it will use the url
          $parentList = array();
          // A list of urls which must be generated, each entry is a string with the url
          $generateList = array();

          foreach ( $this->cachedURLArray() as $url )
          {
              if ( strpos( $url, '*' ) === false )
              {
                  $generateList[] = $url;
                  continue;
              }

              $queryURL = ltrim( str_replace( '*', '', $url ), '/' );
              $dir = dirname( $queryURL );
              if ( $dir == '.' )
                  $dir = '';
              $parentList[] = array( 'url' => $dir,
                                     'glob' => basename( $queryURL ),
                                     'org_url' => $url );
          }

          // As long as we have urls to generate or parents to check we loop
          while ( count( $generateList ) > 0 || count( $parentList ) > 0 )
          {
              // First generate single urls
              foreach ( $generateList as $generateURL )
              {
                  if ( $verbose )
                      $cli->output( "caching: $generateURL ", false );
                  $this->cacheURL( $generateURL, false, !$force, $delay );
                  if ( $verbose )
                      $cli->output( "done" );
              }
              $generateList = array();

              // Then check for more data
              $newParentList = array();
              foreach ( $parentList as $parentURL )
              {
                  if ( isset( $parentURL['parent_id'] ) )
                  {
                      $elements = eZURLAliasML::fetchByParentID( $parentURL['parent_id'], true, true, false );
                  }
                  else
                  {
                      if ( $verbose and $parentURL['glob'] )
                          $cli->output( "wildcard cache: " . $parentURL['url'] . '/' . $parentURL['glob'] . "*" );
                      $elements = eZURLAliasML::fetchByPath( $parentURL['url'], $parentURL['glob'] );
                  }

                  // Every element found is cached in the next round and also becomes a parent to expand.
                  foreach ( $elements as $element )
                  {
                      $generateList[] = '/' . $element->getPath();
                      $newParentList[] = array( 'parent_id' => $element->attribute( 'id' ) );
                  }
              }
              $parentList = $newParentList;
          }
      }

      /**
       * Generates the caches for the url $url using the currently configured storageDirectory().
       *
       * Nothing is cached when the URL has at least maxCacheDepth() slashes or
       * when it matches none of the entries in cachedURLArray().
       *
       * @param string $url The URL to cache, e.g /news
       * @param int|false $nodeID The ID of the node to cache, if supplied it will also cache content/view/full/xxx.
       * @param bool $skipExisting If true it will not unlink existing cache files.
       * @param bool $delay If true the work is queued with addAction() instead of being done immediately.
       * @return bool True if the URL was handed over for caching, false if it was rejected.
       */
      public function cacheURL( $url, $nodeID = false, $skipExisting = false, $delay = true )
      {
          // Check if URL should be cached
          if ( substr_count( $url, "/" ) >= $this->maxCacheDepth )
              return false;

          if ( !$this->isCacheableURL( $url ) )
              return false;

          $alternativeLocations = $nodeID ? array( "/content/view/full/$nodeID" ) : array();
          $this->storeCache( $url, $this->staticStorageDir, $alternativeLocations, $skipExisting, $delay );
          return true;
      }

      /**
       * Checks $url against the configured list of cacheable URLs.
       *
       * An entry matches when it is equal to $url, or when it contains '*'
       * and $url starts with the entry stripped of its '*' characters.
       *
       * @param string $url
       * @return bool
       */
      private function isCacheableURL( $url )
      {
          foreach ( $this->cachedURLArray as $cacheURL )
          {
              if ( $url == $cacheURL )
                  return true;

              if ( strpos( $cacheURL, '*' ) !== false )
              {
                  // strncmp() is used instead of strpos() because the prefix is empty
                  // for a bare '*' entry, and strpos() with an empty needle warns and
                  // fails before PHP 8.
                  $prefix = str_replace( '*', '', $cacheURL );
                  if ( strncmp( $url, $prefix, strlen( $prefix ) ) === 0 )
                      return true;
              }
          }
          return false;
      }

      /**
       * Stores the static cache for $url and hostname defined in site.ini.[SiteSettings].SiteURL for cached siteaccess
       * by fetching the web page using {@link eZHTTPTool::getDataByURL()} and storing the fetched HTML data.
       *
       * @param string $url The URL to cache, e.g /news
       * @param string $staticStorageDir The base directory for storing cache files.
       * @param array $alternativeStaticLocations Extra URLs under which the same content is stored.
       * @param bool $skipUnlink If true, cache files that already exist are left untouched.
       * @param bool $delay If true the store is queued with addAction() instead of being done immediately.
       */
      private function storeCache( $url, $staticStorageDir, $alternativeStaticLocations = array(), $skipUnlink = false, $delay = true )
      {
          foreach ( $this->cachedSiteAccesses as $cachedSiteAccess )
          {
              foreach ( $this->buildCacheDirPath( $cachedSiteAccess ) as $dirPart )
              {
                  $dir = $dirPart['dir'];

                  // staticcache.ini.[CacheSettings].HostName has been deprecated since version 4.4
                  // and will be removed in a future version. When it is not set, the hostname is
                  // read from site.ini.[SiteSettings].SiteURL per siteaccess defined in
                  // staticcache.ini.[CacheSettings].CachedSiteAccesses
                  if ( $this->hostName )
                      $sourceURL = "http://{$this->hostName}{$dir}{$url}";
                  else
                      $sourceURL = "http://{$dirPart['site_url']}{$url}";

                  $cacheFiles = array( $this->buildCacheFilename( $staticStorageDir, $dir . $url ) );
                  foreach ( $alternativeStaticLocations as $location )
                  {
                      $cacheFiles[] = $this->buildCacheFilename( $staticStorageDir, $dir . $location );
                  }

                  // Store new content; it is fetched at most once per successful request
                  // and shared by all cache files of this directory part.
                  $content = false;
                  foreach ( $cacheFiles as $file )
                  {
                      if ( $skipUnlink && file_exists( $file ) )
                          continue;

                      if ( $delay )
                      {
                          $this->addAction( 'store', array( $file, $sourceURL ) );
                          continue;
                      }

                      // Generate content, if required
                      if ( $content === false )
                      {
                          if ( eZHTTPTool::getDataByURL( $sourceURL, true, self::USER_AGENT ) )
                              $content = eZHTTPTool::getDataByURL( $sourceURL, false, self::USER_AGENT );
                      }

                      if ( $content === false )
                      {
                          eZDebug::writeError( "Could not grab content (from $sourceURL), is the hostname correct and Apache running?", 'Static Cache' );
                      }
                      else
                      {
                          self::storeCachedFile( $file, $content );
                      }
                  }
              }
          }
      }

      /**
       * Generates a full path to the cache file (index.html) based on the input parameters.
       *
       * Runs of consecutive slashes in the result are collapsed into one.
       *
       * @param string $staticStorageDir The storage for cache files.
       * @param string $url The URL for the current item, e.g /news
       * @return string The full path to the cache file (index.html).
       */
      private function buildCacheFilename( $staticStorageDir, $url )
      {
          return preg_replace( '#//+#', '/', "{$staticStorageDir}{$url}/index.html" );
      }

      /**
       * Generates the cache directory parts (path, siteaccess name, site URL)
       * depending on the match order type.
       *
       * Malformed HostUriMatchMapItems / HostMatchMapItems entries (too few
       * ';'-separated fields) are ignored.
       *
       * @param string $siteAccess
       * @return array A list of arrays as returned by buildCacheDirPart().
       */
      private function buildCacheDirPath( $siteAccess )
      {
          $dirParts = array();
          $ini = eZINI::instance();
          $matchOrderArray = $ini->variableArray( 'SiteAccessSettings', 'MatchOrder' );

          foreach ( $matchOrderArray as $matchOrderItem )
          {
              switch ( $matchOrderItem )
              {
                  case 'host_uri':
                      foreach ( $ini->variable( 'SiteAccessSettings', 'HostUriMatchMapItems' ) as $hostUriMatchMapItem )
                      {
                          $parts = explode( ';', $hostUriMatchMapItem );
                          if ( isset( $parts[2] ) && $parts[2] === $siteAccess )
                          {
                              $dirParts[] = $this->buildCacheDirPart( ( $parts[0] ? '/' . $parts[0] : '' ) .
                                                                      ( $parts[1] ? '/' . $parts[1] : '' ), $siteAccess );
                          }
                      }
                      break;

                  case 'host':
                      foreach ( $ini->variable( 'SiteAccessSettings', 'HostMatchMapItems' ) as $hostMatchMapItem )
                      {
                          $parts = explode( ';', $hostMatchMapItem );
                          if ( isset( $parts[1] ) && $parts[1] === $siteAccess )
                          {
                              $dirParts[] = $this->buildCacheDirPart( ( $parts[0] ? '/' . $parts[0] : '' ), $siteAccess );
                          }
                      }
                      break;

                  default:
                      $dirParts[] = $this->buildCacheDirPart( '/' . $siteAccess, $siteAccess );
                      break;
              }
          }

          return $dirParts;
      }

      /**
       * A helper method used to create the directory parts array.
       *
       * @param string $dir
       * @param string $siteAccess
       * @return array An array with the keys 'dir', 'access_name' and 'site_url';
       *               'site_url' is site.ini.[SiteSettings].SiteURL of $siteAccess.
       */
      private function buildCacheDirPart( $dir, $siteAccess )
      {
          $siteIni = eZSiteAccess::getIni( $siteAccess, 'site.ini' );

          return array( 'dir' => $dir,
                        'access_name' => $siteAccess,
                        'site_url' => $siteIni->variable( 'SiteSettings', 'SiteURL' ) );
      }

      /**
       * Stores the cache file $file with contents $content.
       * Takes care of setting proper permissions on the new file.
       *
       * The content is written to a uniquely named temporary file next to $file
       * which is then renamed onto $file. If the temporary file cannot be opened
       * an error is logged and $file is left untouched.
       *
       * @param string $file
       * @param string $content
       */
      public static function storeCachedFile( $file, $content )
      {
          $dir = dirname( $file );
          if ( !is_dir( $dir ) )
          {
              eZDir::mkdir( $dir, false, true );
          }

          $oldumask = umask( 0 );

          $tmpFileName = $file . '.' . md5( $file . uniqid( "ezp" . getmypid(), true ) );

          // Remove files, this might be necessary for Windows
          @unlink( $tmpFileName );

          // Write the new cache file with the data attached
          $fp = fopen( $tmpFileName, 'w' );
          if ( $fp )
          {
              $appendTime = eZINI::instance( 'staticcache.ini' )->variable( 'CacheSettings', 'AppendGeneratedTime' ) === 'true';
              $comment = $appendTime ? "<!-- Generated: " . date( 'Y-m-d H:i:s' ) . " -->\n\n" : '';

              fwrite( $fp, $content . $comment );
              fclose( $fp );
              eZFile::rename( $tmpFileName, $file, false, eZFile::CLEAN_ON_FAILURE | eZFile::APPEND_DEBUG_ON_FAILURE );

              $perm = eZINI::instance()->variable( 'FileSettings', 'StorageFilePermissions' );
              chmod( $file, octdec( $perm ) );
          }
          else
          {
              eZDebug::writeError( "Could not open temporary file $tmpFileName for writing, static cache file $file was not stored", 'Static Cache' );
          }

          umask( $oldumask );
      }

      /**
       * Removes the static cache file (index.html) and its directory if it exists.
       * The directory path is based upon the URL $url and the configured static storage dir.
       *
       * Failures of unlink/rmdir are deliberately suppressed.
       *
       * @param string $url The URL for the current item, e.g /news
       */
      public function removeURL( $url )
      {
          $dir = eZDir::path( array( $this->staticStorageDir, $url ) );

          @unlink( $dir . "/index.html" );
          @rmdir( $dir );
      }

      /**
       * This function adds an action to the list that is used at the end of the
       * request to remove and regenerate static cache files.
       *
       * @param string $action
       * @param array $parameters
       */
      private function addAction( $action, $parameters )
      {
          self::$actionList[] = array( $action, $parameters );
      }

      /**
       * This function goes over the list of recorded actions and executes them.
       *
       * Only the 'store' action is handled. When
       * staticcache.ini.[CacheSettings].CronjobCacheClear is 'enabled' each store
       * is written to the ezpending_actions table instead of being executed.
       * Otherwise the source URL is fetched and written to the destination file.
       * The action list is emptied afterwards.
       */
      public static function executeActions()
      {
          if ( empty( self::$actionList ) )
          {
              return;
          }

          // Fetched content per source URL (false when the fetch failed), so each URL is requested once.
          $fileContentCache = array();
          // Destinations already handled, to skip duplicates.
          $doneDestList = array();

          $ini = eZINI::instance( 'staticcache.ini' );
          $clearByCronjob = ( $ini->variable( 'CacheSettings', 'CronjobCacheClear' ) == 'enabled' );
          $db = $clearByCronjob ? eZDB::instance() : null;

          foreach ( self::$actionList as $entry )
          {
              list( $action, $parameters ) = $entry;
              if ( $action != 'store' )
                  continue;

              list( $destination, $source ) = $parameters;
              if ( isset( $doneDestList[$destination] ) )
                  continue;

              if ( $clearByCronjob )
              {
                  $param = $db->escapeString( $destination . ',' . $source );
                  $db->query( 'INSERT INTO ezpending_actions( action, param ) VALUES ( \'static_store\', \''. $param . '\' )' );
                  $doneDestList[$destination] = 1;
                  continue;
              }

              if ( !isset( $fileContentCache[$source] ) )
              {
                  if ( eZHTTPTool::getDataByURL( $source, true, self::USER_AGENT ) )
                      $fileContentCache[$source] = eZHTTPTool::getDataByURL( $source, false, self::USER_AGENT );
                  else
                      $fileContentCache[$source] = false;
              }

              if ( $fileContentCache[$source] === false )
              {
                  eZDebug::writeError( "Could not grab content (from $source), is the hostname correct and Apache running?", 'Static Cache' );
              }
              else
              {
                  self::storeCachedFile( $destination, $fileContentCache[$source] );
                  $doneDestList[$destination] = 1;
              }
          }

          self::$actionList = array();
      }
  }
  573. ?>