PageRenderTime 60ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/inc/popoon/sitemap.php

https://github.com/chregu/fluxcms
PHP | 677 lines | 387 code | 86 blank | 204 comment | 78 complexity | 60c8f32dfdead9b3a026d1411dac21f3 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, Apache-2.0, LGPL-2.1
  1. <?php
  2. // +----------------------------------------------------------------------+
  3. // | popoon |
  4. // +----------------------------------------------------------------------+
  5. // | Copyright (c) 2001-2006 Bitflux GmbH |
  6. // +----------------------------------------------------------------------+
  7. // | Licensed under the Apache License, Version 2.0 (the "License"); |
  8. // | you may not use this file except in compliance with the License. |
  9. // | You may obtain a copy of the License at |
  10. // | http://www.apache.org/licenses/LICENSE-2.0 |
  11. // | Unless required by applicable law or agreed to in writing, software |
  12. // | distributed under the License is distributed on an "AS IS" BASIS, |
  13. // | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
  14. // | implied. See the License for the specific language governing |
  15. // | permissions and limitations under the License. |
  16. // +----------------------------------------------------------------------+
  17. // | Author: Christian Stocker <chregu@bitflux.ch> |
  18. // +----------------------------------------------------------------------+
  19. //
  20. // $Id$
  21. /**
  22. * Class for doing the sitemap parsing stuff
  23. *
  24. * @author Christian Stocker <chregu@bitflux.ch>
  25. * @version $Id$
  26. * @package popoon
  27. */
  class popoon_sitemap
  {
      /**
       * Stack of parameter maps used for {name} substitution.
       *
       * addMap() pushes to the front, so index 0 is the most recently added
       * map and every "../" in a reference selects one level further back.
       *
       * @var array
       */
      private $maps = array();

      /**
       * The sitemap file handled by the most recent constructor run
       *
       * @var string|null
       */
      public $file = null;

      /**
       * The uri the pipelines are matched against
       *
       * @var string
       */
      public $uri = "";

      public $xml = null;

      /**
       * The first sitemap file this object was constructed with
       * (kept when further sitemaps are mounted)
       *
       * @var string|null
       */
      public $rootFile = null;

      /**
       * Contains the header, which will be output just before serializing
       *
       * Keys starting with "_" are user data (see setUserData()) and are
       * never sent to the client.
       *
       * @var array
       */
      public $header = array();

      /**
       * HTTP response code which is sent with the first header
       *
       * @var int|null
       */
      private $responseCode = NULL;

      /**
       * The contenttype
       *
       * should be set within components (resp. serializers) with
       * $this->sitemap->setContentType($type);
       *
       * @var string contenttype
       */
      private $contentType = "text/html";

      /**
       * The Directory, where the cached sitemaps should be saved
       *
       * @var string
       */
      public $cacheDir = "./tmp/";

      /**
       * Contains instance of ComponentCache or false
       *
       * @var mixed
       * @see enableCaching(), disableCaching()
       */
      private $componentCache = false;

      public $outputCache = false;

      /**
       * Value of $pipelineHit after the last cached sitemap was run
       *
       * @var mixed
       * @see runSitemap()
       */
      public $pipelineHit = false;

      /**
       * The directory, where the xslt documents for
       * sitemap 2 php translation are.
       *
       * @var string
       */
      private $sm2php_xsl_dir = "sitemap/";

      /**
       * The XSL File for transforming sitemap.xsl to a cached php file
       *
       * You shouldn't have to change it.
       *
       * @var string
       */
      private $sm2php_xsl = "sitemap2php.xsl";

      private $sm2phpincludes_xsl = "sitemap2phpincludes.xsl";

      /**
       * The options object handed to the constructor
       *
       * @var popoon_classes_config|null
       */
      public $options;

      /**
       * Constructor
       *
       * Almost everything happens here. The cached sitemap is generated here, if it
       * doesn't exist or if it's older than the sitemap itself.
       * Then this cached sitemap is included and the code in it is run.
       *
       * The return value is only visible when the constructor is invoked
       * explicitly as a method, which is what _mount() does.
       *
       * @param string $sitemapFile the location of sitemap.xml, can be relative
       * @param string $uri the uri of the call, optional (takes _SERVER["REQUEST_URI"] then)
       * @param popoon_classes_config $options options; every key that matches an
       *        already set property of this class overrides that property
       * @param array $maps initial parameter maps, optional
       * @access public
       * @return mixed the pipeline hit state, or the result of popoon::raiseError()
       */
      public function __construct($sitemapFile, $uri = null, popoon_classes_config $options = NULL, $maps = NULL)
      {
          if (!$this->rootFile) {
              $this->rootFile = $sitemapFile;
          }
          if ($maps) {
              $this->maps = $maps;
          }
          $this->options = $options;

          // Without this guard a missing $options triggered a warning in the foreach.
          if ($options !== null) {
              //FIXME use new config object class...
              if (!isset($options['sm2php_xsl_dir']) && isset($options['sm2php_xsl'])) {
                  $options['sm2php_xsl_dir'] = dirname($options['sm2php_xsl']);
                  $options['sm2php_xsl'] = basename($options['sm2php_xsl']);
              }
              //replace class-properties by values in the options
              foreach ($options as $key => $value) {
                  if (isset($this->$key)) {
                      $this->$key = $value;
                  }
              }
          }

          $this->file = $sitemapFile;
          if ($uri === null) {
              $this->uri = isset($_SERVER["REQUEST_URI"]) ? $_SERVER["REQUEST_URI"] : "";
          } else {
              $this->uri = $uri;
          }

          //generate paths and ids
          $sitemapRealPath = realpath($sitemapFile);
          if (!$sitemapRealPath) {
              return popoon::raiseError("Sitemap $sitemapFile does not exist", POPOON_ERROR_FATAL);
          }
          $sitemapCachedFile = $this->cacheDir . $this->generateSitemapID($sitemapRealPath);

          // (re)build the cached file if it is missing or older than the sitemap.xml
          $cacheIsFresh = file_exists($sitemapCachedFile)
              && filemtime($sitemapCachedFile) >= filemtime($sitemapRealPath);
          if (!$cacheIsFresh) {
              $this->sitemap2php($sitemapRealPath, $sitemapCachedFile);
          }

          return $this->runSitemap($sitemapCachedFile);
      }

      /**
       * Runs the cached Sitemap
       *
       * The cached file is included in the scope of this method, so it sees
       * $this, $sitemapCachedFile and $pipelineHit. The local variable names
       * must therefore not be changed; the included code is expected to set
       * $pipelineHit.
       *
       * @param string $sitemapCachedFile location of the cached file
       * @return mixed the value of $pipelineHit after the include (false by default)
       */
      public function runSitemap($sitemapCachedFile)
      {
          $pipelineHit = false;
          //include the sitemap file
          include ($sitemapCachedFile);
          $this->pipelineHit = $pipelineHit;
          return $pipelineHit;
      }

      /**
       * Returns a copy of the options object
       *
       * @param bool $clone ignored, a clone is always returned
       * @return popoon_classes_config|null clone of the options, null if none were set
       */
      public function getOptions($clone = false)
      {
          if (!is_object($this->options)) {
              return null;
          }
          return clone $this->options;
      }

      /**
       * Sets a header, which is output just before the serializer
       *
       * An empty value (null, false, "") removes the header. Numeric values
       * such as "0" are kept, so that e.g. "Content-Length: 0" can be set.
       *
       * @param string $name name of the header
       * @param string $value value of the header
       */
      public function setHeader($name, $value = null)
      {
          if ($value || is_numeric($value)) {
              $this->header[$name] = $value;
          } else {
              unset($this->header[$name]);
          }
      }

      /**
       * Sets multiple headers which are merged into the existing ones.
       *
       * @param array $headers headers to be set
       */
      public function setHeaders($headers)
      {
          $this->header = array_merge($this->header, $headers);
      }

      /**
       * Stores arbitrary user data under a "_"-prefixed key of the header array
       *
       * @param string $name name of the entry
       * @param mixed $value value to store
       */
      public function setUserData($name, $value)
      {
          $this->header['_' . $name] = $value;
      }

      /**
       * Returns user data stored with setUserData()
       *
       * @param string $name name of the entry
       * @return mixed the stored value, null if nothing was stored
       */
      public function getUserData($name)
      {
          $key = '_' . $name;
          return isset($this->header[$key]) ? $this->header[$key] : null;
      }

      /**
       * Sets a header only if no header with that name is set yet
       *
       * @param string $name name of the header
       * @param string $value value of the header
       */
      public function setHeaderIfNotExists($name, $value)
      {
          if (!isset($this->header[$name])) {
              $this->setHeader($name, $value);
          }
      }

      /**
       * Sets a HTTP response code, which is sent with the first header
       *
       * @param int $value response code value
       */
      public function setResponseCode($value)
      {
          $this->responseCode = $value;
      }

      /**
       * Sets a header, and directly outputs it
       *
       * Very useful in DomStart of serializers, as the printHeader()
       * function is called before...
       *
       * @param string $name name of the header
       * @param string $value value of the header
       */
      public function setHeaderAndPrint($name, $value)
      {
          $this->setHeader($name, $value);
          header("$name: $value");
      }

      /**
       * Sets the last modified time
       *
       * @param int $time unixtime last modified, values <= 0 are ignored
       */
      public function setLastModified($time)
      {
          if ($time > 0) {
              $this->setHeader("Last-Modified", self::httpDate($time));
          }
      }

      /**
       * Sets the content type of the document
       *
       * @param string $type content type
       */
      public function setContentType($type)
      {
          $this->contentType = $type;
      }

      /**
       * If noCache is set, disables all http caching
       * headers according to http://dclp-faq.de/q/q-http-caching.html
       *
       * Otherwise "Expires" and "Cache-Control" are set (unless already
       * present) so that the response may be cached for $expireTime seconds.
       *
       * @param bool $noCache true to forbid caching
       * @param int $expireTime lifetime in seconds, integer 0 forbids caching as well
       */
      public function setCacheHeaders($noCache, $expireTime = 10)
      {
          if ($noCache || $expireTime === 0) {
              $date = self::httpDate(time() - 10);
              $this->setHeader("Expires", $date);
              $this->setHeaderIfNotExists("Last-Modified", $date);
              $this->setHeader("Pragma", "no-cache");
              $this->setHeader("Cache-Control", "no-store, no-cache, must-revalidate, post-check=0, pre-check=0");
              return;
          }

          if (!$expireTime) {
              $expireTime = 10;
          }

          // For user agents containing "bot" the lifetime is the age of the
          // document, limited to two weeks. "!== false" also catches agents
          // that start with "bot".
          $isBot = !empty($_SERVER['HTTP_USER_AGENT'])
              && strpos($_SERVER['HTTP_USER_AGENT'], "bot") !== false;
          if ($isBot && !empty($this->header['Last-Modified'])) {
              $maxTime = 24 * 14 * 3600;
              $age = time() - strtotime($this->header['Last-Modified']);
              // a Last-Modified in the future must not produce a negative max-age
              $expireTime = max(0, min($age, $maxTime));
          }

          $this->setHeaderIfNotExists("Expires", self::httpDate(time() + $expireTime));
          $this->setHeaderIfNotExists("Cache-Control", "public, max-age=$expireTime");
          // remove a previously set Pragma header
          $this->setHeader("Pragma");
      }

      /**
       * Prints all the header in $this->header
       *
       * this function is called from components/serializer.php in the constructor
       *
       * Entries whose name starts with "_" are user data and are skipped.
       * The response code is passed along with the first header only.
       */
      public function printHeader()
      {
          if (headers_sent()) {
              return;
          }
          $this->setHeader("Content-Type", $this->contentType);

          $responseCodeSent = FALSE;
          foreach ($this->header as $name => $value) {
              if (substr($name, 0, 1) == "_") {
                  continue;
              }
              if ($responseCodeSent) {
                  header("$name: $value");
              } else {
                  // 0 tells header() to leave the response code untouched
                  header("$name: $value", TRUE, (int) $this->responseCode);
                  $responseCodeSent = TRUE;
              }
          }
      }

      /**
       * generates a unique ID out of a string (path+filename)
       *
       * This method is used for generating a unique ID for every sitemap
       * which has to be compiled. It just takes the realpath as input,
       * and returns a unique ID for it.
       * In this case it replaces every DIRECTORY_SEPARATOR and ":" with _
       * for better debugging. Theoretically one could use md5 as well.
       *
       * @param string $realpath Any string, but in this class normally a fullpath+filename
       * @access public
       * @return string ID
       */
      public function generateSitemapID($realpath)
      {
          return str_replace(array(DIRECTORY_SEPARATOR, ":"), "_", $realpath);
      }

      /**
       * Generates the cached sitemap
       *
       * This function generates a php file out of a sitemap.xml with the help
       * of two xsl files: the sitemap is first run through
       * $sm2phpincludes_xsl, the result is then transformed with $sm2php_xsl
       * and written to $sitemapCachedFile.
       *
       * @param string $sitemapRealPath the absolute location of sitemap.xml
       * @param string $sitemapCachedFile the absolute location of the cached sitemap (php file)
       * @access public
       * @return mixed true on success, false if the cached file could not be
       *         written, otherwise the result of popoon::raiseError()
       * @throws PopoonFileNotFoundException if the sitemap does not exist
       * @throws PopoonXMLParseErrorException if the sitemap can not be parsed
       */
      public function sitemap2php($sitemapRealPath, $sitemapCachedFile)
      {
          //file_exists and is writable should be the normal case...
          if (!is_writable($sitemapCachedFile)) {
              $cacheDir = dirname($sitemapCachedFile);
              if (!realpath($cacheDir)) {
                  // recursive, so that nested cache directories can be created as well
                  if (!mkdir($cacheDir . "/", 0777, true)) {
                      return popoon::raiseError("The cache directory " . $cacheDir . " does not exist", POPOON_ERROR_FATAL);
                  }
              }
              if (!is_writable($cacheDir)) {
                  return popoon::raiseError("The cache directory " . realpath($cacheDir) . " is not writable", POPOON_ERROR_FATAL, __FILE__, __LINE__);
              }
              if (file_exists($sitemapCachedFile) && !is_writable($sitemapCachedFile)) {
                  return popoon::raiseError("The cache file " . realpath($sitemapCachedFile) . " is not writable", POPOON_ERROR_FATAL);
              }
          }

          // check for xslt support before anything else is done
          if (!class_exists("XsltProcessor")) {
              return popoon::raiseError("Popoon doesn't run without XSLT support in PHP.", POPOON_ERROR_FATAL, __FILE__, __LINE__);
          }

          $xslFile = $this->sm2php_xsl_dir . "/" . $this->sm2php_xsl;
          $xslDom = new DomDocument();
          $xslDom->resolveExternals = true;
          $xslDom->substituteEntities = true;
          if (!$xslDom->load($xslFile)) {
              return popoon::raiseError("Could not load $xslFile", POPOON_ERROR_FATAL, __FILE__, __LINE__);
          }
          $xsl = new XsltProcessor();
          $xsl->importStylesheet($xslDom);
          $xsl->registerPhpFunctions();

          $xslIncludesFile = $this->sm2php_xsl_dir . "/" . $this->sm2phpincludes_xsl;
          $xslincludesDom = new DomDocument();
          if (!$xslincludesDom->load($xslIncludesFile)) {
              return popoon::raiseError("Could not load $xslIncludesFile", POPOON_ERROR_FATAL, __FILE__, __LINE__);
          }
          $xslincludes = new XsltProcessor();
          $xslincludes->importStylesheet($xslincludesDom);

          $sm = new DomDocument();
          $sm->resolveExternals = true;
          $sm->substituteEntities = true;
          if (!$sm->load($sitemapRealPath)) {
              if (!file_exists($sitemapRealPath)) {
                  throw new PopoonFileNotFoundException($sitemapRealPath);
              }
              throw new PopoonXMLParseErrorException("Could not load $sitemapRealPath");
          }

          $xsl->setParameter("", "popoonDir", dirname(__FILE__));
          $withIncludes = $xslincludes->transformToDoc($sm);
          $written = $xsl->transformToUri($withIncludes, $sitemapCachedFile);

          return $written !== false;
      }

      /**
       * Converts $xml into the format named by $object->XmlFormat
       *
       * "DomDocument" and "XmlString" are handled, any other format leaves
       * $xml untouched.
       *
       * @param object $object object with an XmlFormat property
       * @param mixed $xml XML string or DomDocument, converted in place
       * @return bool always true
       */
      public function convertXML($object, &$xml)
      {
          if ($object->XmlFormat == "DomDocument") {
              self::var2XMLObject($xml);
          } elseif ($object->XmlFormat == "XmlString") {
              self::var2XMLString($xml);
          }
          return True;
      }

      /**
       * Converts it's parameter into a DomDocument object
       *
       * @param mixed $xmldoc can either be a XML String or a DomDocument object,
       *        converted in place
       * @return mixed true on success, otherwise the result of popoon::raiseError()
       */
      static public function var2XMLObject(&$xmldoc)
      {
          if (is_string($xmldoc)) {
              $xmldom = new DomDocument();
              $xmldom->loadXML($xmldoc);
              $xmldoc = $xmldom;
          }
          // instanceof is safe for non-objects, get_class() is not
          if (!($xmldoc instanceof DOMDocument)) {
              return popoon::raiseError('First parameter to var2XMLObject() is neither a XML String nor a XML DomDocument object. It is: ' . var_export($xmldoc, true), POPOON_ERROR_FATAL);
          }
          return True;
      }

      /**
       * Sends a Location header for the (scheme translated) uri and ends the script
       *
       * @param string $uri target of the redirect
       */
      public function redirectTo($uri)
      {
          header("Location: " . $this->translateScheme($uri));
          exit();
      }

      /**
       * Converts it's parameter into a XML String
       *
       * @param mixed $xmldoc can either be a XML String or a DomDocument object,
       *        converted in place
       * @return mixed true on success, otherwise the result of popoon::raiseError()
       */
      static public function var2XMLString(&$xmldoc)
      {
          // instanceof is safe for strings, get_class() is not
          if ($xmldoc instanceof DOMDocument) {
              $xmldoc = $xmldoc->saveXML();
          }
          if (!is_string($xmldoc)) {
              return popoon::raiseError('First parameter to var2XMLString() is neither a XML String nor a XML DomDocument object. It is: ' . var_export($xmldoc, true), POPOON_ERROR_FATAL);
          }
          return True;
      }

      /**
       * Translates {name} references and scheme://value expressions
       *
       * Values which are not strings, or which contain neither ":/" nor "{",
       * are returned unchanged. {name} references are replaced by the entry
       * of the current map, "../" prefixes address the maps added before.
       * For a scheme other than "default" the file
       * popoon/components/schemes/<scheme>.php is included and the function
       * scheme_<scheme>() is called with the value and this object.
       *
       * @param mixed $value the value to translate
       * @param array $doNotTranslate schemes which should not be translated, as for example http
       * @param bool $onSitemapGeneration true while the cached sitemap is generated;
       *        then {name} references are kept and only schemes providing
       *        scheme_<scheme>_onSitemapGeneration() are translated
       * @return mixed the translated value
       */
      public function translateScheme($value, $doNotTranslate = array(), $onSitemapGeneration = false)
      {
          if ($onSitemapGeneration) {
              // this path needs no instance state
              return self::translateSchemeForGeneration($value, $doNotTranslate);
          }
          if (!self::needsTranslation($value)) {
              return $value;
          }

          $scheme = self::getSchemeParts($value);
          $inner = $scheme["value"];

          if (substr($inner, -1, 1) == "}" && strrpos($inner, "{") === 0) {
              // The value is exactly one {name} reference: a direct lookup is
              // enough, and it can return arrays and alike as well.
              $reference = substr($inner, 1, -1);
              $level = substr_count($reference, '../');
              $name = str_replace("../", "", $reference);
              $scheme["value"] = isset($this->maps[$level][$name]) ? $this->maps[$level][$name] : null;
          } else {
              $scheme["value"] = preg_replace_callback("#\{([\./]*([^}]+))\}#", array($this, 'translateSchemeSubPartsMatch'), $inner);
          }

          if (in_array($scheme["scheme"], $doNotTranslate)) {
              return $value;
          }
          if ($scheme["scheme"] == "default") {
              return $scheme["value"];
          }
          // the scheme name only consists of [_a-zA-Z0-9], see getSchemeParts()
          if (!@include_once ("popoon/components/schemes/" . $scheme["scheme"] . ".php")) {
              return $value;
          }
          return call_user_func("scheme_" . $scheme["scheme"], $scheme["value"], $this);
      }

      /**
       * Translates one {...} part found by translateScheme()
       *
       * @param string $value the complete content of the braces
       * @param string $value2 the content without leading "." and "/" characters
       * @return mixed the map entry or the translated scheme value
       */
      public function translateSchemeSubParts($value, $value2)
      {
          if (strpos($value, ":/") === false) {
              return $this->maps[substr_count($value, '../')][$value2];
          }
          return $this->translateScheme($value);
      }

      /**
       * Translates one {...} part during sitemap generation
       *
       * Plain references and schemes which can not be translated at
       * generation time are returned unchanged, including the braces.
       *
       * @param string $value the complete content of the braces
       * @param string $value2 the content without leading "." and "/" characters (unused)
       * @return mixed the translated value or '{' . $value . '}'
       */
      static function translateSchemeSubPartsOnSitemapGeneration($value, $value2)
      {
          if (strpos($value, ":/") === false) {
              return '{' . $value . '}';
          }
          $newVal = self::translateSchemeForGeneration($value, array());
          if ($newVal == $value) {
              return '{' . $value . '}';
          }
          return $newVal;
      }

      /**
       * Splits a value into its scheme and the remaining value
       *
       * A value enclosed in single quotes and a value without "scheme://"
       * prefix both get the scheme "default".
       *
       * @param string $value the value to split
       * @return array array with the keys "scheme" and "value"
       */
      static function getSchemeParts($value)
      {
          if (preg_match("#^'(.*)'$#", $value, $match)) {
              return array("scheme" => "default", "value" => $match[1]);
          }
          if (preg_match("#^([_a-zA-Z0-9]+)://(.*)#", $value, $match)) {
              return array("scheme" => $match[1], "value" => $match[2]);
          }
          return array("scheme" => "default", "value" => $value);
      }

      /**
       * Makes $map the current map for {name} references
       *
       * @param array $map the map to add
       */
      public function addMap($map)
      {
          array_unshift($this->maps, $map);
      }

      /**
       * Removes the current map again
       */
      public function removeMap()
      {
          array_shift($this->maps);
      }

      /**
       * Sets one entry of the global options
       *
       * @param string $name name of the option
       * @param mixed $data value of the option
       */
      public function setGlobalOptions($name, $data)
      {
          $container = self::globalContainer();
          $container->options[$name] = $data;
      }

      /**
       * Replaces all global options
       *
       * @param mixed $data the new options
       */
      public function setGlobalOptionsAll($data)
      {
          $container = self::globalContainer();
          $container->options = $data;
      }

      /**
       * Returns one entry of the global options
       *
       * @param string $name name of the option
       * @return mixed the value, null if it is not set
       */
      public function getGlobalOptions($name)
      {
          if (isset($GLOBALS["_POPOON_globalContainer"]->options[$name])) {
              return $GLOBALS["_POPOON_globalContainer"]->options[$name];
          }
          return null;
      }

      /**
       * Returns all global options
       *
       * @return mixed the options, null if none are set
       */
      public function getGlobalOptionsAll()
      {
          if (isset($GLOBALS["_POPOON_globalContainer"]->options)) {
              return $GLOBALS["_POPOON_globalContainer"]->options;
          }
          return null;
      }

      /**
       * Mounts a second sitemap
       *
       * Not a very elegant solution, should be rewritten some day:
       * the constructor is run again on this object with the mounted file,
       * the uri is restored afterwards.
       *
       * @param array $attribs attributes of the mount element; "src" is the
       *        sitemap file, "uri-prefix" is optionally cut off the uri
       * @return mixed the return value of the constructor
       */
      private function _mount($attribs)
      {
          $file = $this->translateScheme($attribs["src"]);
          $old_uri = $this->uri;
          if (isset($attribs["uri-prefix"])) {
              $prefix = $this->translateScheme($attribs["uri-prefix"]);
              if ($prefix) {
                  // the prefix is a literal path, not a regular expression
                  $this->uri = preg_replace("#^/*" . preg_quote($prefix, "#") . "/*#", "", $this->uri);
              }
          }
          // I hope, this doesn't have too many sideeffects
          $pipelineHit = $this->__construct($file, $this->uri, $this->options, $this->maps);
          $this->uri = $old_uri;
          return $pipelineHit;
      }

      /**
       * Registers the (translated) attributes of a scheme in the global container
       *
       * The attributes are stored in schemes[name], or in
       * schemes[name][subname] if a "subname" attribute is given. Any entry
       * previously stored under the same name is replaced.
       *
       * @param array $attribs attributes of the scheme element, "name" is required
       */
      private function _scheme($attribs)
      {
          $container = self::globalContainer();
          if (!isset($container->schemes)) {
              $container->schemes = array();
          }

          $translated = array();
          foreach ($attribs as $attribName => $attribValue) {
              $translated[$attribName] = $this->translateScheme($attribValue);
          }

          if (isset($attribs["subname"])) {
              $container->schemes[$attribs["name"]] = array($attribs["subname"] => $translated);
          } else {
              $container->schemes[$attribs["name"]] = $translated;
          }
      }

      /**
       * Starts component caching
       *
       * @param array $pipelineAttribs Attributes of current pipeline
       * @see disableCaching(), $componentCache
       */
      public function enableCaching($pipelineAttribs)
      {
          // include_once, a second call must not load the file again
          include_once ('popoon/components/cache.php');
          $this->componentCache = new ComponentCache($pipelineAttribs, $this);
      }

      /**
       * Disables component caching
       *
       * Reset $this->componentCache
       *
       * @see enableCaching(), $componentCache
       */
      public function disableCaching()
      {
          $this->componentCache = false;
      }

      /**
       * Forwards to disableOutputCaching() of the options object, if there is one
       */
      public function disableOutputCaching()
      {
          if (is_object($this->options)) {
              $this->options->disableOutputCaching();
          }
      }

      /**
       * Formats a unix timestamp as HTTP date, e.g. "Fri, 02 Jan 1970 00:00:00 GMT"
       *
       * @param int $time unix timestamp
       * @return string
       */
      private static function httpDate($time)
      {
          return gmdate("D, d M Y H:i:s", $time) . " GMT";
      }

      /**
       * Tells whether a value can contain a scheme or a {name} reference
       *
       * @param mixed $value
       * @return bool
       */
      private static function needsTranslation($value)
      {
          return is_string($value)
              && (strpos($value, ":/") !== false || strpos($value, "{") !== false);
      }

      /**
       * Returns the global container object, which is created if necessary
       *
       * @return object
       */
      private static function globalContainer()
      {
          if (!isset($GLOBALS["_POPOON_globalContainer"])) {
              $GLOBALS["_POPOON_globalContainer"] = new stdClass();
          }
          return $GLOBALS["_POPOON_globalContainer"];
      }

      /**
       * The sitemap generation part of translateScheme()
       *
       * @param mixed $value the value to translate
       * @param array $doNotTranslate schemes which should not be translated
       * @return mixed the translated value
       */
      private static function translateSchemeForGeneration($value, $doNotTranslate)
      {
          if (!self::needsTranslation($value)) {
              return $value;
          }
          $scheme = self::getSchemeParts($value);
          $scheme["value"] = preg_replace_callback("#\{([\./]*([^}]+))\}#", array(__CLASS__, 'translateSchemeSubPartsOnSitemapGenerationMatch'), $scheme["value"]);

          if (in_array($scheme["scheme"], $doNotTranslate)) {
              return $value;
          }
          if ($scheme["scheme"] == "default") {
              return $scheme["value"];
          }
          if (!@include_once ("popoon/components/schemes/" . $scheme["scheme"] . ".php")) {
              return $value;
          }
          $generator = "scheme_" . $scheme["scheme"] . "_onSitemapGeneration";
          if (function_exists($generator)) {
              return call_user_func($generator, $scheme["value"]);
          }
          return $value;
      }

      /**
       * preg_replace_callback() adapter for translateSchemeSubParts()
       *
       * @param array $match match of the {...} pattern
       * @return mixed
       */
      private function translateSchemeSubPartsMatch($match)
      {
          return $this->translateSchemeSubParts($match[1], $match[2]);
      }

      /**
       * preg_replace_callback() adapter for translateSchemeSubPartsOnSitemapGeneration()
       *
       * @param array $match match of the {...} pattern
       * @return mixed
       */
      private static function translateSchemeSubPartsOnSitemapGenerationMatch($match)
      {
          return self::translateSchemeSubPartsOnSitemapGeneration($match[1], $match[2]);
      }
  }
  /**
   * Prepares an attribute value of the sitemap for the generated php code
   *
   * Single quotes are escaped, every constant(NAME) of a defined constant is
   * replaced by '.NAME.' and schemes are translated as far as that is possible
   * at generation time. The result is handed to sitemap_fixValue().
   *
   * @param string $value the attribute value
   * @return string the php expression returned by sitemap_fixValue()
   */
  function sitemap_formatValues($value)
  {
      $value = str_replace("'", "\'", $value);

      //replace constant() with content
      if (preg_match_all("#constant\(([^\)]+)\)#", $value, $found, PREG_SET_ORDER)) {
          foreach ($found as $hit) {
              if (defined($hit[1])) {
                  $value = str_replace($hit[0], "'." . $hit[1] . ".'", $value);
              }
          }
      }

      // only values containing a scheme or a {reference} have to be translated
      $hasScheme = strpos($value, ":/") !== false || strpos($value, "{") !== false;
      if ($hasScheme) {
          $value = popoon_sitemap::translateScheme($value, array(), true);
      }

      return sitemap_fixValue($value);
  }
  /**
   * Wraps a value in single quotes and removes empty string concatenations
   *
   * A leading ''. and a trailing .'' are dropped. An empty string between two
   * expressions (.''.) collapses to a single dot, so that the two expressions
   * stay concatenated instead of being fused into one token.
   *
   * @param string $value the value, may already contain '.EXPRESSION.' parts
   * @return string php source of a string expression
   */
  function sitemap_fixValue($value)
  {
      $value = "'" . $value . "'";
      return preg_replace(
          array("#\.''$#", "#^''\.#", "#\.''\.#"),
          array("", "", "."),
          $value
      );
  }