PageRenderTime 54ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/bitrix/modules/seo/lib/sitemapfile.php

https://bitbucket.org/bohdan1217/norka
PHP | 442 lines | 254 code | 56 blank | 132 comment | 26 complexity | c00068bef0e8f481b3ad605c9b61e926 MD5 | raw file
  1. <?php
  2. /**
  3. * Bitrix Framework
  4. * @package bitrix
  5. * @subpackage seo
  6. * @copyright 2001-2013 Bitrix
  7. */
  8. namespace Bitrix\Seo;
  9. use Bitrix\Main\IO\Path;
  10. use Bitrix\Main\IO\File;
  11. use Bitrix\Main\SiteTable;
  12. use Bitrix\Main\Text\Converter;
  /**
   * Sitemap XML file of a single site.
   *
   * The file is written incrementally: a header, a sequence of <url> entries
   * and a footer. When the current file reaches MAX_SIZE the object re-points
   * itself (see split()/reInit()) to the next "<name>.part<N>.xml" file, so a
   * single instance may walk through several physical files during its life.
   *
   * Class SitemapFile
   * @package Bitrix\Seo
   */
  class SitemapFile
      extends File
  {
      const XML_HEADER = '<?xml version="1.0" encoding="UTF-8"?>';
      const FILE_HEADER = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
      const FILE_FOOTER = '</urlset>';
      const ENTRY_TPL = '<url><loc>%s</loc><lastmod>%s</lastmod></url>';
      const ENTRY_TPL_SEARCH = '<url><loc>%s</loc>';
      const XPATH_URL = '/urlset/url';
      // Size threshold (compared against getSize()) after which a new part is started.
      const MAX_SIZE = 5000000;
      const FILE_EXT = '.xml';
      const FILE_PART_SUFFIX = '.part';

      protected $settings = array();
      protected $parser = false;
      // Declared explicitly: it is assigned in the constructor and returned by getSiteRoot().
      protected $siteRoot = '';
      // Base file name (first part); part names are derived from it.
      protected $partFile = '';
      // Names of the parts that have already been left behind by split().
      protected $partList = array();
      // Index of the current part; 0 means the base file.
      protected $part = 0;
      // True when the current part already has content (header written).
      protected $partChanged = false;
      protected $urlToSearch = '';
      protected $urlFound = false;

      /**
       * Binds the object to a sitemap file located in the site root.
       *
       * @param string $fileName File name relative to the site root; FILE_EXT is appended when missing.
       * @param array $settings Settings array with keys SITE_ID, PROTOCOL and DOMAIN.
       */
      public function __construct($fileName, $settings)
      {
          $this->settings = array(
              'SITE_ID' => $settings['SITE_ID'],
              'PROTOCOL' => $settings['PROTOCOL'] == 'https' ? 'https' : 'http',
              'DOMAIN' => $settings['DOMAIN'],
          );

          $site = SiteTable::getRow(array("filter" => array("LID" => $this->settings['SITE_ID'])));
          // The row may be missing for an unknown site id; keep passing null in
          // that case (as before) but without reading an offset of a non-array.
          $siteDir = (is_array($site) && isset($site['DIR'])) ? $site['DIR'] : null;

          $this->siteRoot = Path::combine(
              SiteTable::getDocumentRoot($this->settings['SITE_ID']),
              $siteDir
          );

          if(substr($fileName, -strlen(self::FILE_EXT)) != self::FILE_EXT)
          {
              $fileName .= self::FILE_EXT;
          }

          // Only the first construction defines the base name; reInit() calls
          // the constructor again with part names and must not overwrite it.
          if($this->partFile == '')
          {
              $this->partFile = $fileName;
          }

          // Hack for object reconstruction during file splitting: drop the
          // previously resolved physical path before re-running the parent constructor.
          // NOTE(review): pathPhysical is presumably a cache owned by the parent class - confirm.
          $this->pathPhysical = null;

          parent::__construct($this->siteRoot.'/'.$fileName, $this->settings['SITE_ID']);

          $this->partChanged = $this->isExists() && !$this->isSplitNeeded();
      }

      /**
       * Reinitializes current object with new file name.
       *
       * @param string $fileName New file name.
       *
       * @return void
       */
      protected function reInit($fileName)
      {
          // The cached parser belongs to the previous file; parse() must re-read the new one.
          $this->parser = false;
          $this->__construct($fileName, $this->settings);
      }

      /**
       * Writes XML and urlset headers to the current sitemap file, replacing
       * its previous contents, and marks the part as changed.
       *
       * @return void
       */
      public function addHeader()
      {
          $this->partChanged = true;
          $this->putContents(self::XML_HEADER.self::FILE_HEADER);
      }

      /**
       * Checks whether the current file exists and has reached MAX_SIZE, i.e.
       * whether a new part has to be started.
       *
       * @return bool
       * @throws \Bitrix\Main\IO\FileNotFoundException
       */
      protected function isSplitNeeded()
      {
          return $this->isExists() && $this->getSize() >= self::MAX_SIZE;
      }

      /**
       * Adds new entry to the current (unfinished) sitemap file.
       *
       * Entry array keys
       * XML_LOC - loc field value
       * XML_LASTMOD - lastmod field value
       *
       * @param array $entry Entry array.
       *
       * @return void
       */
      public function addEntry($entry)
      {
          if($this->isSplitNeeded())
          {
              // Move to the next part and retry there.
              $this->split();
              $this->addEntry($entry);
              return;
          }

          if(!$this->partChanged)
          {
              $this->addHeader();
          }

          $this->putContents($this->formatEntry($entry), self::APPEND);
      }

      /**
       * Creates next sitemap file part. Returns new part file name.
       *
       * The part being left gets its footer (when it has content) and is
       * remembered in the part list.
       *
       * @return string
       */
      public function split()
      {
          if($this->partChanged)
          {
              $this->addFooter();
          }

          $this->partList[] = $this->getName();
          $this->part++;

          $fileName = $this->getPartFileName($this->part);
          $this->reInit($fileName);
          $this->partChanged = $this->isExists() && !$this->isSplitNeeded();

          return $fileName;
      }

      /**
       * Returns list of file parts: every part already left by split() plus
       * the current one when it is not empty.
       *
       * @return array
       */
      public function getNameList()
      {
          if($this->isCurrentPartNotEmpty())
          {
              return array_merge($this->partList, array($this->getName()));
          }

          return $this->partList;
      }

      /**
       * Returns true if the whole sitemap (not only the current part)
       * contains something.
       *
       * @return bool
       */
      public function isNotEmpty()
      {
          return (count($this->partList) > 0) || $this->isCurrentPartNotEmpty();
      }

      /**
       * Returns if current sitemap part contains something besides header.
       *
       * @return bool
       */
      public function isCurrentPartNotEmpty()
      {
          if(!$this->isExists())
          {
              return false;
          }

          $contents = $this->getContents();

          return strlen($contents) > 0 && $contents !== self::XML_HEADER.self::FILE_HEADER;
      }

      /**
       * Appends new entry to the existing and finished sitemap file.
       *
       * The entry is written over the trailing footer and the footer is
       * written again after it, so the file stays finished.
       *
       * Entry array keys
       * XML_LOC - loc field value
       * XML_LASTMOD - lastmod field value
       *
       * @param array $entry Entry array.
       *
       * @return void
       */
      public function appendEntry($entry)
      {
          if($this->isSplitNeeded())
          {
              // In append mode the part being left is already finished (it ends
              // with the footer). split() appends a footer whenever partChanged
              // is set, which would produce a second closing tag, so clear the
              // flag before moving on.
              $this->partChanged = false;
              $this->split();
              $this->appendEntry($entry);
              return;
          }

          if(!$this->partChanged)
          {
              // Fresh part: write the header and append right after it.
              $this->addHeader();
              $offset = $this->getSize();
          }
          else
          {
              // Finished part: overwrite the trailing footer.
              $offset = $this->getSize() - strlen(self::FILE_FOOTER);
          }

          $fd = $this->open('r+');
          fseek($fd, $offset);
          fwrite($fd, $this->formatEntry($entry).self::FILE_FOOTER);
          fclose($fd);
      }

      /**
       * Searches for an entry in the existing and finished sitemap file and
       * removes it by overwriting it with spaces (file size does not change).
       *
       * The search starts in the current file and continues through the
       * following parts while the inspected file has reached MAX_SIZE.
       * Only the first occurrence is removed.
       *
       * @param string $url Entry URL (path part; protocol and domain are taken from settings).
       *
       * @return void
       */
      public function removeEntry($url)
      {
          $converter = Converter::getXmlConverter();
          $url = $this->getUrlPrefix().$url;

          // Entries are stored XML-encoded, so the needle has to be encoded too.
          $pattern = sprintf(self::ENTRY_TPL_SEARCH, $converter->encode($url));
          $entryClose = '</url>';

          while($this->isExists())
          {
              $contents = $this->getContents();
              $position = strpos($contents, $pattern);
              $length = 0;

              if($position !== false)
              {
                  // Blank exactly the stored entry, whatever its lastmod length is.
                  $end = strpos($contents, $entryClose, $position);
                  if($end !== false)
                  {
                      $length = $end + strlen($entryClose) - $position;
                  }
                  else
                  {
                      // No closing tag found: fall back to the length of an
                      // entry rendered with the current date.
                      $length = strlen(sprintf(
                          self::ENTRY_TPL,
                          $converter->encode($url),
                          $converter->encode(date('c'))
                      ));
                  }
              }
              unset($contents);

              if($position !== false)
              {
                  $fd = $this->open('r+');
                  fseek($fd, $position);
                  fwrite($fd, str_repeat(" ", $length));
                  fclose($fd);
                  break;
              }

              if(!$this->isSplitNeeded())
              {
                  break;
              }

              // The file is full, so the entry may live in the next part.
              $this->part++;
              $this->reInit($this->getPartFileName($this->part));
          }
      }

      /**
       * Adds new file entry to the current sitemap. Files that do not exist
       * or are reported as system ones are skipped.
       *
       * @param File $f File to add.
       *
       * @return void
       * @throws \Bitrix\Main\IO\FileNotFoundException
       */
      public function addFileEntry(File $f)
      {
          if($f->isExists() && !$f->isSystem())
          {
              $this->addEntry(array(
                  'XML_LOC' => $this->getUrlPrefix().$this->getFileUrl($f),
                  'XML_LASTMOD' => date('c', $f->getModificationTime()),
              ));
          }
      }

      /**
       * Adds new IBlock entry to the current sitemap
       *
       * @param string $url IBlock entry URL.
       * @param string $modifiedDate IBlock entry modify timestamp.
       *
       * @return void
       */
      public function addIBlockEntry($url, $modifiedDate)
      {
          $this->addEntry(array(
              'XML_LOC' => $this->getUrlPrefix().$url,
              'XML_LASTMOD' => date('c', $modifiedDate - \CTimeZone::getOffset()),
          ));
      }

      /**
       * Appends new IBlock entry to the existing finished sitemap. When the
       * file does not exist yet it is created with header, entry and footer.
       *
       * @param string $url IBlock entry URL.
       * @param string $modifiedDate IBlock entry modify timestamp.
       *
       * @return void
       */
      public function appendIBlockEntry($url, $modifiedDate)
      {
          if($this->isExists())
          {
              $this->appendEntry(array(
                  'XML_LOC' => $this->getUrlPrefix().$url,
                  'XML_LASTMOD' => date('c', $modifiedDate - \CTimeZone::getOffset()),
              ));
          }
          else
          {
              $this->addHeader();
              $this->addIBlockEntry($url, $modifiedDate);
              $this->addFooter();
          }
      }

      /**
       * Adds footer to the current sitemap part
       *
       * @return void
       */
      public function addFooter()
      {
          $this->putContents(self::FILE_FOOTER, self::APPEND);
      }

      /**
       * Returns sitemap site root
       *
       * @return mixed|string
       */
      public function getSiteRoot()
      {
          return $this->siteRoot;
      }

      /**
       * Returns sitemap file URL
       *
       * @return string
       */
      public function getUrl()
      {
          return $this->getUrlPrefix().$this->getFileUrl($this);
      }

      /**
       * Parses sitemap file. The parser is created on first call and reused
       * until the object is re-pointed to another file.
       *
       * @return bool|\CDataXML False when the file does not exist.
       * @throws \Bitrix\Main\IO\FileNotFoundException
       */
      public function parse()
      {
          if(!$this->parser && $this->isExists())
          {
              $this->parser = new \CDataXML();
              $this->parser->loadString($this->getContents());
          }

          return $this->parser;
      }

      /**
       * Returns file relative path for URL.
       *
       * A directory index file is represented by its directory URL.
       *
       * @param File $f File object.
       *
       * @return string
       */
      protected function getFileUrl(File $f)
      {
          static $indexNames;
          if(!is_array($indexNames))
          {
              $indexNames = GetDirIndexArray();
          }

          // NOTE(review): documentRoot is not assigned in this class; presumably
          // it is provided by the parent class - confirm.
          $documentRoot = $this->documentRoot;
          $filePath = $f->getPath();

          $path = '/';
          // Test the prefix on the same path it is stripped from.
          if(substr($filePath, 0, strlen($documentRoot)) === $documentRoot)
          {
              $path = '/'.substr($filePath, strlen($documentRoot));
          }

          $path = Path::convertLogicalToUri($path);

          if(in_array($f->getName(), $indexNames, true))
          {
              // Drop only the trailing "/<index name>" segment; a directory
              // with the same name elsewhere in the path must stay intact.
              $suffix = '/'.$f->getName();
              if(substr($path, -strlen($suffix)) === $suffix)
              {
                  $path = substr($path, 0, -strlen($suffix)).'/';
              }
          }

          return '/'.ltrim($path, '/');
      }

      /**
       * Builds "<protocol>://<punycode domain>" for the configured site.
       *
       * @return string
       */
      private function getUrlPrefix()
      {
          // A real variable is passed as the second argument: the original
          // "$e = null" idiom suggests the parameter is taken by reference,
          // and an expression cannot be bound to a reference cleanly.
          $errors = null;

          return $this->settings['PROTOCOL'].'://'.\CBXPunycode::toASCII($this->settings['DOMAIN'], $errors);
      }

      /**
       * Builds the file name of the given part from the base file name:
       * "<base>" + FILE_PART_SUFFIX + "<part>" + "<ext>".
       *
       * @param int $part Part index.
       *
       * @return string
       */
      private function getPartFileName($part)
      {
          $extLength = strlen(self::FILE_EXT);

          return substr($this->partFile, 0, -$extLength)
              .self::FILE_PART_SUFFIX.$part
              .substr($this->partFile, -$extLength);
      }

      /**
       * Renders a single XML-encoded <url> entry.
       *
       * @param array $entry Entry array with XML_LOC and XML_LASTMOD keys.
       *
       * @return string
       */
      private function formatEntry($entry)
      {
          $converter = Converter::getXmlConverter();

          return sprintf(
              self::ENTRY_TPL,
              $converter->encode($entry['XML_LOC']),
              $converter->encode($entry['XML_LASTMOD'])
          );
      }
  }