PageRenderTime 54ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/Lampcms/SiteMap.php

https://github.com/kanitw/LampCMS
PHP | 364 lines | 131 code | 60 blank | 173 comment | 13 complexity | 0e084cd132c7f36549590294c692538f MD5 | raw file
  1. <?php
  2. /**
  3. *
  4. * License, TERMS and CONDITIONS
  5. *
  6. * This software is lisensed under the GNU LESSER GENERAL PUBLIC LICENSE (LGPL) version 3
  7. * Please read the license here : http://www.gnu.org/licenses/lgpl-3.0.txt
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. * 3. The name of the author may not be used to endorse or promote products
  17. * derived from this software without specific prior written permission.
  18. *
  19. * ATTRIBUTION REQUIRED
  20. * 4. All web pages generated by the use of this software, or at least
  21. * the page that lists the recent questions (usually home page) must include
  22. * a link to the http://www.lampcms.com and text of the link must indicate that
  23. * the website's Questions/Answers functionality is powered by lampcms.com
  24. * An example of acceptable link would be "Powered by <a href="http://www.lampcms.com">LampCMS</a>"
  25. * The location of the link is not important, it can be in the footer of the page
  26. * but it must not be hidden by style attibutes
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
  29. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  30. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  31. * IN NO EVENT SHALL THE FREEBSD PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY
  32. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  33. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  35. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  36. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  37. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38. *
  39. * This product includes GeoLite data created by MaxMind,
  40. * available from http://www.maxmind.com/
  41. *
  42. *
  43. * @author Dmitri Snytkine <cms@lampcms.com>
  44. * @copyright 2005-2011 (or current year) ExamNotes.net inc.
  45. * @license http://www.gnu.org/licenses/lgpl-3.0.txt GNU LESSER GENERAL PUBLIC LICENSE (LGPL) version 3
  46. * @link http://www.lampcms.com Lampcms.com project
  47. * @version Release: @package_version@
  48. *
  49. *
  50. */
  51. namespace Lampcms;
  52. /**
  53. * Generate sitemap about every 4-6 hours
  54. * Once it's generated, create a dir if necessary based on date
  55. * /sitemap/2010/08/02/
  56. *
  57. * Update SITEMAPS table to update latest item ID
  58. *
  59. * save it there as .tgz file,
  60. * update the master sitemap file, resave it
  61. * ping Google and other services like Bing, Yahoo
  62. * using that NEW file (new file only, not master file)
  63. *
  64. * Always use urlencode() for location part of XML
  65. * Always make sure it's in UTF-8, which is basically automatically done
  66. * in our case since our url_text is always utf-8
  67. *
  68. *
  69. * @author Dmitri Snytkine
  70. *
  71. */
  class SiteMap extends LampcmsObject
  {
      /**
       * Object of type MongoDoc
       * that holds array of latest IDs
       * (the 'i_qid' key is read and written by addNewQuestions())
       *
       * @var object MongoDoc
       */
      protected $oLatest;

      /**
       * This is used to create a new sitemap xml file
       *
       * @var string
       */
      const XML_START = '<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"/>';

      /**
       * This is used to create new sitemap index file
       * with collection of sitemaps
       *
       * @var string
       */
      const INDEX_START = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"/>';

      /**
       * Path to writable global site map file
       * that holds list of daily sitemaps
       * New maps will be appended to this file as needed!
       *
       * If the file does not exist yet, updateIndexFile()
       * starts a new index from self::INDEX_START.
       * The path is relative to the writable directory 'w',
       * which is this constant: LAMPCMS_DATA_DIR
       *
       * @var string
       */
      protected $rootMapFilePath = 'sitemap/index.xml';

      /**
       * Root element of the sitemap being generated
       *
       * @var object of type SimpleXMLElement
       */
      protected $oSXESitemap;

      /**
       * File name of the sitemap generated by this run.
       * It will be either supplied to the run() method
       * or generated based on today's date like sitemap_20100808.xml
       *
       * @var string
       */
      protected $siteMapName;

      /**
       * Not referenced by any method of this class.
       * NOTE(review): presumably reserved for the name of a
       * gzipped copy of the sitemap - confirm before relying on it.
       *
       * @var mixed
       */
      protected $siteMapGz;

      /**
       * SimpleXML representing the sitemap index file.
       * This object is created from the rootMapFilePath file
       * (or from self::INDEX_START when that file is missing);
       * the newly created sitemap is appended to it.
       *
       * @var object of type SimpleXMLElement
       */
      protected $oSXEIndexMap;

      /**
       * Not referenced by any method of this class.
       *
       * @var array
       */
      protected $aLatestIds = array();

      /**
       * sprintf() templates of the ping endpoints, keyed by
       * search engine name. The single %s placeholder receives
       * the url-encoded address of the new sitemap file.
       *
       * @var array
       */
      protected $aPingUrls = array(
          'bing' => 'http://www.bing.com/webmaster/ping.aspx?siteMap=%s',
          'google' => 'http://www.google.com/webmasters/sitemaps/ping?sitemap=%s',
          'yahoo' => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=YahooDemo&url=%s');

      /**
       * Entry point: builds a new sitemap from the questions added
       * since the previous run, saves it, appends it to the
       * sitemap index file and pings the search engines.
       *
       * NOTE(review): when there are no new questions an empty
       * sitemap file is still written, indexed and pinged.
       * NOTE(review): nothing in this class explicitly persists
       * $this->oLatest after it is updated - presumably MongoDoc
       * takes care of saving itself; verify.
       *
       * @param string $fileName optional file name for the new
       * sitemap; when null a name based on today's date is used
       *
       * @return void
       */
      public function run($fileName = null)
      {
          $this->siteMapName = $fileName;

          $this->getLatestIds()
              ->makeSXEObject()
              ->addNewQuestions()
              ->saveNewMapFile()
              ->updateIndexFile()
              ->pingSearchSites();
      }

      /**
       * Load the document from the SITEMAP_LATEST collection
       * and wrap it into a MongoDoc object stored in $this->oLatest.
       * When the collection has no document an empty array is used.
       *
       * @return object $this
       */
      protected function getLatestIds()
      {
          $oMongo = $this->oRegistry->Mongo;
          $aLatest = $oMongo->getCollection('SITEMAP_LATEST')->findOne();
          $aLatest = (!$aLatest) ? array() : $aLatest;
          $this->oLatest = new MongoDoc($this->oRegistry, 'SITEMAP_LATEST', $aLatest);

          return $this;
      }

      /**
       * Create root object for the new
       * sitemap
       *
       * @throws DevException if the xml template cannot be parsed
       *
       * @return object $this
       */
      protected function makeSXEObject()
      {
          if(false === $this->oSXESitemap = simplexml_load_string(self::XML_START)){
              throw new DevException('Unable to load xml file '.self::XML_START);
          }

          return $this;
      }

      /**
       * Add one <url> entry for every question whose _id is greater
       * than the id remembered in $this->oLatest['i_qid']
       * (at most 12000 per run) and remember the id of the
       * last question added.
       *
       * @return object $this
       */
      public function addNewQuestions()
      {
          $id = (int)$this->oLatest['i_qid'];
          d('latest QID: '.$id);
          $urlPrefix = $this->oRegistry->Ini->SITE_URL.'/';
          $oMongo = $this->oRegistry->Mongo;
          $coll = $oMongo->getCollection('QUESTIONS');
          /**
           * Must be sorted by _id ascending:
           * the last document of the loop becomes the new
           * "latest id", and together with limit() an undefined
           * order could skip questions or repeat them
           * on the next run
           */
          $cursor = $coll->find(array('_id' => array('$gt' => $id)), array('_id', 'url', 'i_ts'))
              ->sort(array('_id' => 1))
              ->limit(12000);
          d('cursor: '.get_class($cursor));

          if($cursor && ($cursor instanceof \MongoCursor) && ($cursor->count() > 0)){
              d('cursor count: '.$cursor->count());
              foreach($cursor as $aMessage){
                  if(!empty($aMessage)){
                      $loc = $urlPrefix.'q'.$aMessage['_id'].'/'. $aMessage['url'];
                      $lastmod = date('Y-m-d', $aMessage['i_ts']);
                      $this->addUrl($loc, $lastmod, 'yearly');
                      $this->oLatest['i_qid'] = $aMessage['_id'];
                  }
              }
          }

          d('latest qid: '.$this->oLatest['i_qid']);

          return $this;
      }

      /**
       * Save newly generated sitemap file
       *
       * @throws DevException if the file cannot be written
       *
       * @return object $this
       */
      protected function saveNewMapFile()
      {
          $this->siteMapName = (null !== $this->siteMapName) ? $this->siteMapName : 'sitemap_'.date('Ymd').'.xml';
          $xmlFile = LAMPCMS_DATA_DIR.'sitemap/'.$this->siteMapName;

          /**
           * If this sitemap file already exists then we
           * can either reuse it and append new urls to IT
           * OR create a new file with timestamp prefixed to it.
           * Google recommends NOT to keep creating many new files
           * but instead modify existing ones, but
           * it's not mandatory and if we worry about files being
           * close to site/file size limit then we should create new
           * files
           */
          if(file_exists($xmlFile)){
              $this->siteMapName = time().'_'.$this->siteMapName;
              $xmlFile = LAMPCMS_DATA_DIR.'sitemap/'.$this->siteMapName;
          }

          d('xmlFile: '.$xmlFile);

          if(false === $this->oSXESitemap->asXML($xmlFile)){
              $err = 'Unable to save new sitemap file: '.$xmlFile;
              d($err);
              throw new DevException($err);
          }

          return $this;
      }

      /**
       * Append the main sitemap index file
       * and add the latest just created sitemap to it
       * and then resave it
       *
       * @throws DevException if the index file is not writable,
       * cannot be parsed or cannot be saved
       *
       * @return object $this
       */
      protected function updateIndexFile()
      {
          $file = LAMPCMS_DATA_DIR.$this->rootMapFilePath;

          if(file_exists($file)){
              if(!is_writable($file)){
                  throw new DevException('file: '.$file.' is not writable');
              }

              if(false === $this->oSXEIndexMap = simplexml_load_file($file)){
                  throw new DevException('Unable to load xml file: '.$file);
              }
          } else {
              if(false === $this->oSXEIndexMap = simplexml_load_string(self::INDEX_START)){
                  throw new DevException('Unable to load xml string: '.self::INDEX_START);
              }
          }

          $mapUrl = $this->oRegistry->Ini->SITE_URL.'/w/sitemap/'.$this->siteMapName;

          $oMap = $this->oSXEIndexMap->addChild('sitemap');
          /**
           * addChild() does not escape ampersands,
           * so it has to be done here
           */
          $oMap->addChild('loc', str_replace('&', '&amp;', $mapUrl));
          $oMap->addChild('lastmod', date('c'));

          if(false === $this->oSXEIndexMap->asXML($file)){
              throw new DevException('Unable to save sitemap index file: '.$file);
          }

          return $this;
      }

      /**
       * Appends one <url> element with
       * loc, lastmod and changefreq children
       * to the root of the new sitemap
       *
       * @param string $url raw (not xml-escaped) url of the page
       * @param string $time must be in W3C Datetime format!
       * @param string $changefreq change frequency: one of these values:
       *
       * always
       * hourly
       * daily
       * weekly
       * monthly
       * yearly
       * never
       *
       * @return object $this
       */
      protected function addUrl($url, $time, $changefreq = 'yearly')
      {
          $oSXUrl = $this->oSXESitemap->addChild('url');
          /**
           * addChild() does not escape ampersands: a raw '&'
           * in the url results in "unterminated entity reference"
           * warning and a truncated value, so escape it here
           */
          $oSXUrl->addChild('loc', str_replace('&', '&amp;', $url));
          $oSXUrl->addChild('lastmod', $time);
          $oSXUrl->addChild('changefreq', $changefreq);

          return $this;
      }

      /**
       * Ping a bunch of search engines to tell
       * them about our new sitemap file.
       * A failure to ping one service is logged
       * and does not prevent pinging the other services.
       *
       * @return object $this
       */
      protected function pingSearchSites()
      {
          $oHttp = new Curl();
          $url = $this->oRegistry->Ini->SITE_URL.'/w/sitemap/'.$this->siteMapName;
          /**
           * The sitemap address is passed as a value
           * in the query string, so it must be url-encoded
           */
          $encodedUrl = urlencode($url);

          foreach($this->aPingUrls as $key => $val){
              try{
                  $pingUrl = sprintf($val, $encodedUrl);
                  d('going to ping '.$key.' url: '.$pingUrl);
                  /**
                   * Request the ping url of the search engine,
                   * not the url of our own sitemap file
                   */
                  $oHttp->getDocument($pingUrl);
                  // NOTE(review): call order kept as is; confirm against the Curl class API
                  $code = $oHttp->getHttpResponseCode()->checkResponse();
                  d('pinged '.$key.' response code: '.$code);
              } catch (\Exception $e){
                  $err = 'Unable to ping '.$key.' got error: '.$e->getMessage();
                  e('Error: '.$err);
              }
          }

          return $this;
      }
  }
  304. ?>