PageRenderTime 44ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/SimplePie/Locator.php

http://github.com/simplepie/simplepie
PHP | 337 lines | 259 code | 28 blank | 50 comment | 61 complexity | 76eac51f15c45ea159310fcb9928b7c8 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. /**
  3. * SimplePie
  4. *
  5. * A PHP-Based RSS and Atom Feed Framework.
  6. * Takes the hard work out of managing a complete RSS/Atom solution.
  7. *
  8. * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22. * to endorse or promote products derived from this software without specific prior
  23. * written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27. * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28. * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. *
  35. * @package SimplePie
  36. * @version 1.3-dev
  37. * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  38. * @author Ryan Parman
  39. * @author Geoffrey Sneddon
  40. * @author Ryan McCue
  41. * @link http://simplepie.org/ SimplePie
  42. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  43. */
  44. /**
  45. * Used for feed auto-discovery
  46. *
  47. *
  48. * This class can be overloaded with {@see SimplePie::set_locator_class()}
  49. *
  50. * @package SimplePie
  51. */
  52. class SimplePie_Locator
  53. {
  54. var $useragent;
  55. var $timeout;
  56. var $file;
  57. var $local = array();
  58. var $elsewhere = array();
  59. var $cached_entities = array();
  60. var $http_base;
  61. var $base;
  62. var $base_location = 0;
  63. var $checked_feeds = 0;
  64. var $max_checked_feeds = 10;
  65. protected $registry;
  66. public function __construct(&$file, $timeout = 10, $useragent = null, $max_checked_feeds = 10)
  67. {
  68. $this->file =& $file;
  69. $this->useragent = $useragent;
  70. $this->timeout = $timeout;
  71. $this->max_checked_feeds = $max_checked_feeds;
  72. $this->dom = new DOMDocument();
  73. set_error_handler(array('SimplePie_Misc', 'silence_errors'));
  74. $this->dom->loadHTML($this->file->body);
  75. restore_error_handler();
  76. }
  77. public function set_registry(SimplePie_Registry &$registry)
  78. {
  79. $this->registry = &$registry;
  80. }
  81. public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working)
  82. {
  83. if ($this->is_feed($this->file))
  84. {
  85. return $this->file;
  86. }
  87. if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
  88. {
  89. $sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
  90. if ($sniffer->get_type() !== 'text/html')
  91. {
  92. return null;
  93. }
  94. }
  95. if ($type & ~SIMPLEPIE_LOCATOR_NONE)
  96. {
  97. $this->get_base();
  98. }
  99. if ($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery())
  100. {
  101. return $working[0];
  102. }
  103. if ($type & (SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY) && $this->get_links())
  104. {
  105. if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local))
  106. {
  107. return $working;
  108. }
  109. if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local))
  110. {
  111. return $working;
  112. }
  113. if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere))
  114. {
  115. return $working;
  116. }
  117. if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere))
  118. {
  119. return $working;
  120. }
  121. }
  122. return null;
  123. }
  124. public function is_feed(&$file)
  125. {
  126. if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
  127. {
  128. $sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
  129. $sniffed = $sniffer->get_type();
  130. if (in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml')))
  131. {
  132. return true;
  133. }
  134. else
  135. {
  136. return false;
  137. }
  138. }
  139. elseif ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL)
  140. {
  141. return true;
  142. }
  143. else
  144. {
  145. return false;
  146. }
  147. }
  148. public function get_base()
  149. {
  150. $this->http_base = $this->file->url;
  151. $this->base = $this->http_base;
  152. $elements = $this->dom->getElementsByTagName('base');
  153. foreach ($elements as $element)
  154. {
  155. if ($element->hasAttribute('href'))
  156. {
  157. $this->base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
  158. $this->base_location = $element->getLineNo();
  159. break;
  160. }
  161. }
  162. }
  163. public function autodiscovery()
  164. {
  165. $done = array();
  166. $feeds = array();
  167. $feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
  168. $feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
  169. $feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));
  170. if (!empty($feeds))
  171. {
  172. return array_values($feeds);
  173. }
  174. else
  175. {
  176. return null;
  177. }
  178. }
  179. protected function search_elements_by_tag($name, &$done, $feeds)
  180. {
  181. $links = $this->dom->getElementsByTagName($name);
  182. foreach ($links as $link)
  183. {
  184. if ($this->checked_feeds === $this->max_checked_feeds)
  185. {
  186. break;
  187. }
  188. if ($link->hasAttribute('href') && $link->hasAttribute('rel'))
  189. {
  190. $rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel')))));
  191. if ($this->base_location < $link->getLineNo())
  192. {
  193. $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
  194. }
  195. else
  196. {
  197. $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
  198. }
  199. if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href]))
  200. {
  201. $this->checked_feeds++;
  202. $headers = array(
  203. 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
  204. );
  205. $feed = $this->registry->create('File', array($href, $this->timeout, 5, $headers, $this->useragent));
  206. if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
  207. {
  208. $feeds[$href] = $feed;
  209. }
  210. }
  211. $done[] = $href;
  212. }
  213. }
  214. return $feeds;
  215. }
  216. public function get_links()
  217. {
  218. $links = $this->dom->getElementsByTagName('a');
  219. foreach ($links as $link)
  220. {
  221. if ($link->hasAttribute('href'))
  222. {
  223. $href = trim($link->getAttribute('href'));
  224. $parsed = $this->registry->call('Misc', 'parse_url', array($href));
  225. if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme']))
  226. {
  227. if ($this->base_location < $link->getLineNo())
  228. {
  229. $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
  230. }
  231. else
  232. {
  233. $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base));
  234. }
  235. $current = $this->registry->call('Misc', 'parse_url', array($this->file->url));
  236. if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
  237. {
  238. $this->local[] = $href;
  239. }
  240. else
  241. {
  242. $this->elsewhere[] = $href;
  243. }
  244. }
  245. }
  246. }
  247. $this->local = array_unique($this->local);
  248. $this->elsewhere = array_unique($this->elsewhere);
  249. if (!empty($this->local) || !empty($this->elsewhere))
  250. {
  251. return true;
  252. }
  253. return null;
  254. }
  255. public function extension(&$array)
  256. {
  257. foreach ($array as $key => $value)
  258. {
  259. if ($this->checked_feeds === $this->max_checked_feeds)
  260. {
  261. break;
  262. }
  263. if (in_array(strtolower(strrchr($value, '.')), array('.rss', '.rdf', '.atom', '.xml')))
  264. {
  265. $this->checked_feeds++;
  266. $headers = array(
  267. 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
  268. );
  269. $feed = $this->registry->create('File', array($value, $this->timeout, 5, $headers, $this->useragent));
  270. if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
  271. {
  272. return $feed;
  273. }
  274. else
  275. {
  276. unset($array[$key]);
  277. }
  278. }
  279. }
  280. return null;
  281. }
  282. public function body(&$array)
  283. {
  284. foreach ($array as $key => $value)
  285. {
  286. if ($this->checked_feeds === $this->max_checked_feeds)
  287. {
  288. break;
  289. }
  290. if (preg_match('/(rss|rdf|atom|xml)/i', $value))
  291. {
  292. $this->checked_feeds++;
  293. $headers = array(
  294. 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
  295. );
  296. $feed = $this->registry->create('File', array($value, $this->timeout, 5, null, $this->useragent));
  297. if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
  298. {
  299. return $feed;
  300. }
  301. else
  302. {
  303. unset($array[$key]);
  304. }
  305. }
  306. }
  307. return null;
  308. }
  309. }