PageRenderTime 47ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/library/SimplePie/Locator.php

http://github.com/simplepie/simplepie
PHP | 424 lines | 342 code | 33 blank | 49 comment | 75 complexity | 5c507b28e5e4fffcaef265307e8670d0 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. /**
  3. * SimplePie
  4. *
  5. * A PHP-Based RSS and Atom Feed Framework.
  6. * Takes the hard work out of managing a complete RSS/Atom solution.
  7. *
  8. * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22. * to endorse or promote products derived from this software without specific prior
  23. * written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27. * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28. * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. *
  35. * @package SimplePie
  36. * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
  37. * @author Ryan Parman
  38. * @author Sam Sneddon
  39. * @author Ryan McCue
  40. * @link http://simplepie.org/ SimplePie
  41. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  42. */
  43. /**
  44. * Used for feed auto-discovery
  45. *
  46. *
  47. * This class can be overloaded with {@see SimplePie::set_locator_class()}
  48. *
  49. * @package SimplePie
  50. */
  class SimplePie_Locator
  {
      /** User-agent value forwarded to every File created for a candidate URL. */
      public $useragent;

      /** Timeout value forwarded to every File created for a candidate URL. */
      public $timeout;

      /** The already-fetched document (SimplePie_File) that is searched for feeds. */
      public $file;

      /** Absolute link URLs with no authority or the document's own authority; filled by get_links(). */
      public $local = array();

      /** Absolute link URLs that point at a different authority; filled by get_links(). */
      public $elsewhere = array();

      /** Not referenced anywhere in this class; kept so existing callers and subclasses keep working. */
      public $cached_entities = array();

      /** URL of the document itself; used to resolve links located at or before the <base> element. */
      public $http_base;

      /** Effective base URL: the first resolvable <base href>, otherwise the same as $http_base. */
      public $base;

      /** Line number of the <base> element that supplied $base (0 when none, or when line numbers are unavailable). */
      public $base_location = 0;

      /** Number of candidate URLs fetched so far across all strategies. */
      public $checked_feeds = 0;

      /** Upper bound for $checked_feeds; once reached, no further candidates are fetched. */
      public $max_checked_feeds = 10;

      /** Forwarded unchanged to every File created for a candidate URL. */
      public $force_fsockopen = false;

      /** Forwarded unchanged to every File created for a candidate URL. */
      public $curl_options = array();

      /**
       * Parsed form of the document body, or null when the DOMDocument class
       * does not exist. Declared explicitly so that assigning it in the
       * constructor does not create a dynamic property.
       */
      public $dom;

      /** Registry used to create File / Content_Type_Sniffer objects and to call Misc helpers. */
      protected $registry;

      /**
       * Stores the settings and parses the document body into a DOM tree.
       *
       * When DOMDocument is unavailable, $dom is left null and the methods that
       * need it throw SimplePie_Exception. An empty body, or a body the parser
       * rejects with an exception, leaves an empty document so that the search
       * methods simply find nothing.
       *
       * @param SimplePie_File $file Document to search
       * @param int $timeout Forwarded to candidate requests
       * @param string|null $useragent Forwarded to candidate requests
       * @param int $max_checked_feeds Maximum number of candidate URLs to fetch
       * @param bool $force_fsockopen Forwarded to candidate requests
       * @param array $curl_options Forwarded to candidate requests
       */
      public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10, $force_fsockopen = false, $curl_options = array())
      {
          $this->file = $file;
          $this->useragent = $useragent;
          $this->timeout = $timeout;
          $this->max_checked_feeds = $max_checked_feeds;
          $this->force_fsockopen = $force_fsockopen;
          $this->curl_options = $curl_options;
          $this->dom = null;

          if (!class_exists('DOMDocument'))
          {
              return;
          }

          $this->dom = new DOMDocument();

          // DOMDocument::loadHTML() throws ValueError for an empty source on
          // PHP 8, and an error handler cannot intercept that, so skip parsing.
          $body = (string) $this->file->body;
          if ($body === '')
          {
              return;
          }

          set_error_handler(array('SimplePie_Misc', 'silence_errors'));
          try
          {
              $this->dom->loadHTML($body);
          }
          catch (Throwable $e)
          {
              // Fall back to an empty document rather than a half-initialised one.
              $this->dom = new DOMDocument();
          }
          restore_error_handler();
      }

      /**
       * Injects the registry used for object creation and Misc helper calls.
       *
       * @param SimplePie_Registry $registry
       */
      public function set_registry(SimplePie_Registry $registry)
      {
          $this->registry = $registry;
      }

      /**
       * Locates a feed, starting from the document given to the constructor.
       *
       * Returns the document itself when is_feed() accepts it. Otherwise the
       * enabled strategies run in this order: autodiscovery, local links by
       * extension, local links by body pattern, remote links by extension,
       * remote links by body pattern. The first strategy that yields a result
       * wins.
       *
       * @param int $type Bitmask of SIMPLEPIE_LOCATOR_* flags selecting the strategies
       * @param array|null $working Receives the list of files produced by the last strategy tried
       * @return SimplePie_File|null The feed file, or null when nothing was found
       * @throws SimplePie_Exception When a strategy is enabled but no DOM is available
       */
      public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working = null)
      {
          if ($this->is_feed($this->file))
          {
              return $this->file;
          }

          // A remote document that is not HTML cannot contain links to follow.
          if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
          {
              $sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
              if ($sniffer->get_type() !== 'text/html')
              {
                  return null;
              }
          }

          if ($type & ~SIMPLEPIE_LOCATOR_NONE)
          {
              $this->get_base();
          }

          if (($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY) && ($working = $this->autodiscovery()))
          {
              return $working[0];
          }

          $link_strategies = SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY;
          if (($type & $link_strategies) && $this->get_links())
          {
              if (($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION) && ($working = $this->extension($this->local)))
              {
                  return $working[0];
              }
              if (($type & SIMPLEPIE_LOCATOR_LOCAL_BODY) && ($working = $this->body($this->local)))
              {
                  return $working[0];
              }
              if (($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION) && ($working = $this->extension($this->elsewhere)))
              {
                  return $working[0];
              }
              if (($type & SIMPLEPIE_LOCATOR_REMOTE_BODY) && ($working = $this->body($this->elsewhere)))
              {
                  return $working[0];
              }
          }

          return null;
      }

      /**
       * Decides whether a file should be treated as a feed.
       *
       * Remote files are accepted when their sniffed content type is one of the
       * known feed/XML types (plus text/html when $check_html is true). Local
       * files are always accepted. Anything else is rejected.
       *
       * @param SimplePie_File $file File to classify
       * @param bool $check_html Also accept text/html for remote files
       * @return bool
       */
      public function is_feed($file, $check_html = false)
      {
          if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
          {
              $sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
              $sniffed = $sniffer->get_type();
              $mime_types = array('application/rss+xml', 'application/rdf+xml',
                                  'text/rdf', 'application/atom+xml', 'text/xml',
                                  'application/xml', 'application/x-rss+xml');
              if ($check_html)
              {
                  $mime_types[] = 'text/html';
              }

              return in_array($sniffed, $mime_types, true);
          }

          if ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL)
          {
              return true;
          }

          return false;
      }

      /**
       * Determines the base URLs used to resolve relative links.
       *
       * $http_base is the document URL. $base starts out identical and is
       * replaced by the first <base href> that resolves; the line of that
       * element is remembered in $base_location.
       *
       * @throws SimplePie_Exception When no DOM is available
       */
      public function get_base()
      {
          if ($this->dom === null)
          {
              throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
          }

          $this->http_base = $this->file->url;
          $this->base = $this->http_base;

          foreach ($this->dom->getElementsByTagName('base') as $element)
          {
              if (!$element->hasAttribute('href'))
              {
                  continue;
              }

              $base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
              if ($base === false)
              {
                  continue;
              }

              $this->base = $base;
              $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
              break;
          }
      }

      /**
       * Collects feeds advertised through rel attributes on <link>, <a> and
       * <area> elements, in that order.
       *
       * @return array|null List of fetched feed files, or null when none qualified
       * @throws SimplePie_Exception When no DOM is available
       */
      public function autodiscovery()
      {
          $done = array();
          $feeds = array();

          foreach (array('link', 'a', 'area') as $tag)
          {
              $feeds = array_merge($feeds, $this->search_elements_by_tag($tag, $done, $feeds));
          }

          if (!empty($feeds))
          {
              return array_values($feeds);
          }

          return null;
      }

      /**
       * Scans all elements of one tag name for feed links and fetches them.
       *
       * @param string $name Tag name to scan
       * @param array $done URLs already examined; every resolved href is appended
       * @param array $feeds Feeds found so far, keyed by URL
       * @return array $feeds extended with the feeds found in this pass
       * @throws SimplePie_Exception When no DOM is available
       */
      protected function search_elements_by_tag($name, &$done, $feeds)
      {
          if ($this->dom === null)
          {
              throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
          }

          foreach ($this->dom->getElementsByTagName($name) as $link)
          {
              if ($this->checked_feeds >= $this->max_checked_feeds)
              {
                  break;
              }
              if (!$link->hasAttribute('href') || !$link->hasAttribute('rel'))
              {
                  continue;
              }

              $rel = array_unique($this->registry->call('Misc', 'space_separated_tokens', array(strtolower($link->getAttribute('rel')))));
              $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;

              // Only links located after the <base> element are resolved against it.
              $resolve_against = ($this->base_location < $line) ? $this->base : $this->http_base;
              $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $resolve_against));
              if ($href === false)
              {
                  continue;
              }

              // The grouping below spells out how PHP already evaluated the
              // original un-parenthesised expression (&& binds tighter than ||):
              // the $done check guards only rel="feed", the $feeds check guards
              // only rel="alternate".
              if (
                  (!in_array($href, $done, true) && in_array('feed', $rel, true))
                  || (
                      in_array('alternate', $rel, true)
                      && !in_array('stylesheet', $rel, true)
                      && $link->hasAttribute('type')
                      && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('text/html', 'application/rss+xml', 'application/atom+xml'), true)
                      && !isset($feeds[$href])
                  )
              )
              {
                  $feed = $this->fetch_feed($href, true);
                  if ($feed !== null)
                  {
                      $feeds[$href] = $feed;
                  }
              }

              $done[] = $href;
          }

          return $feeds;
      }

      /**
       * Gathers the document's <a href> targets into $local and $elsewhere.
       *
       * Only hrefs with no scheme or an http, https or feed scheme are kept. A
       * link counts as local when it has no authority or the same authority as
       * the document URL.
       *
       * @return true|null True when at least one link was collected, otherwise null
       * @throws SimplePie_Exception When no DOM is available
       */
      public function get_links()
      {
          if ($this->dom === null)
          {
              throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
          }

          // The document URL does not change while iterating; parse it once.
          $current = $this->registry->call('Misc', 'parse_url', array($this->file->url));

          foreach ($this->dom->getElementsByTagName('a') as $link)
          {
              if (!$link->hasAttribute('href'))
              {
                  continue;
              }

              $raw = trim($link->getAttribute('href'));
              $parsed = $this->registry->call('Misc', 'parse_url', array($raw));
              if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)?$/i', $parsed['scheme']))
              {
                  continue;
              }

              // Only links located after the <base> element are resolved against it.
              $after_base = method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo();
              $href = $this->registry->call('Misc', 'absolutize_url', array($raw, $after_base ? $this->base : $this->http_base));
              if ($href === false)
              {
                  continue;
              }

              if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
              {
                  $this->local[] = $href;
              }
              else
              {
                  $this->elsewhere[] = $href;
              }
          }

          $this->local = array_unique($this->local);
          $this->elsewhere = array_unique($this->elsewhere);

          if (!empty($this->local) || !empty($this->elsewhere))
          {
              return true;
          }

          return null;
      }

      /**
       * Returns the absolute URL of the first <a> or <link> whose rel attribute
       * contains the given token.
       *
       * Links whose href cannot be made absolute are skipped, in line with the
       * other search methods, so one malformed link does not hide a valid one
       * further down the document.
       *
       * @param string $rel Relation token to look for (compared against the lower-cased rel value)
       * @return string|null Absolute URL, or null when no link matches
       * @throws SimplePie_Exception When no DOM is available or DOMXpath is missing
       */
      public function get_rel_link($rel)
      {
          if ($this->dom === null)
          {
              throw new SimplePie_Exception('DOMDocument not found, unable to use '.
                                            'locator');
          }
          if (!class_exists('DOMXpath'))
          {
              throw new SimplePie_Exception('DOMXpath not found, unable to use '.
                                            'get_rel_link');
          }

          $xpath = new DOMXpath($this->dom);
          $query = '//a[@rel and @href] | //link[@rel and @href]';

          foreach ($xpath->query($query) as $link)
          {
              $raw = trim($link->getAttribute('href'));
              $parsed = $this->registry->call('Misc', 'parse_url', array($raw));
              if ($parsed['scheme'] !== '' && !preg_match('/^https?$/i', $parsed['scheme']))
              {
                  continue;
              }

              // Only links located after the <base> element are resolved against it.
              $after_base = method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo();
              $href = $this->registry->call('Misc', 'absolutize_url', array($raw, $after_base ? $this->base : $this->http_base));
              if ($href === false)
              {
                  continue;
              }

              $rel_values = explode(' ', strtolower($link->getAttribute('rel')));
              if (in_array($rel, $rel_values))
              {
                  return $href;
              }
          }

          return null;
      }

      /**
       * Tries the URLs whose last dot-suffix is .rss, .rdf, .atom or .xml.
       *
       * @param array $array Candidate URLs; candidates that were fetched and rejected are removed
       * @return array|null Single-element list holding the first accepted file, or null
       */
      public function extension(&$array)
      {
          foreach ($array as $key => $value)
          {
              if ($this->checked_feeds >= $this->max_checked_feeds)
              {
                  break;
              }
              if (!in_array(strtolower(strrchr($value, '.')), array('.rss', '.rdf', '.atom', '.xml'), true))
              {
                  continue;
              }

              $feed = $this->fetch_feed($value);
              if ($feed !== null)
              {
                  return array($feed);
              }

              unset($array[$key]);
          }

          return null;
      }

      /**
       * Tries the URLs that contain "feed", "rss", "rdf", "atom" or "xml"
       * anywhere (case-insensitive).
       *
       * @param array $array Candidate URLs; candidates that were fetched and rejected are removed
       * @return array|null Single-element list holding the first accepted file, or null
       */
      public function body(&$array)
      {
          foreach ($array as $key => $value)
          {
              if ($this->checked_feeds >= $this->max_checked_feeds)
              {
                  break;
              }
              if (!preg_match('/(feed|rss|rdf|atom|xml)/i', $value))
              {
                  continue;
              }

              $feed = $this->fetch_feed($value);
              if ($feed !== null)
              {
                  return array($feed);
              }

              unset($array[$key]);
          }

          return null;
      }

      /**
       * Fetches one candidate URL and returns it only if it is a usable feed.
       *
       * Counts the attempt in $checked_feeds, sends the feed-oriented Accept
       * header, and accepts the response when the fetch succeeded, the file is
       * either not remote or has status 200 / 207-299, and is_feed() agrees.
       *
       * @param string $url Absolute URL to fetch
       * @param bool $check_html Passed to is_feed(): also accept text/html
       * @return SimplePie_File|null The fetched file, or null when it was rejected
       */
      protected function fetch_feed($url, $check_html = false)
      {
          $this->checked_feeds++;
          $headers = array(
              'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
          );
          $feed = $this->registry->create('File', array($url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options));

          if (!$feed->success)
          {
              return null;
          }

          // The bitmask test needs its own parentheses: === binds tighter than &.
          $is_remote = ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE) !== 0;
          if ($is_remote && !($feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300)))
          {
              return null;
          }

          return $this->is_feed($feed, $check_html) ? $feed : null;
      }
  }