PageRenderTime 100ms CodeModel.GetById 34ms RepoModel.GetById 0ms app.codeStats 0ms

/library/Zend/Feed/Reader/Feed/Rss.php

https://bitbucket.org/aboozar/zf2
PHP | 700 lines | 474 code | 113 blank | 113 comment | 111 complexity | d5e404ca199b734944adfb36bf22e21b MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /**
  3. * Zend Framework (http://framework.zend.com/)
  4. *
  5. * @link http://github.com/zendframework/zf2 for the canonical source repository
  6. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7. * @license http://framework.zend.com/license/new-bsd New BSD License
  8. * @package Zend_Feed
  9. */
  10. namespace Zend\Feed\Reader\Feed;
  11. use DateTime;
  12. use DOMDocument;
  13. use Zend\Feed\Reader;
  14. use Zend\Feed\Reader\Collection;
  15. use Zend\Feed\Reader\Exception;
  16. /**
  17. * @category Zend
  18. * @package Reader
  19. */
  20. class Rss extends AbstractFeed
  21. {
  /**
   * Constructor
   *
   * Delegates to the parent constructor, then instantiates the DublinCore
   * and Atom feed extensions (class names resolved through the Reader
   * plugin loader) and hands every registered extension the channel XPath
   * prefix matching the feed type.
   *
   * @param DOMDocument $dom
   * @param string $type
   */
  public function __construct(DOMDocument $dom, $type = null)
  {
      parent::__construct($dom, $type);

      // Both extensions are built the same way: resolve the class, then
      // construct it with the document, the feed type and the shared XPath.
      foreach (array('DublinCore\Feed', 'Atom\Feed') as $extensionName) {
          $extensionClass = Reader\Reader::getPluginLoader()->getClassName($extensionName);
          $this->_extensions[$extensionName] = new $extensionClass(
              $dom,
              $this->_data['type'],
              $this->_xpath
          );
      }

      // RSS 1.0 and 0.90 are RDF based; every other type uses the plain
      // /rss/channel layout.
      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;
      $xpathPrefix = $isRdf ? '/rdf:RDF/rss:channel' : '/rss/channel';

      foreach ($this->_extensions as $registered) {
          $registered->setXpathPrefix($xpathPrefix);
      }
  }
  /**
   * Get a single author
   *
   * Looks the author up by position in the result of getAuthors().
   *
   * @param int $index Position of the wanted author
   * @return string|null The author entry at $index, or null when not set
   */
  public function getAuthor($index = 0)
  {
      $all = $this->getAuthors();

      return isset($all[$index]) ? $all[$index] : null;
  }
  /**
   * Get the feed authors
   *
   * Authors are gathered from the DublinCore extension and from any
   * (rss:)author elements in the document. When neither yields anything the
   * Atom extension is consulted instead. The result is cached in
   * $this->_data['authors'].
   *
   * @return Reader\Collection\Author|array|null Null when no author was found
   */
  public function getAuthors()
  {
      if (array_key_exists('authors', $this->_data)) {
          return $this->_data['authors'];
      }

      $authors = array();

      $authorsDc = $this->getExtension('DublinCore')->getAuthors();
      if (!empty($authorsDc)) {
          foreach ($authorsDc as $author) {
              $authors[] = array(
                  'name' => $author['name']
              );
          }
      }

      /**
       * Technically RSS doesn't specify author element use at the feed level
       * but it's supported on a "just in case" basis.
       */
      if ($this->getType() !== Reader\Reader::TYPE_RSS_10
          && $this->getType() !== Reader\Reader::TYPE_RSS_090
      ) {
          $list = $this->_xpath->query('//author');
      } else {
          $list = $this->_xpath->query('//rss:author');
      }

      if ($list->length) {
          foreach ($list as $author) {
              $string = trim($author->nodeValue);
              $data   = array();
              // Pretty rough parsing - but it's a catchall. Entries without
              // an "@" are skipped; a trailing "(Name)" becomes the name.
              if (preg_match("/^.*@[^ ]*/", $string, $matches)) {
                  $data['email'] = trim($matches[0]);
                  if (preg_match("/\((.*)\)$/", $string, $matches)) {
                      $data['name'] = $matches[1];
                  }
                  $authors[] = $data;
              }
          }
      }

      if (count($authors) == 0) {
          $authors = $this->getExtension('Atom')->getAuthors();
      } else {
          $authors = new Reader\Collection\Author(
              Reader\Reader::arrayUnique($authors)
          );
      }

      // Test for emptiness before counting: if the Atom fallback yields
      // null, count(null) warns on PHP 7.2+ and throws a TypeError on PHP 8.
      if (empty($authors) || count($authors) == 0) {
          $authors = null;
      }

      $this->_data['authors'] = $authors;

      return $this->_data['authors'];
  }
  /**
   * Get the copyright entry
   *
   * Lookup order: the channel's copyright element (non-RDF feeds only),
   * then the DublinCore extension, then the Atom extension. The result is
   * cached in $this->_data['copyright'].
   *
   * @return string|null
   */
  public function getCopyright()
  {
      if (array_key_exists('copyright', $this->_data)) {
          return $this->_data['copyright'];
      }

      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;

      $copyright = null;
      if (!$isRdf) {
          $copyright = $this->_xpath->evaluate('string(/rss/channel/copyright)');
      }

      if (!$copyright && $this->getExtension('DublinCore') !== null) {
          $copyright = $this->getExtension('DublinCore')->getCopyright();
      }

      if (empty($copyright)) {
          $copyright = $this->getExtension('Atom')->getCopyright();
      }

      // Collapse every falsy outcome (e.g. an empty string) to null.
      $this->_data['copyright'] = $copyright ? $copyright : null;

      return $this->_data['copyright'];
  }
  /**
   * Get the feed creation date
   *
   * This reader does not look up a separate creation date; it returns
   * whatever getDateModified() returns.
   *
   * @return DateTime|null
   */
  public function getDateCreated()
  {
      $created = $this->getDateModified();

      return $created;
  }
  /**
   * Get the feed modification date
   *
   * For non-RDF feeds the channel's pubDate is used, falling back to
   * lastBuildDate. The value is first handed to strtotime(); if that fails,
   * the RSS / RFC 822 / RFC 2822 formats are tried one after another. When
   * the channel provides no date the DublinCore and then the Atom extension
   * are consulted. The result is cached in $this->_data['datemodified'].
   *
   * @return DateTime|null
   * @throws Exception\RuntimeException When the channel date matches none
   *                                    of the supported formats
   */
  public function getDateModified()
  {
      if (array_key_exists('datemodified', $this->_data)) {
          return $this->_data['datemodified'];
      }

      $date = null;

      if ($this->getType() !== Reader\Reader::TYPE_RSS_10
          && $this->getType() !== Reader\Reader::TYPE_RSS_090
      ) {
          $dateModified = $this->_xpath->evaluate('string(/rss/channel/pubDate)');
          if (!$dateModified) {
              $dateModified = $this->_xpath->evaluate('string(/rss/channel/lastBuildDate)');
          }

          if ($dateModified) {
              $timestamp = strtotime($dateModified);
              if ($timestamp !== false) {
                  $date = new DateTime('@' . $timestamp);
              } else {
                  // DateTime::createFromFormat() signals failure by returning
                  // false rather than throwing, so each format has to be
                  // checked explicitly before moving on to the next one.
                  $dateStandards = array(DateTime::RSS, DateTime::RFC822, DateTime::RFC2822);
                  foreach ($dateStandards as $standard) {
                      $parsed = DateTime::createFromFormat($standard, $dateModified);
                      if ($parsed instanceof DateTime) {
                          $date = $parsed;
                          break;
                      }
                  }

                  if (!$date) {
                      throw new Exception\RuntimeException(
                          'Could not load date due to unrecognised'
                          . ' format (should follow RFC 822 or 2822): '
                          . $dateModified
                      );
                  }
              }
          }
      }

      if (!$date) {
          $date = $this->getExtension('DublinCore')->getDate();
      }

      if (!$date) {
          $date = $this->getExtension('Atom')->getDateModified();
      }

      if (!$date) {
          $date = null;
      }

      $this->_data['datemodified'] = $date;

      return $this->_data['datemodified'];
  }
  /**
   * Get the feed lastBuild date
   *
   * Only non-RDF feeds are inspected. The channel's lastBuildDate is first
   * handed to strtotime(); if that fails, the RSS / RFC 822 / RFC 2822
   * formats are tried one after another against the raw string. The result
   * is cached in $this->_data['lastBuildDate'].
   *
   * @return DateTime|null
   * @throws Exception\RuntimeException When the date matches none of the
   *                                    supported formats
   */
  public function getLastBuildDate()
  {
      if (array_key_exists('lastBuildDate', $this->_data)) {
          return $this->_data['lastBuildDate'];
      }

      $date = null;

      if ($this->getType() !== Reader\Reader::TYPE_RSS_10
          && $this->getType() !== Reader\Reader::TYPE_RSS_090
      ) {
          $lastBuildDate = $this->_xpath->evaluate('string(/rss/channel/lastBuildDate)');

          if ($lastBuildDate) {
              $timestamp = strtotime($lastBuildDate);
              if ($timestamp !== false) {
                  $date = new DateTime('@' . $timestamp);
              } else {
                  // Parse the raw string, not the failed strtotime() result.
                  // createFromFormat() returns false on failure instead of
                  // throwing, so every format is checked explicitly.
                  $dateStandards = array(DateTime::RSS, DateTime::RFC822, DateTime::RFC2822);
                  foreach ($dateStandards as $standard) {
                      $parsed = DateTime::createFromFormat($standard, $lastBuildDate);
                      if ($parsed instanceof DateTime) {
                          $date = $parsed;
                          break;
                      }
                  }

                  if (!$date) {
                      throw new Exception\RuntimeException(
                          'Could not load date due to unrecognised'
                          . ' format (should follow RFC 822 or 2822): '
                          . $lastBuildDate
                      );
                  }
              }
          }
      }

      if (!$date) {
          $date = null;
      }

      $this->_data['lastBuildDate'] = $date;

      return $this->_data['lastBuildDate'];
  }
  /**
   * Get the feed description
   *
   * Lookup order: the channel's description element (namespaced for RDF
   * feeds), then the DublinCore extension, then the Atom extension. The
   * result is cached in $this->_data['description'].
   *
   * @return string|null
   */
  public function getDescription()
  {
      if (array_key_exists('description', $this->_data)) {
          return $this->_data['description'];
      }

      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;

      $expression = $isRdf
          ? 'string(/rdf:RDF/rss:channel/rss:description)'
          : 'string(/rss/channel/description)';
      $description = $this->_xpath->evaluate($expression);

      if (!$description && $this->getExtension('DublinCore') !== null) {
          $description = $this->getExtension('DublinCore')->getDescription();
      }

      if (empty($description)) {
          $description = $this->getExtension('Atom')->getDescription();
      }

      // Collapse every falsy outcome (e.g. an empty string) to null.
      $this->_data['description'] = $description ? $description : null;

      return $this->_data['description'];
  }
  /**
   * Get the feed ID
   *
   * Lookup order: the channel's guid element (non-RDF feeds only), the
   * DublinCore extension, the Atom extension, and finally the feed link or,
   * failing that, the feed title. The result is cached in
   * $this->_data['id'].
   *
   * @return string|null
   */
  public function getId()
  {
      if (array_key_exists('id', $this->_data)) {
          return $this->_data['id'];
      }

      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;

      $id = null;
      if (!$isRdf) {
          $id = $this->_xpath->evaluate('string(/rss/channel/guid)');
      }

      if (!$id && $this->getExtension('DublinCore') !== null) {
          $id = $this->getExtension('DublinCore')->getId();
      }

      if (empty($id)) {
          $id = $this->getExtension('Atom')->getId();
      }

      if (!$id) {
          // Last resort: identify the feed by its link, then by its title.
          $fallbackLink = $this->getLink();
          if ($fallbackLink) {
              $id = $fallbackLink;
          } else {
              $fallbackTitle = $this->getTitle();
              $id = $fallbackTitle ? $fallbackTitle : null;
          }
      }

      $this->_data['id'] = $id;

      return $this->_data['id'];
  }
  /**
   * Get the feed image data
   *
   * Reads the first image element of the channel and collects its non-empty
   * children into an array with the keys 'uri', 'link', 'title', 'height',
   * 'width' and 'description'. The result is cached in
   * $this->_data['image'].
   *
   * @return array|null Null when the channel has no image element
   */
  public function getImage()
  {
      if (array_key_exists('image', $this->_data)) {
          return $this->_data['image'];
      }

      if ($this->getType() !== Reader\Reader::TYPE_RSS_10
          && $this->getType() !== Reader\Reader::TYPE_RSS_090
      ) {
          $list        = $this->_xpath->query('/rss/channel/image');
          $prefix      = '/rss/channel/image[1]';
          $childPrefix = '';
      } else {
          $list        = $this->_xpath->query('/rdf:RDF/rss:channel/rss:image');
          $prefix      = '/rdf:RDF/rss:channel/rss:image[1]';
          // In RDF feeds the image children live in the RSS namespace too;
          // an unprefixed name test would only match un-namespaced elements.
          $childPrefix = 'rss:';
      }

      if ($list->length > 0) {
          $image = array();

          // Result key => child element name.
          $fields = array(
              'uri'         => 'url',
              'link'        => 'link',
              'title'       => 'title',
              'height'      => 'height',
              'width'       => 'width',
              'description' => 'description',
          );

          foreach ($fields as $key => $element) {
              $value = $this->_xpath->evaluate(
                  'string(' . $prefix . '/' . $childPrefix . $element . ')'
              );
              if ($value) {
                  $image[$key] = $value;
              }
          }
      } else {
          $image = null;
      }

      $this->_data['image'] = $image;

      return $this->_data['image'];
  }
  /**
   * Get the feed language
   *
   * Lookup order: the channel's language element (non-RDF feeds only), the
   * DublinCore extension, the Atom extension, and finally the first
   * xml:lang attribute selected by //@xml:lang[1]. The result is cached in
   * $this->_data['language'].
   *
   * @return string|null
   */
  public function getLanguage()
  {
      if (array_key_exists('language', $this->_data)) {
          return $this->_data['language'];
      }

      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;

      $language = null;
      if (!$isRdf) {
          $language = $this->_xpath->evaluate('string(/rss/channel/language)');
      }

      if (!$language && $this->getExtension('DublinCore') !== null) {
          $language = $this->getExtension('DublinCore')->getLanguage();
      }

      if (empty($language)) {
          $language = $this->getExtension('Atom')->getLanguage();
      }

      if (!$language) {
          $language = $this->_xpath->evaluate('string(//@xml:lang[1])');
      }

      // Collapse every falsy outcome (e.g. an empty string) to null.
      $this->_data['language'] = $language ? $language : null;

      return $this->_data['language'];
  }
  /**
   * Get a link to the feed
   *
   * Reads the channel's link element (namespaced for RDF feeds) and falls
   * back to the Atom extension when it is empty. The result is cached in
   * $this->_data['link'].
   *
   * @return string|null
   */
  public function getLink()
  {
      if (array_key_exists('link', $this->_data)) {
          return $this->_data['link'];
      }

      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;

      $expression = $isRdf
          ? 'string(/rdf:RDF/rss:channel/rss:link)'
          : 'string(/rss/channel/link)';
      $link = $this->_xpath->evaluate($expression);

      if (empty($link)) {
          $link = $this->getExtension('Atom')->getLink();
      }

      // Collapse every falsy outcome (e.g. an empty string) to null.
      $this->_data['link'] = $link ? $link : null;

      return $this->_data['link'];
  }
  /**
   * Get a link to the feed XML
   *
   * Asks the Atom extension for the feed link and falls back to the
   * original source URI when the extension has none. The result is cached
   * in $this->_data['feedlink'].
   *
   * @return string|null
   */
  public function getFeedLink()
  {
      if (array_key_exists('feedlink', $this->_data)) {
          return $this->_data['feedlink'];
      }

      $feedLink = $this->getExtension('Atom')->getFeedLink();

      // empty() already covers null, so a single check is enough.
      if (empty($feedLink)) {
          $feedLink = $this->getOriginalSourceUri();
      }

      $this->_data['feedlink'] = $feedLink;

      return $this->_data['feedlink'];
  }
  /**
   * Get the feed generator entry
   *
   * Non-RDF feeds are checked for a channel generator element and then for
   * an atom:generator element; RDF feeds only for atom:generator. If that
   * yields nothing the Atom extension is consulted. The result is cached in
   * $this->_data['generator'].
   *
   * @return string|null
   */
  public function getGenerator()
  {
      if (array_key_exists('generator', $this->_data)) {
          return $this->_data['generator'];
      }

      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;

      if ($isRdf) {
          $generator = $this->_xpath->evaluate('string(/rdf:RDF/rss:channel/atom:generator)');
      } else {
          $generator = $this->_xpath->evaluate('string(/rss/channel/generator)');
          if (!$generator) {
              $generator = $this->_xpath->evaluate('string(/rss/channel/atom:generator)');
          }
      }

      if (empty($generator)) {
          $generator = $this->getExtension('Atom')->getGenerator();
      }

      // Collapse every falsy outcome (e.g. an empty string) to null.
      $this->_data['generator'] = $generator ? $generator : null;

      return $this->_data['generator'];
  }
  /**
   * Get the feed title
   *
   * Lookup order: the channel's title element (namespaced for RDF feeds),
   * then the DublinCore extension, then the Atom extension. The result is
   * cached in $this->_data['title'].
   *
   * @return string|null
   */
  public function getTitle()
  {
      if (array_key_exists('title', $this->_data)) {
          return $this->_data['title'];
      }

      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;

      $expression = $isRdf
          ? 'string(/rdf:RDF/rss:channel/rss:title)'
          : 'string(/rss/channel/title)';
      $title = $this->_xpath->evaluate($expression);

      if (!$title && $this->getExtension('DublinCore') !== null) {
          $title = $this->getExtension('DublinCore')->getTitle();
      }

      if (!$title) {
          $title = $this->getExtension('Atom')->getTitle();
      }

      // Collapse every falsy outcome (e.g. an empty string) to null.
      $this->_data['title'] = $title ? $title : null;

      return $this->_data['title'];
  }
  /**
   * Get an array of any supported PubSubHubbub endpoints
   *
   * The hubs come from the Atom extension and are de-duplicated with
   * array_unique(). The result is cached in $this->_data['hubs'].
   *
   * @return array|null Null when the Atom extension reports no hubs
   */
  public function getHubs()
  {
      if (array_key_exists('hubs', $this->_data)) {
          return $this->_data['hubs'];
      }

      $found = $this->getExtension('Atom')->getHubs();

      $this->_data['hubs'] = empty($found) ? null : array_unique($found);

      return $this->_data['hubs'];
  }
  /**
   * Get all categories
   *
   * Collects every (rss:)category element found below the channel; each
   * becomes an entry whose 'term' and 'label' are the element text and
   * whose 'scheme' is its "domain" attribute. Without such elements the
   * DublinCore extension is asked, and if the collection is still empty
   * the Atom extension is. The result is cached in
   * $this->_data['categories'].
   *
   * @return Reader\Collection\Category
   */
  public function getCategories()
  {
      if (array_key_exists('categories', $this->_data)) {
          return $this->_data['categories'];
      }

      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;

      $list = $this->_xpath->query(
          $isRdf ? '/rdf:RDF/rss:channel//rss:category' : '/rss/channel//category'
      );

      if (!$list->length) {
          $categoryCollection = $this->getExtension('DublinCore')->getCategories();
      } else {
          $categoryCollection = new Collection\Category;
          foreach ($list as $node) {
              $text = $node->nodeValue;
              $categoryCollection[] = array(
                  'term'   => $text,
                  'scheme' => $node->getAttribute('domain'),
                  'label'  => $text,
              );
          }
      }

      if (count($categoryCollection) == 0) {
          $categoryCollection = $this->getExtension('Atom')->getCategories();
      }

      $this->_data['categories'] = $categoryCollection;

      return $this->_data['categories'];
  }
  /**
   * Read all entries to the internal entries array
   *
   * Every (rss:)item node found anywhere in the document is stored in
   * $this->_entries under the index it has in the XPath result.
   */
  protected function _indexEntries()
  {
      $isRdf = $this->getType() === Reader\Reader::TYPE_RSS_10
          || $this->getType() === Reader\Reader::TYPE_RSS_090;

      $items = $this->_xpath->evaluate($isRdf ? '//rss:item' : '//item');

      foreach ($items as $position => $item) {
          $this->_entries[$position] = $item;
      }
  }
  /**
   * Register the default namespaces for the current feed format
   *
   * Only RSS 1.0 and RSS 0.90 need anything registered: the "rdf" prefix
   * plus an "rss" prefix bound to the namespace of the respective version.
   * For every other type nothing is registered here.
   */
  protected function _registerNamespaces()
  {
      $feedType = $this->_data['type'];

      // Loose comparison on purpose: it mirrors what a switch statement does.
      if ($feedType == Reader\Reader::TYPE_RSS_10) {
          $rssNamespace = Reader\Reader::NAMESPACE_RSS_10;
      } elseif ($feedType == Reader\Reader::TYPE_RSS_090) {
          $rssNamespace = Reader\Reader::NAMESPACE_RSS_090;
      } else {
          return;
      }

      $this->_xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
      $this->_xpath->registerNamespace('rss', $rssNamespace);
  }
  587. }