PageRenderTime 52ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/Zend/Feed/Reader/Feed/Rss.php

https://bitbucket.org/simukti/zf1
PHP | 733 lines | 477 code | 116 blank | 140 comment | 111 complexity | 61a9b2cc7c9658ba681d1646aca415c3 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Feed_Reader
  17. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id: Rss.php 24593 2012-01-05 20:35:02Z matthew $
  20. */
  21. /**
  22. * @see Zend_Feed_Reader_FeedAbstract
  23. */
  24. require_once 'Zend/Feed/Reader/FeedAbstract.php';
  25. /**
  26. * @see Zend_Feed_Reader_Extension_Atom_Feed
  27. */
  28. require_once 'Zend/Feed/Reader/Extension/Atom/Feed.php';
  29. /**
  30. * @see Zend_Feed_Reader_Extension_DublinCore_Feed
  31. */
  32. require_once 'Zend/Feed/Reader/Extension/DublinCore/Feed.php';
  33. /**
  34. * @see Zend_Date
  35. */
  36. require_once 'Zend/Date.php';
  37. /**
  38. * @see Zend_Feed_Reader_Collection_Author
  39. */
  40. require_once 'Zend/Feed/Reader/Collection/Author.php';
  41. /**
  42. * @category Zend
  43. * @package Zend_Feed_Reader
  44. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  45. * @license http://framework.zend.com/license/new-bsd New BSD License
  46. */
  47. class Zend_Feed_Reader_Feed_Rss extends Zend_Feed_Reader_FeedAbstract
  48. {
  /**
   * Constructor
   *
   * Runs the parent constructor, instantiates the DublinCore and Atom feed
   * extensions through the plugin loader, and then hands every registered
   * extension the XPath prefix that addresses the channel element for the
   * detected feed type.
   *
   * @param DOMDocument $dom
   * @param string $type
   */
  public function __construct(DomDocument $dom, $type = null)
  {
      parent::__construct($dom, $type);

      // Extensions are created in this fixed order: DublinCore first, then Atom.
      foreach (array('DublinCore_Feed', 'Atom_Feed') as $extensionName) {
          $extensionClass = Zend_Feed_Reader::getPluginLoader()->getClassName($extensionName);
          $this->_extensions[$extensionName] = new $extensionClass(
              $dom, $this->_data['type'], $this->_xpath
          );
      }

      // RSS 1.0 and 0.90 are RDF based; every other type uses the plain <rss> root.
      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);
      $xpathPrefix = $isRdf ? '/rdf:RDF/rss:channel' : '/rss/channel';

      foreach ($this->_extensions as $registered) {
          $registered->setXpathPrefix($xpathPrefix);
      }
  }
  /**
   * Get a single author
   *
   * Looks up one entry of the list returned by getAuthors(). Entries built
   * by this class are arrays that may carry 'name' and/or 'email' keys.
   *
   * @param int $index Position in the author list (defaults to the first)
   * @return array|null The author entry, or null when no entry exists at $index
   */
  public function getAuthor($index = 0)
  {
      $all = $this->getAuthors();

      return isset($all[$index]) ? $all[$index] : null;
  }
  /**
   * Get the feed authors
   *
   * Authors are gathered from the DublinCore extension and from any
   * author elements found in the document. If neither source yields
   * anything, the Atom extension is consulted instead. The result is cached
   * in $this->_data['authors'].
   *
   * @return Zend_Feed_Reader_Collection_Author|null Collection of author
   *         arrays ('name' and/or 'email' keys), or null when nothing is found
   */
  public function getAuthors()
  {
      if (array_key_exists('authors', $this->_data)) {
          return $this->_data['authors'];
      }

      $authors = array();

      $authorsDc = $this->getExtension('DublinCore')->getAuthors();
      if (!empty($authorsDc)) {
          foreach ($authorsDc as $author) {
              $authors[] = array(
                  'name' => $author['name']
              );
          }
      }

      /**
       * Technically RSS doesn't specify author element use at the feed level
       * but it's supported on a "just in case" basis.
       */
      if ($this->getType() !== Zend_Feed_Reader::TYPE_RSS_10
          && $this->getType() !== Zend_Feed_Reader::TYPE_RSS_090) {
          $list = $this->_xpath->query('//author');
      } else {
          $list = $this->_xpath->query('//rss:author');
      }

      if ($list->length) {
          foreach ($list as $author) {
              $string = trim($author->nodeValue);
              $data   = array();
              // Pretty rough parsing - but it's a catchall.
              // Only values containing an "@" are kept; an optional trailing
              // "(Name)" part becomes the author name.
              if (preg_match("/^.*@[^ ]*/", $string, $matches)) {
                  $data['email'] = trim($matches[0]);
                  if (preg_match("/\((.*)\)$/", $string, $matches)) {
                      $data['name'] = $matches[1];
                  }
                  $authors[] = $data;
              }
          }
      }

      if (count($authors) == 0) {
          $authors = $this->getExtension('Atom')->getAuthors();
      } else {
          $authors = new Zend_Feed_Reader_Collection_Author(
              Zend_Feed_Reader::arrayUnique($authors)
          );
      }

      // The Atom extension presumably returns null when it finds no authors
      // (TODO confirm against that class). Check for an empty value first so
      // count() never receives a non-countable, which warns on PHP 7.2+ and
      // throws a TypeError on PHP 8.
      if (empty($authors) || count($authors) == 0) {
          $authors = null;
      }

      $this->_data['authors'] = $authors;

      return $this->_data['authors'];
  }
  /**
   * Get the copyright entry
   *
   * Lookup order: the channel's copyright element (non-RDF feeds only),
   * then the DublinCore extension, then the Atom extension. The result is
   * cached in $this->_data['copyright'].
   *
   * @return string|null
   */
  public function getCopyright()
  {
      if (array_key_exists('copyright', $this->_data)) {
          return $this->_data['copyright'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $value = null;
      if (!$isRdf) {
          $value = $this->_xpath->evaluate('string(/rss/channel/copyright)');
      }

      if (!$value) {
          $dublinCore = $this->getExtension('DublinCore');
          if ($dublinCore !== null) {
              $value = $dublinCore->getCopyright();
          }
      }

      if (!$value) {
          $value = $this->getExtension('Atom')->getCopyright();
      }

      // Normalise every empty result to null before caching.
      $this->_data['copyright'] = $value ? $value : null;

      return $this->_data['copyright'];
  }
  /**
   * Get the feed creation date
   *
   * This class does not track a separate creation date; the value is
   * whatever getDateModified() returns.
   *
   * @return Zend_Date|null
   */
  public function getDateCreated()
  {
      $created = $this->getDateModified();

      return $created;
  }
  /**
   * Get the feed modification date
   *
   * For non-RDF feeds the channel's pubDate is read, falling back to
   * lastBuildDate. The string is parsed with strtotime() first and, if that
   * fails, with a series of Zend_Date formats. When no date is obtained this
   * way the DublinCore and then the Atom extension are consulted. The result
   * is cached in $this->_data['datemodified'].
   *
   * @return Zend_Date|null
   * @throws Zend_Feed_Exception When the date string matches none of the
   *                             attempted formats
   */
  public function getDateModified()
  {
      if (array_key_exists('datemodified', $this->_data)) {
          return $this->_data['datemodified'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $result = null;

      if (!$isRdf) {
          $raw = $this->_xpath->evaluate('string(/rss/channel/pubDate)');
          if (!$raw) {
              $raw = $this->_xpath->evaluate('string(/rss/channel/lastBuildDate)');
          }

          if ($raw) {
              $timestamp = strtotime($raw);
              if ($timestamp) {
                  $result = new Zend_Date($timestamp);
              } else {
                  // strtotime() could not handle it: try each known format in
                  // turn and give up only after the last one has failed.
                  $formats = array(Zend_Date::RSS, Zend_Date::RFC_822,
                                   Zend_Date::RFC_2822, Zend_Date::DATES);
                  $result  = new Zend_Date;
                  foreach ($formats as $format) {
                      try {
                          $result->set($raw, $format);
                          break;
                      } catch (Zend_Date_Exception $e) {
                          if ($format == Zend_Date::DATES) {
                              require_once 'Zend/Feed/Exception.php';
                              throw new Zend_Feed_Exception(
                                  'Could not load date due to unrecognised'
                                  .' format (should follow RFC 822 or 2822):'
                                  . $e->getMessage(),
                                  0, $e
                              );
                          }
                      }
                  }
              }
          }
      }

      if (!$result) {
          $result = $this->getExtension('DublinCore')->getDate();
      }

      if (!$result) {
          $result = $this->getExtension('Atom')->getDateModified();
      }

      $this->_data['datemodified'] = $result ? $result : null;

      return $this->_data['datemodified'];
  }
  /**
   * Get the feed lastBuild date
   *
   * Only non-RDF feeds are inspected. The channel's lastBuildDate string is
   * parsed with strtotime() first and, if that fails, with a series of
   * Zend_Date formats. The result is cached in $this->_data['lastBuildDate'].
   *
   * @return Zend_Date|null
   * @throws Zend_Feed_Exception When the date string matches none of the
   *                             attempted formats
   */
  public function getLastBuildDate()
  {
      if (array_key_exists('lastBuildDate', $this->_data)) {
          return $this->_data['lastBuildDate'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $result = null;

      if (!$isRdf) {
          $raw = $this->_xpath->evaluate('string(/rss/channel/lastBuildDate)');

          if ($raw) {
              $timestamp = strtotime($raw);
              if ($timestamp) {
                  $result = new Zend_Date($timestamp);
              } else {
                  // strtotime() could not handle it: try each known format in
                  // turn and give up only after the last one has failed.
                  $formats = array(Zend_Date::RSS, Zend_Date::RFC_822,
                                   Zend_Date::RFC_2822, Zend_Date::DATES);
                  $result  = new Zend_Date;
                  foreach ($formats as $format) {
                      try {
                          $result->set($raw, $format);
                          break;
                      } catch (Zend_Date_Exception $e) {
                          if ($format == Zend_Date::DATES) {
                              require_once 'Zend/Feed/Exception.php';
                              throw new Zend_Feed_Exception(
                                  'Could not load date due to unrecognised'
                                  .' format (should follow RFC 822 or 2822):'
                                  . $e->getMessage(),
                                  0, $e
                              );
                          }
                      }
                  }
              }
          }
      }

      $this->_data['lastBuildDate'] = $result ? $result : null;

      return $this->_data['lastBuildDate'];
  }
  /**
   * Get the feed description
   *
   * Lookup order: the channel's description element, then the DublinCore
   * extension, then the Atom extension. The result is cached in
   * $this->_data['description'].
   *
   * @return string|null
   */
  public function getDescription()
  {
      if (array_key_exists('description', $this->_data)) {
          return $this->_data['description'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $expression = $isRdf
                  ? 'string(/rdf:RDF/rss:channel/rss:description)'
                  : 'string(/rss/channel/description)';
      $text = $this->_xpath->evaluate($expression);

      if (!$text) {
          $dublinCore = $this->getExtension('DublinCore');
          if ($dublinCore !== null) {
              $text = $dublinCore->getDescription();
          }
      }

      if (!$text) {
          $text = $this->getExtension('Atom')->getDescription();
      }

      // Normalise every empty result to null before caching.
      $this->_data['description'] = $text ? $text : null;

      return $this->_data['description'];
  }
  /**
   * Get the feed ID
   *
   * Lookup order: the channel's guid element (non-RDF feeds only), the
   * DublinCore extension, the Atom extension, and finally the feed link or,
   * failing that, the feed title. The result is cached in $this->_data['id'].
   *
   * @return string|null
   */
  public function getId()
  {
      if (array_key_exists('id', $this->_data)) {
          return $this->_data['id'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $identifier = null;
      if (!$isRdf) {
          $identifier = $this->_xpath->evaluate('string(/rss/channel/guid)');
      }

      if (!$identifier) {
          $dublinCore = $this->getExtension('DublinCore');
          if ($dublinCore !== null) {
              $identifier = $dublinCore->getId();
          }
      }

      if (!$identifier) {
          $identifier = $this->getExtension('Atom')->getId();
      }

      if (!$identifier) {
          // Last resort: reuse the link, then the title, as the identifier.
          $link = $this->getLink();
          if ($link) {
              $identifier = $link;
          } else {
              $title      = $this->getTitle();
              $identifier = $title ? $title : null;
          }
      }

      $this->_data['id'] = $identifier;

      return $this->_data['id'];
  }
  /**
   * Get the feed image data
   *
   * When the channel has an image element, its url, link, title, height,
   * width and description children are collected into an array. Only
   * non-empty values are included, and the url child is stored under the
   * 'uri' key. The result is cached in $this->_data['image'].
   *
   * @return array|null Image data, or null when the channel has no image
   */
  public function getImage()
  {
      if (array_key_exists('image', $this->_data)) {
          return $this->_data['image'];
      }

      if ($this->getType() !== Zend_Feed_Reader::TYPE_RSS_10 &&
          $this->getType() !== Zend_Feed_Reader::TYPE_RSS_090) {
          $imagePath   = '/rss/channel/image';
          $childPrefix = '';
      } else {
          // In RDF based feeds the image children live in the RSS namespace.
          // An unprefixed XPath name only matches no-namespace elements, so
          // the child names must carry the registered "rss" prefix.
          $imagePath   = '/rdf:RDF/rss:channel/rss:image';
          $childPrefix = 'rss:';
      }

      $image = null;
      $list  = $this->_xpath->query($imagePath);

      if ($list->length > 0) {
          $image = array();

          // Child element name => key used in the returned array.
          $fields = array(
              'url'         => 'uri',
              'link'        => 'link',
              'title'       => 'title',
              'height'      => 'height',
              'width'       => 'width',
              'description' => 'description',
          );

          foreach ($fields as $element => $key) {
              $value = $this->_xpath->evaluate(
                  'string(' . $imagePath . '[1]/' . $childPrefix . $element . ')'
              );
              if (!$value && $childPrefix !== '') {
                  // Keep accepting un-namespaced children, as before.
                  $value = $this->_xpath->evaluate(
                      'string(' . $imagePath . '[1]/' . $element . ')'
                  );
              }
              if ($value) {
                  $image[$key] = $value;
              }
          }
      }

      $this->_data['image'] = $image;

      return $this->_data['image'];
  }
  /**
   * Get the feed language
   *
   * Lookup order: the channel's language element (non-RDF feeds only), the
   * DublinCore extension, the Atom extension, and finally an xml:lang
   * attribute found in the document. The result is cached in
   * $this->_data['language'].
   *
   * @return string|null
   */
  public function getLanguage()
  {
      if (array_key_exists('language', $this->_data)) {
          return $this->_data['language'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $lang = null;
      if (!$isRdf) {
          $lang = $this->_xpath->evaluate('string(/rss/channel/language)');
      }

      if (!$lang) {
          $dublinCore = $this->getExtension('DublinCore');
          if ($dublinCore !== null) {
              $lang = $dublinCore->getLanguage();
          }
      }

      if (!$lang) {
          $lang = $this->getExtension('Atom')->getLanguage();
      }

      if (!$lang) {
          $lang = $this->_xpath->evaluate('string(//@xml:lang[1])');
      }

      // Normalise every empty result to null before caching.
      $this->_data['language'] = $lang ? $lang : null;

      return $this->_data['language'];
  }
  /**
   * Get a link to the feed
   *
   * Reads the channel's link element and falls back to the Atom extension
   * when it is empty. The result is cached in $this->_data['link'].
   *
   * @return string|null
   */
  public function getLink()
  {
      if (array_key_exists('link', $this->_data)) {
          return $this->_data['link'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $expression = $isRdf
                  ? 'string(/rdf:RDF/rss:channel/rss:link)'
                  : 'string(/rss/channel/link)';
      $url = $this->_xpath->evaluate($expression);

      if (!$url) {
          $url = $this->getExtension('Atom')->getLink();
      }

      // Normalise every empty result to null before caching.
      $this->_data['link'] = $url ? $url : null;

      return $this->_data['link'];
  }
  /**
   * Get a link to the feed XML
   *
   * Asks the Atom extension for the feed link and falls back to the value
   * of getOriginalSourceUri() when the extension supplies nothing. The
   * result is cached in $this->_data['feedlink'].
   *
   * @return string|null
   */
  public function getFeedLink()
  {
      if (array_key_exists('feedlink', $this->_data)) {
          return $this->_data['feedlink'];
      }

      $feedUrl = $this->getExtension('Atom')->getFeedLink();
      if (empty($feedUrl)) {
          $feedUrl = $this->getOriginalSourceUri();
      }

      $this->_data['feedlink'] = $feedUrl;

      return $this->_data['feedlink'];
  }
  /**
   * Get the feed generator entry
   *
   * For non-RDF feeds the channel's generator element is read first, then
   * its atom:generator element. RDF feeds are only checked for
   * atom:generator. If nothing is found the Atom extension is consulted.
   * The result is cached in $this->_data['generator'].
   *
   * @return string|null
   */
  public function getGenerator()
  {
      if (array_key_exists('generator', $this->_data)) {
          return $this->_data['generator'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      if ($isRdf) {
          $producer = $this->_xpath->evaluate('string(/rdf:RDF/rss:channel/atom:generator)');
      } else {
          $producer = $this->_xpath->evaluate('string(/rss/channel/generator)');
          if (!$producer) {
              $producer = $this->_xpath->evaluate('string(/rss/channel/atom:generator)');
          }
      }

      if (!$producer) {
          $producer = $this->getExtension('Atom')->getGenerator();
      }

      // Normalise every empty result to null before caching.
      $this->_data['generator'] = $producer ? $producer : null;

      return $this->_data['generator'];
  }
  /**
   * Get the feed title
   *
   * Lookup order: the channel's title element, then the DublinCore
   * extension, then the Atom extension. The result is cached in
   * $this->_data['title'].
   *
   * @return string|null
   */
  public function getTitle()
  {
      if (array_key_exists('title', $this->_data)) {
          return $this->_data['title'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $expression = $isRdf
                  ? 'string(/rdf:RDF/rss:channel/rss:title)'
                  : 'string(/rss/channel/title)';
      $heading = $this->_xpath->evaluate($expression);

      if (!$heading) {
          $dublinCore = $this->getExtension('DublinCore');
          if ($dublinCore !== null) {
              $heading = $dublinCore->getTitle();
          }
      }

      if (!$heading) {
          $heading = $this->getExtension('Atom')->getTitle();
      }

      // Normalise every empty result to null before caching.
      $this->_data['title'] = $heading ? $heading : null;

      return $this->_data['title'];
  }
  /**
   * Get an array of any supported PubSubHubbub endpoints
   *
   * The hubs are taken from the Atom extension and de-duplicated. The
   * result is cached in $this->_data['hubs'].
   *
   * @return array|null Hub list, or null when the extension reports none
   */
  public function getHubs()
  {
      if (array_key_exists('hubs', $this->_data)) {
          return $this->_data['hubs'];
      }

      $endpoints = $this->getExtension('Atom')->getHubs();

      $this->_data['hubs'] = empty($endpoints) ? null : array_unique($endpoints);

      return $this->_data['hubs'];
  }
  /**
   * Get all categories
   *
   * Category elements found beneath the channel are collected with their
   * text as both term and label and their "domain" attribute as scheme.
   * When the document has none, the DublinCore extension is used instead,
   * and if the resulting collection is empty the Atom extension is
   * consulted. The result is cached in $this->_data['categories'].
   *
   * @return Zend_Feed_Reader_Collection_Category
   */
  public function getCategories()
  {
      if (array_key_exists('categories', $this->_data)) {
          return $this->_data['categories'];
      }

      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $nodes = $this->_xpath->query(
          $isRdf ? '/rdf:RDF/rss:channel//rss:category' : '/rss/channel//category'
      );

      if ($nodes->length) {
          $collection = new Zend_Feed_Reader_Collection_Category;
          foreach ($nodes as $node) {
              $text = $node->nodeValue;
              $collection[] = array(
                  'term'   => $text,
                  'scheme' => $node->getAttribute('domain'),
                  'label'  => $text,
              );
          }
      } else {
          $collection = $this->getExtension('DublinCore')->getCategories();
      }

      if (count($collection) == 0) {
          $collection = $this->getExtension('Atom')->getCategories();
      }

      $this->_data['categories'] = $collection;

      return $this->_data['categories'];
  }
  /**
   * Read all entries to the internal entries array
   *
   * Every item element in the document is copied into $this->_entries
   * under the key it has in the XPath result.
   *
   * @return void
   */
  protected function _indexEntries()
  {
      $feedType = $this->getType();
      $isRdf    = ($feedType === Zend_Feed_Reader::TYPE_RSS_10
                || $feedType === Zend_Feed_Reader::TYPE_RSS_090);

      $items = $this->_xpath->evaluate($isRdf ? '//rss:item' : '//item');

      foreach ($items as $position => $item) {
          $this->_entries[$position] = $item;
      }
  }
  /**
   * Register the default namespaces for the current feed format
   *
   * Only RSS 1.0 and RSS 0.90 feeds need registrations: both get the "rdf"
   * prefix plus an "rss" prefix bound to the namespace of their own
   * version. Nothing is registered for any other type.
   *
   * @return void
   */
  protected function _registerNamespaces()
  {
      $feedType = $this->_data['type'];

      if ($feedType == Zend_Feed_Reader::TYPE_RSS_10) {
          $rssNamespace = Zend_Feed_Reader::NAMESPACE_RSS_10;
      } elseif ($feedType == Zend_Feed_Reader::TYPE_RSS_090) {
          $rssNamespace = Zend_Feed_Reader::NAMESPACE_RSS_090;
      } else {
          return;
      }

      $this->_xpath->registerNamespace('rdf', Zend_Feed_Reader::NAMESPACE_RDF);
      $this->_xpath->registerNamespace('rss', $rssNamespace);
  }
  617. }