PageRenderTime 25ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/DataParser/LegacyRSSDataParser.php

http://github.com/modolabs/Kurogo-Mobile-Web
PHP | 276 lines | 222 code | 34 blank | 20 comment | 38 complexity | 4575306c9cdd9c061d0d579a73dbcc1c MD5 | raw file
Possible License(s): LGPL-3.0, LGPL-2.1
  1. <?php
  2. /*
  3. * Copyright © 2010 - 2013 Modo Labs Inc. All rights reserved.
  4. *
  5. * The license governing the contents of this file is located in the LICENSE
  6. * file located at the root directory of this distribution. If the LICENSE file
  7. * is missing, please contact sales@modolabs.com.
  8. *
  9. */
  10. /**
  11. * @package ExternalData
  12. * @subpackage RSS
  13. */
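/**
 * Load the RSS package before the parser class is declared.
 * NOTE(review): presumably this makes the default element classes named in the
 * parser (RSSChannel, RSSItem, RSSImage, RSSEnclosure, ...) available - confirm.
 */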
  16. includePackage('RSS');
  17. /**
  18. * @package ExternalData
  19. * @subpackage RSS
  20. */
  21. class LegacyRSSDataParser extends XMLDataParser
  22. {
  23. protected $channel;
  24. protected $channelClass='RSSChannel';
  25. protected $itemClass='RSSItem';
  26. protected $imageClass='RSSImage';
  27. protected $enclosureClass='RSSEnclosure';
  28. protected $imageEnclosureClass='RSSImageEnclosure';
  29. protected $removeDuplicates = false;
  30. protected $htmlEscapedCDATA = false;
  31. protected $useDescriptionForContent = false;
  32. protected $items=array();
  33. protected $guids=array();
  34. protected static $startElements=array(
  35. 'RSS', 'RDF:RDF', 'CHANNEL', 'FEED', 'ITEM', 'ENTRY',
  36. 'ENCLOSURE', 'MEDIA:THUMBNAIL','MEDIA:CONTENT', 'IMAGE', 'LINK');
  37. protected static $endElements=array(
  38. 'CHANNEL', 'FEED', 'ITEM', 'ENTRY', 'DESCRIPTION');
  39. public function items()
  40. {
  41. return $this->items;
  42. }
  43. public function getTitle() {
  44. return $this->channel->getTitle();
  45. }
  46. public function clearInternalCache() {
  47. parent::clearInternalCache();
  48. $this->items = array();
  49. $this->guids = array();
  50. }
  51. public function init($args)
  52. {
  53. parent::init($args);
  54. if (isset($args['CHANNEL_CLASS'])) {
  55. $this->setChannelClass($args['CHANNEL_CLASS']);
  56. }
  57. if (isset($args['ITEM_CLASS'])) {
  58. $this->setItemClass($args['ITEM_CLASS']);
  59. }
  60. if (isset($args['IMAGE_CLASS'])) {
  61. $this->setImageClass($args['IMAGE_CLASS']);
  62. }
  63. if (isset($args['ENCLOSURE_CLASS'])) {
  64. $this->setEnclosureClass($args['ENCLOSURE_CLASS']);
  65. }
  66. if (isset($args['IMAGE_ENCLOSURE_CLASS'])) {
  67. $this->setImageEnclosureClass($args['IMAGE_ENCLOSURE_CLASS']);
  68. }
  69. if (isset($args['REMOVE_DUPLICATES'])) {
  70. $this->removeDuplicates = $args['REMOVE_DUPLICATES'];
  71. }
  72. if (isset($args['HTML_ESCAPED_CDATA'])) {
  73. $this->htmlEscapedCDATA = $args['HTML_ESCAPED_CDATA'];
  74. }
  75. if (isset($args['USE_DESCRIPTION_FOR_CONTENT'])) {
  76. $this->setUseDescriptionForContent($args['USE_DESCRIPTION_FOR_CONTENT']);
  77. }
  78. }
  79. public function setUseDescriptionForContent($bool) {
  80. $this->useDescriptionForContent = $bool ? true : false;
  81. }
  82. protected function shouldHandleStartElement($name)
  83. {
  84. return in_array($name, self::$startElements);
  85. }
  86. protected function handleStartElement($name, $attribs)
  87. {
  88. switch ($name)
  89. {
  90. case 'RSS':
  91. case 'RDF:RDF':
  92. break;
  93. case 'CHANNEL':
  94. case 'FEED': //for atom feeds
  95. $this->elementStack[] = new $this->channelClass($attribs);
  96. break;
  97. case 'ITEM':
  98. case 'ENTRY': //for atom feeds
  99. $element = new $this->itemClass($attribs);
  100. $element->init($this->initArgs);
  101. $this->elementStack[] = $element;
  102. break;
  103. case 'ENCLOSURE':
  104. case 'MEDIA:CONTENT':
  105. case 'MEDIA:THUMBNAIL':
  106. if ($this->enclosureIsImage($name, $attribs)) {
  107. $element = new $this->imageEnclosureClass($attribs);
  108. } else {
  109. $element = new $this->enclosureClass($attribs);
  110. }
  111. $element->init($this->initArgs);
  112. $this->elementStack[] = $element;
  113. break;
  114. case 'LINK':
  115. if (isset($attribs['REL'], $attribs['HREF']) && $attribs['REL'] == 'enclosure') {
  116. $attribs['URL'] = $attribs['HREF'];
  117. if ($this->enclosureIsImage($name, $attribs)) {
  118. $element = new $this->imageEnclosureClass($attribs);
  119. } else {
  120. $element = new $this->enclosureClass($attribs);
  121. }
  122. $element->init($this->initArgs);
  123. $element->setName('enclosure');
  124. } else {
  125. $element = new XMLElement($name, $attribs, $this->getEncoding());
  126. }
  127. $this->elementStack[] = $element;
  128. break;
  129. case 'IMAGE':
  130. $this->elementStack[] = new $this->imageClass($attribs);
  131. break;
  132. }
  133. }
  134. protected function shouldHandleEndElement($name)
  135. {
  136. return in_array($name, self::$endElements);
  137. }
  138. protected function handleEndElement($name, $element, $parent)
  139. {
  140. switch ($name)
  141. {
  142. case 'FEED': //for atom feeds
  143. case 'CHANNEL':
  144. $this->channel = $element;
  145. break;
  146. case 'ITEM':
  147. case 'ENTRY': //for atom feeds
  148. if (!$this->removeDuplicates || !in_array($element->getGUID(), $this->guids)) {
  149. $this->guids[] = $element->getGUID();
  150. $this->items[] = $element;
  151. }
  152. break;
  153. case 'DESCRIPTION':
  154. $parent->addElement($element); // add description as description
  155. if ($this->useDescriptionForContent) {
  156. // add description element again as content
  157. $element->setName('CONTENT');
  158. $element->shouldStripTags($this->shouldStripTags($element));
  159. $element->shouldHTMLDecodeCDATA($this->shouldHTMLDecodeCDATA($element));
  160. $parent->addElement($element);
  161. }
  162. break;
  163. }
  164. }
  165. public function setChannelClass($channelClass)
  166. {
  167. if ($channelClass) {
  168. if (!class_exists($channelClass)) {
  169. throw new KurogoConfigurationException("Cannot load class $channelClass");
  170. }
  171. $this->channelClass = $channelClass;
  172. }
  173. }
  174. public function setItemClass($itemClass)
  175. {
  176. if ($itemClass) {
  177. if (!class_exists($itemClass)) {
  178. throw new KurogoConfigurationException("Cannot load class $itemClass");
  179. }
  180. $this->itemClass = $itemClass;
  181. }
  182. }
  183. public function setEnclosureClass($enclosureClass)
  184. {
  185. if ($enclosureClass) {
  186. if (!class_exists($enclosureClass)) {
  187. throw new KurogoConfigurationException("Cannot load class $enclosureClass");
  188. }
  189. $this->enclosureClass = $enclosureClass;
  190. }
  191. }
  192. public function setImageClass($imageClass)
  193. {
  194. if ($imageClass) {
  195. if (!class_exists($imageClass)) {
  196. throw new KurogoConfigurationException("Cannot load class $imageClass");
  197. }
  198. $this->imageClass = $imageClass;
  199. }
  200. }
  201. protected function shouldStripTags($element)
  202. {
  203. $strip_tags = true;
  204. switch ($element->name())
  205. {
  206. case 'CONTENT:ENCODED':
  207. case 'CONTENT':
  208. case 'BODY':
  209. case 'DESCRIPTION':
  210. $strip_tags = false;
  211. break;
  212. }
  213. return $strip_tags;
  214. }
  215. protected function shouldHTMLDecodeCDATA($element)
  216. {
  217. $html_decode = false;
  218. if ($this->htmlEscapedCDATA) {
  219. // Some buggy feeds have HTML escaped with both CDATA and html entities
  220. switch ($element->name()) {
  221. case 'CONTENT:ENCODED':
  222. case 'CONTENT':
  223. case 'BODY':
  224. $html_decode = true;
  225. break;
  226. }
  227. }
  228. return $html_decode;
  229. }
  230. protected function enclosureIsImage($name, $attribs)
  231. {
  232. $imageTypes = array(
  233. 'image/jpeg',
  234. 'image/jpg',
  235. 'image/gif',
  236. 'image/png',
  237. );
  238. $type = isset($attribs['TYPE']) ? $attribs['TYPE'] : '';
  239. $medium = isset($attribs['MEDIUM']) ? $attribs['MEDIUM'] : '';
  240. return in_array($type, $imageTypes) || $name == 'MEDIA:THUMBNAIL' || ($name == 'MEDIA:CONTENT' && $medium == 'image');
  241. }
  242. }