PageRenderTime 46ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/common/libraries/plugin/lastrss/lastrss.class.php

https://bitbucket.org/chamilo/chamilo/
PHP | 354 lines | 236 code | 37 blank | 81 comment | 41 complexity | 4214b9d96ba4f01e994ddcabea7659ce MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, LGPL-3.0, GPL-3.0, MIT
  1. <?php
  2. /*
  3. ======================================================================
  4. lastRSS 0.9.1
  5. Simple yet powerful PHP class to parse RSS files.
  6. by Vojtech Semecky, webmaster @ webdot . cz
  7. Latest version, features, manual and examples:
  8. http://lastrss.oslab.net/
  9. ----------------------------------------------------------------------
  10. LICENSE
  11. This program is free software; you can redistribute it and/or
  12. modify it under the terms of the GNU General Public License (GPL)
  13. as published by the Free Software Foundation; either version 2
  14. of the License, or (at your option) any later version.
  15. This program is distributed in the hope that it will be useful,
  16. but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. GNU General Public License for more details.
  19. To read the license please visit http://www.gnu.org/copyleft/gpl.html
  20. ======================================================================
  21. */
  /**
   * lastRSS
   *
   * Simple yet powerful PHP class to parse RSS files.
   *
   * The feed is parsed with regular expressions (not an XML parser) into a
   * plain associative array. Parsed feeds can optionally be cached on disk
   * as serialized arrays, keyed by the MD5 of the feed URL.
   *
   * Typical use:
   *   $rss  = new LastRss($url);
   *   $feed = $rss->get_feed_content(); // array on success, false on failure
   */
  class LastRss
  {
      // TODO: Implement PEAR RSS parser package

      /** Encoding assumed for feeds that do not declare one in their XML prolog. */
      private $default_cp = 'UTF-8';

      /** Encoding of the feed currently being parsed; set by parse_feed_content(). */
      private $rsscp = '';

      /** CDATA handling mode: 'content', 'strip', or anything else for "leave as is". */
      private $cdata;

      /** Target encoding for returned values; '' disables conversion. */
      private $cp;

      /** Maximum number of items to return; 0 means no limit. */
      private $items_limit;

      /** Whether HTML is stripped from item titles and descriptions. */
      private $strip_html;

      /** date() format applied to lastBuildDate / pubDate; '' leaves dates untouched. */
      private $date_format;

      /** Directory holding cache files; '' disables caching. */
      private $cache_dir;

      /** Maximum age of a cache file, in seconds. */
      private $cache_time;

      /** URL (or path) of the feed to read. */
      private $feed_url;

      private $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'lastBuildDate', 'rating', 'docs');
      private $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
      private $imagetags = array('title', 'url', 'link', 'width', 'height');
      private $textinputtags = array('title', 'description', 'name', 'link');

      /**
       * Creates a parser for one feed with default settings.
       *
       * All defaults can be overwritten through the setters.
       *
       * @param string $url Feed URL as passed on from the module.
       */
      public function __construct($url)
      {
          $this->cdata = 'content';
          $this->cp = 'UTF-8';
          $this->items_limit = 0;
          $this->strip_html = false;
          $this->date_format = 'd F Y, H:i:s';
          $this->cache_dir = dirname(__FILE__).'/../../cache';
          // Default is 3600 seconds = 1 hour
          $this->cache_time = 3600;
          $this->feed_url = $url;
      }

      /** @return string Encoding assumed when the feed declares none. */
      public function get_default_cp()
      {
          return $this->default_cp;
      }

      /** @return string CDATA handling mode. */
      public function get_cdata()
      {
          return $this->cdata;
      }

      /** @return string Target encoding ('' = no conversion). */
      public function get_cp()
      {
          return $this->cp;
      }

      /** @return int Maximum number of items (0 = unlimited). */
      public function get_items_limit()
      {
          return $this->items_limit;
      }

      /** @return bool Whether HTML is stripped from titles and descriptions. */
      public function get_strip_html()
      {
          return $this->strip_html;
      }

      /** @return string date() format used for feed dates. */
      public function get_date_format()
      {
          return $this->date_format;
      }

      /** @return string Cache directory ('' = caching disabled). */
      public function get_cache_dir()
      {
          return $this->cache_dir;
      }

      /** @return int Cache lifetime in seconds. */
      public function get_cache_time()
      {
          return $this->cache_time;
      }

      /** @return string Feed URL. */
      public function get_feed_url()
      {
          return $this->feed_url;
      }

      /** @param string $default_cp Encoding assumed when the feed declares none. */
      public function set_default_cp($default_cp)
      {
          $this->default_cp = $default_cp;
      }

      /** @param string $cdata 'content' (unwrap CDATA), 'strip' (drop CDATA sections) or any other value (no change). */
      public function set_cdata($cdata)
      {
          $this->cdata = $cdata;
      }

      /** @param string $cp Target encoding; '' disables conversion. */
      public function set_cp($cp)
      {
          $this->cp = $cp;
      }

      /** @param int $items_limit Maximum number of items; 0 = unlimited. */
      public function set_items_limit($items_limit)
      {
          $this->items_limit = $items_limit;
      }

      /** @param bool $strip_html Whether to strip HTML from titles and descriptions. */
      public function set_strip_html($strip_html)
      {
          $this->strip_html = $strip_html;
      }

      /** @param string $date_format date() format; '' leaves dates untouched. */
      public function set_date_format($date_format)
      {
          $this->date_format = $date_format;
      }

      /** @param string $feed_url Feed URL. */
      public function set_feed_url($feed_url)
      {
          $this->feed_url = $feed_url;
      }

      /** @param string $cache_dir Cache directory; '' disables caching. */
      public function set_cache_dir($cache_dir)
      {
          $this->cache_dir = $cache_dir;
      }

      /** @param int $cache_time Cache lifetime in seconds. */
      public function set_cache_time($cache_time)
      {
          $this->cache_time = $cache_time;
      }

      // -------------------------------------------------------------------
      // Parse RSS file and returns associative array.
      // -------------------------------------------------------------------
      /**
       * Returns the parsed feed, served from the disk cache when possible.
       *
       * The returned array carries a 'cached' key: 1 when it was read from a
       * fresh cache file, 0 when the feed was fetched and parsed just now.
       *
       * @return array|false Parsed feed, or false when the feed cannot be read.
       */
      public function get_feed_content()
      {
          // Cache disabled: load and parse the feed directly.
          if ($this->cache_dir == '') {
              $result = $this->parse_feed_content();
              if ($result) {
                  $result['cached'] = 0;
              }
              return $result;
          }

          $cache_file = $this->cache_dir . '/rsscache_' . md5($this->feed_url);

          if ($this->is_cache_fresh($cache_file)) {
              $result = $this->read_cache($cache_file);
              if ($result !== false) {
                  $result['cached'] = 1;
                  return $result;
              }
              // Unreadable or corrupt cache file: fall through and rebuild it.
          }

          $result = $this->parse_feed_content();
          if ($result === false) {
              // Do not cache a failure, otherwise a temporary outage would be
              // replayed from disk for a whole cache period.
              return false;
          }

          $this->write_cache($cache_file, $result);
          $result['cached'] = 0;

          return $result;
      }

      /**
       * Tells whether a cache file exists and is younger than the cache lifetime.
       *
       * @param string $cache_file Path of the cache file.
       * @return bool
       */
      private function is_cache_fresh($cache_file)
      {
          // Drop PHP's stat cache so a long-running script sees the current mtime.
          clearstatcache();
          if (!is_file($cache_file)) {
              return false;
          }
          $modified = filemtime($cache_file);

          return $modified !== false && (time() - $modified) < $this->cache_time;
      }

      /**
       * Reads a cached feed array from disk.
       *
       * @param string $cache_file Path of the cache file.
       * @return array|false The cached array, or false when the file is unreadable or corrupt.
       */
      private function read_cache($cache_file)
      {
          // '@' on purpose: a cache file vanishing between the freshness check
          // and the read is handled through the false return value.
          $serialized = @file_get_contents($cache_file);
          if ($serialized === false) {
              return false;
          }
          // NOTE(security): unserialize() is only safe while the cache directory
          // is writable by this application alone; the file is expected to hold
          // a plain array written by write_cache().
          // '@' on purpose: corrupt data is reported through the return value.
          $data = @unserialize($serialized);

          return is_array($data) ? $data : false;
      }

      /**
       * Stores a parsed feed on disk; caching is best effort, failures are ignored.
       *
       * @param string $cache_file Path of the cache file.
       * @param array  $result     Parsed feed to store.
       */
      private function write_cache($cache_file, $result)
      {
          // '@' on purpose: a missing or read-only cache directory must not
          // prevent the freshly parsed feed from being returned.
          @file_put_contents($cache_file, serialize($result), LOCK_EX);
      }

      // -------------------------------------------------------------------
      // Modification of preg_match(); return trimmed field with index 1
      // from 'classic' preg_match() array output
      // -------------------------------------------------------------------
      /**
       * Runs a regular expression and returns its first capture group, cleaned up.
       *
       * The captured text goes through CDATA handling (see set_cdata()), is
       * converted from the feed encoding to the target encoding when one is
       * set, and is trimmed.
       *
       * @param string $pattern PCRE pattern with at least one capture group.
       * @param string $subject Text to search.
       * @return string The processed capture, or '' when there is no match.
       */
      public function my_preg_match ($pattern, $subject)
      {
          if (!preg_match($pattern, (string) $subject, $out) || !isset($out[1])) {
              // if there is NO result, return empty string
              return '';
          }

          $value = $out[1];

          if ($this->cdata == 'content') {
              // Keep the CDATA content, drop only the CDATA markers
              $value = strtr($value, array('<![CDATA['=>'', ']]>'=>''));
          } elseif ($this->cdata == 'strip') {
              // Remove CDATA sections entirely, markers and content
              $stripped = preg_replace('#<!\[CDATA\[.*?\]\]>#s', '', $value);
              if ($stripped !== null) {
                  $value = $stripped;
              }
          }

          // If code page is set convert character encoding to required
          if ($this->cp != '') {
              // rsscp is still empty when this method is used outside of a
              // parse run; fall back to the default feed encoding then.
              $source_cp = ($this->rsscp != '') ? $this->rsscp : $this->default_cp;
              // '@' on purpose: iconv() raises a notice/warning for unknown
              // charsets or illegal byte sequences and returns false; in that
              // case the unconverted text is kept rather than losing the field.
              $converted = @iconv($source_cp, $this->cp.'//TRANSLIT', $value);
              if ($converted !== false) {
                  $value = $converted;
              }
          }

          return trim($value);
      }

      // -------------------------------------------------------------------
      // Replace HTML entities &something; by real characters
      // -------------------------------------------------------------------
      /**
       * Replaces HTML entities by the characters they stand for.
       *
       * @param string $string Text containing entities.
       * @return string Text with entities replaced.
       */
      private function unhtmlentities ($string)
      {
          // Entity table maps character => entity; flip it to entity => character
          $trans_tbl = array_flip(get_html_translation_table (HTML_ENTITIES, ENT_QUOTES));
          // Add support for &apos; entity (missing in HTML_ENTITIES)
          $trans_tbl += array('&apos;' => "'");

          return strtr ($string, $trans_tbl);
      }

      /**
       * Extracts the listed tags from an XML fragment.
       *
       * @param array  $tags   Tag names to look for.
       * @param string $xml    XML fragment to search.
       * @param string $prefix Prefix prepended to each tag name to build the result key.
       * @return array Map of prefixed tag name => value; empty values are left out.
       */
      private function extract_tags($tags, $xml, $prefix)
      {
          $values = array();
          foreach ($tags as $tag) {
              $value = $this->my_preg_match("'<$tag.*?>(.*?)</$tag>'si", $xml);
              if ($value != '') {
                  $values[$prefix.$tag] = $value; // Set only if not empty
              }
          }

          return $values;
      }

      /**
       * Reformats a feed date with the configured date format.
       *
       * @param string $value Date string as found in the feed.
       * @return string Formatted date, or $value unchanged when no format is
       *                configured or the date cannot be parsed.
       */
      private function format_date($value)
      {
          if ($this->date_format == '') {
              return $value;
          }
          // strtotime() returns false (not -1) on failure since PHP 5.1
          $timestamp = strtotime($value);
          if ($timestamp === false) {
              return $value;
          }

          return date($this->date_format, $timestamp);
      }

      // -------------------------------------------------------------------
      // parse_feed_content() is the private worker behind get_feed_content().
      // Don't call it from your scripts - use get_feed_content() instead.
      // -------------------------------------------------------------------
      /**
       * Downloads the feed and parses it into an associative array.
       *
       * The result holds 'encoding', the non-empty channel tags, optional
       * 'textinput_*' and 'image_*' entries, 'items' (list of per-item arrays)
       * and 'items_count'.
       *
       * @return array|false Parsed feed, or false when the feed cannot be opened.
       */
      private function parse_feed_content()
      {
          // Load the whole feed. '@' on purpose: an unreachable feed is reported
          // through the false return value. Reading in one call also avoids the
          // endless loop a while(!feof()) read can enter on a broken stream.
          $rss_content = @file_get_contents($this->feed_url);
          if ($rss_content === false) {
              return false;
          }

          $result = array();

          // Parse document encoding. Until it is known, values are read using
          // the default codepage (rsscp is used in my_preg_match()).
          $this->rsscp = $this->default_cp;
          $result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);
          if ($result['encoding'] != '') {
              $this->rsscp = $result['encoding'];
          }

          // Parse CHANNEL info
          if (preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel)) {
              $result += $this->extract_tags($this->channeltags, $out_channel[1], '');
          }
          // Convert lastBuildDate to the specified date format
          if (isset($result['lastBuildDate'])) {
              $result['lastBuildDate'] = $this->format_date($result['lastBuildDate']);
          }

          // Parse TEXTINPUT info
          // This slightly odd regexp means: look for a <textinput> tag with or
          // without attributes, but skip the self-closed version <textinput />
          // (it is not an opening tag)
          if (preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo)) {
              $result += $this->extract_tags($this->textinputtags, $out_textinfo[2], 'textinput_');
          }

          // Parse IMAGE info
          if (preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo)) {
              $result += $this->extract_tags($this->imagetags, $out_imageinfo[1], 'image_');
          }

          // Parse ITEMS
          preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
          $result['items'] = array(); // create array even if there are no items
          $i = 0;
          foreach ($items[2] as $rss_item) {
              // Stop once the configured limit is reached (0 = no limit)
              if ($this->items_limit != 0 && $i >= $this->items_limit) {
                  break;
              }

              $item = $this->extract_tags($this->itemtags, $rss_item, '');

              // Strip HTML tags and entities from DESCRIPTION and TITLE
              if ($this->strip_html) {
                  foreach (array('description', 'title') as $field) {
                      if (!empty($item[$field])) {
                          $item[$field] = strip_tags($this->unhtmlentities(strip_tags($item[$field])));
                      }
                  }
              }

              // Convert pubDate to the specified date format
              if (isset($item['pubDate'])) {
                  $item['pubDate'] = $this->format_date($item['pubDate']);
              }

              $result['items'][$i] = $item;
              $i++;
          }
          $result['items_count'] = $i;

          return $result;
      }
  }
  309. ?>