PageRenderTime 104ms CodeModel.GetById 35ms RepoModel.GetById 0ms app.codeStats 1ms

/html/AppCode/expressionengine/plugins/pi.magpie.php

https://github.com/w3bg/www.hsifin.com
PHP | 2733 lines | 1940 code | 326 blank | 467 comment | 253 complexity | b2421ff0bdca21ec11b61e1f5c11f8ac MD5 | raw file
Possible License(s): AGPL-3.0
  1. <?php
  2. /*
  3. =====================================================
  4. ExpressionEngine - by EllisLab
  5. -----------------------------------------------------
  6. http://expressionengine.com/
  7. -----------------------------------------------------
  8. Copyright (c) 2004 - 2010 EllisLab, Inc.
  9. =====================================================
  10. THIS IS COPYRIGHTED SOFTWARE
  11. PLEASE READ THE LICENSE AGREEMENT
  12. http://expressionengine.com/user_guide/license.html
  13. =====================================================
  14. File: pi.magpie.php
  15. -----------------------------------------------------
  16. Purpose: Magpie RSS plugin
  17. =====================================================
  18. */
  19. $plugin_info = array(
  20. 'pi_name' => 'Magpie RSS Parser',
  21. 'pi_version' => '1.3.5',
  22. 'pi_author' => 'Paul Burdick',
  23. 'pi_author_url' => 'http://expressionengine.com/',
  24. 'pi_description' => 'Retrieves and Parses RSS/Atom Feeds',
  25. 'pi_usage' => Magpie::usage()
  26. );
  27. Class Magpie {
  28. var $cache_name = 'magpie_cache'; // Name of cache directory
  29. var $cache_refresh = 360; // Period between cache refreshes (in minutes)
  30. var $cache_data = ''; // Data from cache file
  31. var $cache_path = ''; // Path to cache file.
  32. var $cache_tpath = ''; // Path to cache file's time file.
  33. var $page_url = ''; // URL being requested
  34. var $items = array(); // Information about items returned
  35. var $dates = array('lastupdate','linkcreated'); // Date elements
  36. var $return_data = ''; // Data sent back to Template parser
  37. /** -------------------------------------
  38. /** Constructor
  39. /** -------------------------------------*/
  40. function Magpie()
  41. {
  42. // Make a local reference of the ExpressionEngine super object
  43. $this->EE =& get_instance();
  44. /** -------------------------------
  45. /** Set Parameters
  46. /** -------------------------------*/
  47. $this->cache_refresh = ( ! $this->EE->TMPL->fetch_param('refresh')) ? $this->cache_refresh : $this->EE->TMPL->fetch_param('refresh');
  48. $this->page_url = ( ! $this->EE->TMPL->fetch_param('url')) ? '' : trim($this->EE->TMPL->fetch_param('url'));
  49. $limit = ( ! $this->EE->TMPL->fetch_param('limit')) ? 20 : $this->EE->TMPL->fetch_param('limit');
  50. $offset = ( ! $this->EE->TMPL->fetch_param('offset')) ? 0 : $this->EE->TMPL->fetch_param('offset');
  51. $template = $this->EE->TMPL->tagdata;
  52. if ($this->page_url == '')
  53. {
  54. return $this->return_data;
  55. }
  56. if ($this->EE->config->item('debug') == 2 OR ($this->EE->config->item('debug') == 1 && $this->EE->session->userdata['group_id'] == 1))
  57. {
  58. if( ! defined('MAGPIE_DEBUG'))
  59. {
  60. define('MAGPIE_DEBUG', 1);
  61. }
  62. }
  63. else
  64. {
  65. if ( ! defined('MAGPIE_DEBUG'))
  66. {
  67. define('MAGPIE_DEBUG', 0);
  68. }
  69. }
  70. /** -------------------------------
  71. /** Check and Retrive Cache
  72. /** -------------------------------*/
  73. if ( ! defined('MAGPIE_CACHE_DIR'))
  74. {
  75. define('MAGPIE_CACHE_DIR', APPPATH.'cache/'.$this->cache_name.'/');
  76. }
  77. if ( ! defined('MAGPIE_CACHE_AGE'))
  78. {
  79. define('MAGPIE_CACHE_AGE', $this->cache_refresh * 60);
  80. }
  81. $this->RSS = fetch_rss($this->page_url);
  82. if (count($this->RSS->items) == 0)
  83. {
  84. return $this->return_data;
  85. }
  86. /** -----------------------------------
  87. /** Parse Template - ITEMS
  88. /** -----------------------------------*/
  89. if (preg_match("/(".LD."items".RD."(.*?)".LD.'\/'.'items'.RD."|".LD."magpie:items".RD."(.*?)".LD.'\/'.'magpie:items'.RD.")/s", $template, $matches))
  90. {
  91. $items_data = '';
  92. $i = 0;
  93. if (count($this->RSS->items) > 0)
  94. {
  95. foreach($this->RSS->items as $item)
  96. {
  97. $i++;
  98. if ($i <= $offset) continue;
  99. $temp_data = $matches['1'];
  100. /** ----------------------------------------
  101. /** Quick and Dirty Conditionals
  102. /** ----------------------------------------*/
  103. if (stristr($temp_data, LD.'if'))
  104. {
  105. $tagdata = $this->EE->functions->prep_conditionals($temp_data, $item, '', 'magpie:');
  106. }
  107. /** ----------------------------------------
  108. /** Single Variables
  109. /** ----------------------------------------*/
  110. foreach($item as $key => $value)
  111. {
  112. if ( ! is_array($value))
  113. {
  114. $temp_data = str_replace(LD.$key.RD, $value, $temp_data);
  115. $temp_data = str_replace(LD.'magpie:'.$key.RD, $value, $temp_data);
  116. if ($key == 'atom_content')
  117. {
  118. $temp_data = str_replace(LD.'content'.RD, $value, $temp_data);
  119. $temp_data = str_replace(LD.'magpie:content'.RD, $value, $temp_data);
  120. }
  121. }
  122. else
  123. {
  124. foreach ($value as $vk => $vv)
  125. {
  126. $temp_data = str_replace(LD.$key.'_'.$vk.RD, $vv, $temp_data);
  127. $temp_data = str_replace(LD.'magpie:'.$key.'_'.$vk.RD, $vv, $temp_data);
  128. if ($key == 'dc')
  129. {
  130. $temp_data = str_replace(LD.$vk.RD, $vv, $temp_data);
  131. $temp_data = str_replace(LD.'magpie:'.$vk.RD, $vv, $temp_data);
  132. }
  133. }
  134. }
  135. }
  136. $items_data .= $temp_data;
  137. if ($i >= ($limit + $offset))
  138. {
  139. break;
  140. }
  141. }
  142. }
  143. /** ----------------------------------------
  144. /** Clean up left over variables
  145. /** ----------------------------------------*/
  146. $items_data = str_replace(LD.'exp:', 'TgB903He0mnv3dd098', $items_data);
  147. $items_data = str_replace(LD.'/exp:', 'Mu87ddk2QPoid990iod', $items_data);
  148. $items_data = preg_replace("/".LD."if.*?".RD.".+?".LD.'\/'."if".RD."/s", '', $items_data);
  149. $items_data = preg_replace("/".LD.".+?".RD."/", '', $items_data);
  150. $items_data = str_replace('TgB903He0mnv3dd098', LD.'exp:', $items_data);
  151. $items_data = str_replace('Mu87ddk2QPoid990iod', LD.'/exp:', $items_data);
  152. $template = str_replace($matches['0'], $items_data, $template);
  153. }
  154. /** -----------------------------------
  155. /** Parse Template
  156. /** -----------------------------------*/
  157. $channel_variables = array('title', 'link', 'modified', 'generator',
  158. 'copyright', 'description', 'language',
  159. 'pubdate', 'lastbuilddate', 'generator',
  160. 'tagline', 'creator', 'date', 'rights');
  161. $image_variables = array('title','url', 'link','description', 'width', 'height');
  162. foreach ($this->EE->TMPL->var_single as $key => $val)
  163. {
  164. /** ----------------------------------------
  165. /** {feed_version} - Version of RSS/Atom Feed
  166. /** ----------------------------------------*/
  167. if ($key == "feed_version" OR $key == "magpie:feed_version")
  168. {
  169. if ( ! isset($this->RSS->feed_version)) $this->RSS->feed_version = '';
  170. $template = $this->EE->TMPL->swap_var_single($val, $this->RSS->feed_version, $template);
  171. }
  172. /** ----------------------------------------
  173. /** {feed_type}
  174. /** ----------------------------------------*/
  175. if (($key == "feed_type" OR $key == "magpie:feed_type") && isset($this->RSS->feed_type))
  176. {
  177. if ( ! isset($this->RSS->feed_type)) $this->RSS->feed_type = '';
  178. $template = $this->EE->TMPL->swap_var_single($val, $this->RSS->feed_type, $template);
  179. }
  180. /** ----------------------------------------
  181. /** Image related variables
  182. /** ----------------------------------------*/
  183. foreach ($image_variables as $variable)
  184. {
  185. if ($key == 'image_'.$variable OR $key == 'magpie:image_'.$variable)
  186. {
  187. if ( ! isset($this->RSS->image[$variable])) $this->RSS->image[$variable] = '';
  188. $template = $this->EE->TMPL->swap_var_single($val, $this->RSS->image[$variable], $template);
  189. }
  190. }
  191. /** ----------------------------------------
  192. /** Channel related variables
  193. /** ----------------------------------------*/
  194. foreach ($channel_variables as $variable)
  195. {
  196. if ($key == 'channel_'.$variable OR $key == 'magpie:channel_'.$variable)
  197. {
  198. if ( ! isset($this->RSS->channel[$variable]))
  199. {
  200. $this->RSS->channel[$variable] = ( ! isset($this->RSS->channel['dc'][$variable])) ? '' : $this->RSS->channel['dc'][$variable];
  201. }
  202. $template = $this->EE->TMPL->swap_var_single($val, $this->RSS->channel[$variable], $template);
  203. }
  204. }
  205. /** ----------------------------------------
  206. /** {page_url}
  207. /** ----------------------------------------*/
  208. if ($key == 'page_url' OR $key == 'magpie:page_url')
  209. {
  210. $template = $this->EE->TMPL->swap_var_single($val, $this->page_url, $template);
  211. }
  212. }
  213. $this->return_data = &$template;
  214. }
  215. /** ----------------------------------------
  216. /** Plugin Usage
  217. /** ----------------------------------------*/
  218. function usage()
  219. {
  220. ob_start();
  221. ?>
  222. STEP ONE:
  223. Insert plugin tag into your template. Set parameters and variables.
  224. PARAMETERS:
  225. The tag has three parameters:
  226. 1. url - The URL of the RSS or Atom feed.
  227. 2. limit - Number of items to display from feed.
  228. 3. offset - Skip a certain number of items in the display of the feed.
  229. 4. refresh - How often to refresh the cache file in minutes. The plugin default is to refresh the cached file every three hours.
  230. Example opening tag: {exp:magpie url="http://expressionengine.com/feeds/rss/full/" limit="8" refresh="720"}
  231. SINGLE VARIABLES:
  232. feed_version - What version of RSS or Atom is this feed
  233. feed_type - What type of feed is this, Atom or RSS
  234. page_url - Page URL of the feed.
  235. image_title - [RSS] The contents of the &lt;title&gt; element contained within the sub-element &lt;channel&gt;
  236. image_url - [RSS] The contents of the &lt;url&gt; element contained within the sub-element &lt;channel&gt;
  237. image_link - [RSS] The contents of the &lt;link&gt; element contained within the sub-element &lt;channel&gt;
  238. image_description - [RSS] The contents of the optional &lt;description&gt; element contained within the sub-element &lt;channel&gt;
  239. image_width - [RSS] The contents of the optional &lt;width&gt; element contained within the sub-element &lt;channel&gt;
  240. image_height - [RSS] The contents of the optional &lt;height&gt; element contained within the sub-element &lt;channel&gt;
  241. channel_title - [ATOM/RSS-0.91/RSS-1.0/RSS-2.0]
  242. channel_link - [ATOM/RSS-0.91/RSS-1.0/RSS-2.0]
  243. channel_modified - [ATOM]
  244. channel_generator - [ATOM]
  245. channel_copyright - [ATOM]
  246. channel_description - [RSS-0.91/ATOM]
  247. channel_language - [RSS-0.91/RSS-1.0/RSS-2.0]
  248. channel_pubdate - [RSS-0.91]
  249. channel_lastbuilddate - [RSS-0.91]
  250. channel_tagline - [RSS-0.91/RSS-1.0/RSS-2.0]
  251. channel_creator - [RSS-1.0/RSS-2.0]
  252. channel_date - [RSS-1.0/RSS-2.0]
  253. channel_rights - [RSS-2.0]
  254. PAIR VARIABLES:
  255. Only one pair variable, {items}, is available, and it is for the entries/items in the RSS/Atom Feeds. This pair
  256. variable allows many different other single variables to be contained within it depending on the type of feed.
  257. title - [ATOM/RSS-0.91/RSS-1.0/RSS-2.0]
  258. link - [ATOM/RSS-0.91/RSS-1.0/RSS-2.0]
  259. description - [RSS-0.91/RSS-1.0/RSS-2.0]
  260. about - [RSS-1.0]
  261. atom_content - [ATOM]
  262. author_name - [ATOM]
  263. author_email - [ATOM]
  264. content - [ATOM/RSS-2.0]
  265. created - [ATOM]
  266. creator - [RSS-1.0]
  267. pubdate/date - (varies by feed design)
  268. description - [ATOM]
  269. id - [ATOM]
  270. issued - [ATOM]
  271. modified - [ATOM]
  272. subject - [ATOM/RSS-1.0]
  273. summary - [ATOM/RSS-1.0/RSS-2.0]
  274. EXAMPLE:
  275. {exp:magpie url="http://expressionengine.com/feeds/rss/full/" limit="10" refresh="720"}
  276. <ul>
  277. {items}
  278. <li><a href="{link}">{title}</a></li>
  279. {/items}
  280. </ul>
  281. {/exp:magpie}
  282. ***************************
  283. Version 1.2
  284. ***************************
  285. Complete Rewrite That Improved the Caching System Dramatically
  286. ***************************
  287. Version 1.2.1 + 1.2.2
  288. ***************************
  289. Bug Fixes
  290. ***************************
  291. Version 1.2.3
  292. ***************************
  293. Modified the code so that one can put 'magpie:' as a prefix on all plugin variables,
  294. which allows the embedding of this plugin in a {exp:channel:entries} tag and using
  295. that tag's variables in this plugin's parameter (url="" parameter, specifically).
  296. {exp:magpie url="http://expressionengine.com/feeds/rss/full/" limit="10" refresh="720"}
  297. <ul>
  298. {magpie:items}
  299. <li><a href="{magpie:link}">{magpie:title}</a></li>
  300. {/magpie:items}
  301. </ul>
  302. {/exp:magpie}
  303. ***************************
  304. Version 1.2.4
  305. ***************************
  306. Added the ability for the encoding to be parsed out of the XML feed and used to
  307. convert the feed's data into the encoding specified in the preferences. Requires
  308. that the Multibyte String (mbstring: http://us4.php.net/manual/en/ref.mbstring.php)
  309. library be compiled into PHP.
  310. ***************************
  311. Version 1.2.5
  312. ***************************
  313. Fixed a bug where the Magpie library was adding slashes to the cache directory
  314. without doing any sort of double slash checking.
  315. ***************************
  316. Version 1.3
  317. ***************************
  318. Fixed a bug where the channel and image variables were not showing up because of a bug
  319. introuced in 1.2.
  320. ***************************
  321. Version 1.3.1
  322. ***************************
  323. New parameter convert_entities="y" which will have any entities in the RSS feed converted
  324. before being parsed by the PHP XML parser. This is helpful because sometimes the XML
  325. Parser converts entities incorrectly. You have to empty your Magpie cache after enabling this setting.
  326. New parameter encoding="ISO-8859-1". Allows you to specify the encoding of the RSS
  327. feed, which is sometimes helpful when using the convert_encoding="y" parameter.
  328. ***************************
  329. Version 1.3.2
  330. ***************************
  331. Eliminated all of the darn encoding parameters previously being used and used the
  332. encoding abilities recently added to the Magpie library that attempts to do all of the
  333. converting early on.
  334. ***************************
  335. Version 1.3.3
  336. ***************************
  337. The Snoopy library that is included with the Magpie plugin by default was causing
  338. problems with the Snoopy library included in the Third Party Linklist module, so
  339. the name was changed to eliminate the conflict.
  340. ***************************
  341. Version 1.3.4
  342. ***************************
  343. The offset="" parameter was undocumented and had a bug. Fixed.
  344. ***************************
  345. Version 1.3.5
  346. ***************************
  347. Added ability to override caching options when using fetch_rss() directly.
  348. <?php
  349. $buffer = ob_get_contents();
  350. ob_end_clean();
  351. return $buffer;
  352. }
  353. } // END Magpie class
  354. /*
  355. // -------------------------------------------
  356. // BEGIN MagpieRSS Class
  357. // -------------------------------------------
  358. * Project: MagpieRSS: a simple RSS integration tool
  359. * File: rss_parse.inc - parse an RSS or Atom feed
  360. * return as a simple object.
  361. *
  362. * Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3
  363. *
  364. * The latest version of MagpieRSS can be obtained from:
  365. * http://magpierss.sourceforge.net
  366. *
  367. * For questions, help, comments, discussion, etc., please join the
  368. * Magpie mailing list:
  369. * magpierss-general@lists.sourceforge.net
  370. *
  371. * Author: Kellan Elliott-McCrea <kellan@protest.net>
  372. * Version: 0.6a
  373. * License: GPL
  374. *
  375. *
  376. * ABOUT MAGPIE's APPROACH TO PARSING:
  377. * - Magpie is based on expat, an XML parser, and therefore will only parse
  378. * valid XML files. This includes all properly constructed RSS or Atom.
  379. *
  380. * - Magpie is an inclusive parser. It will include any elements that
  381. * it can turn into a key value pair in the parsed feed object it returns.
  382. *
  383. * - Magpie supports namespaces, and will return any elements found in a
  384. * namespace in a sub-array, with the key point to that array being the
  385. * namespace prefix.
  386. * (e.g. if an item contains a <dc:date> element, then that date can
  387. * be accessed at $item['dc']['date']
  388. *
  389. * - Magpie supports nested elements by combining the names. If an item
  390. * includes XML like:
  391. * <author>
  392. * <name>Kellan</name>
  393. * </author>
  394. *
  395. * The name field is accessible at $item['author_name']
  396. *
  397. * - Magpie makes no attempt to validate a feed beyond ensuring that it
  398. * is valid XML.
  399. * RSS validators are readily available on the web at:
  400. * http://feeds.archive.org/validator/
  401. * http://www.ldodds.com/rss_validator/1.0/validator.html
  402. *
  403. *
  404. * EXAMPLE PARSED RSS ITEM:
  405. *
  406. * Magpie tries to parse RSS into easy to use PHP datastructures.
  407. *
  408. * For example, Magpie on encountering (a rather complex) RSS 1.0 item entry:
  409. *
  410. * <item rdf:about="http://protest.net/NorthEast/calendrome.cgi?span=event&#38;ID=210257">
  411. * <title>Weekly Peace Vigil</title>
  412. * <link>http://protest.net/NorthEast/calendrome.cgi?span=event&#38;ID=210257</link>
  413. * <description>Wear a white ribbon</description>
  414. * <dc:subject>Peace</dc:subject>
  415. * <ev:startdate>2002-06-01T11:00:00</ev:startdate>
  416. * <ev:location>Northampton, MA</ev:location>
  417. * <ev:type>Protest</ev:type>
  418. * </item>
  419. *
  420. * Would transform it into the following associative array, and push it
  421. * onto the array $rss->items
  422. *
  423. * array(
  424. * title => 'Weekly Peace Vigil',
  425. * link => 'http://protest.net/NorthEast/calendrome.cgi?span=event&#38;ID=210257',
  426. * description => 'Wear a white ribbon',
  427. * dc => array (
  428. * subject => 'Peace'
  429. * ),
  430. * ev => array (
  431. * startdate => '2002-06-01T11:00:00',
  432. * enddate => '2002-06-01T12:00:00',
  433. * type => 'Protest',
  434. * location => 'Northampton, MA'
  435. * )
  436. * )
  437. *
  438. *
  439. *
  440. * A FEW NOTES ON PARSING Atom FEEDS
  441. *
  442. * Atom support is considered alpha. Atom elements will often be available
  443. * as their RSS equivalent, summary is available as description for example.
  444. *
  445. * Elements of mode=xml are flattened into a single string, just as if they
  446. * had been wrapped in a CDATA container.
  447. *
  448. * See: http://laughingmeme.org/archives/001676.html
  449. *
  450. */
  451. define('RSS', 'RSS');
  452. define('ATOM', 'Atom');
  453. class MagpieRSS {
  454. /*
  455. * Hybrid parser, and object. (probably a bad idea! :)
  456. *
  457. * Usage Example:
  458. *
  459. * $some_rss = "<?xml version="1.0"......
  460. *
  461. * $rss = new MagpieRSS( $some_rss );
  462. *
  463. * // print rss channel title
  464. * echo $rss->channel['title'];
  465. *
  466. * // print the title of each item
  467. * foreach ($rss->items as $item ) {
  468. * echo $item[title];
  469. * }
  470. *
  471. * see: rss_fetch.inc for a simpler interface
  472. */
  473. var $parser;
  474. var $current_item = array(); // item currently being parsed
  475. var $items = array(); // collection of parsed items
  476. var $channel = array(); // hash of channel fields
  477. var $textinput = array();
  478. var $image = array();
  479. var $feed_type;
  480. var $feed_version;
  481. var $encoding;
  482. // parser variables
  483. var $stack = array(); // parser stack
  484. var $inchannel = false;
  485. var $initem = false;
  486. var $incontent = false; // if in Atom <content mode="xml"> field
  487. var $intextinput = false;
  488. var $inimage = false;
  489. var $current_field = '';
  490. var $current_namespace = false;
  491. var $etag = false;
  492. var $ERROR = "";
  493. var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');
  494. var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');
  495. /*======================================================================*\
  496. Function: MagpieRSS
  497. Purpose: Constructor, sets up XML parser,parses source,
  498. and populates object..
  499. Input: String containing the RSS to be parsed
  500. \*======================================================================*/
  501. function MagpieRSS ($source, $output_encoding='ISO-8859-1',
  502. $input_encoding=null, $detect_encoding=true) {
  503. // Make a local reference of the ExpressionEngine super object
  504. $this->EE =& get_instance();
  505. # if PHP xml isn't compiled in, die
  506. #
  507. if ( ! function_exists('xml_parser_create')) {
  508. $this->error( "Failed to load PHP's XML Extension. " .
  509. "http://www.php.net/manual/en/ref.xml.php",
  510. E_USER_ERROR );
  511. }
  512. list($parser, $source) = $this->create_parser($source,
  513. $output_encoding, $input_encoding, $detect_encoding);
  514. if ( ! is_resource($parser))
  515. {
  516. $this->error( "Failed to create an instance of PHP's XML parser. " .
  517. "http://www.php.net/manual/en/ref.xml.php",
  518. E_USER_ERROR );
  519. }
  520. # pass in parser, and a reference to this object
  521. # setup handlers
  522. #
  523. xml_set_object( $parser, $this );
  524. xml_set_element_handler($parser,
  525. 'feed_start_element', 'feed_end_element' );
  526. xml_set_character_data_handler( $parser, 'feed_cdata' );
  527. $status = @xml_parse($parser, $source);
  528. if ( ! $status ) {
  529. $errorcode = xml_get_error_code( $parser );
  530. if ( $errorcode != XML_ERROR_NONE ) {
  531. $xml_error = xml_error_string( $errorcode );
  532. $error_line = xml_get_current_line_number($parser);
  533. $error_col = xml_get_current_column_number($parser);
  534. $errormsg = "$xml_error at line $error_line, column $error_col";
  535. $this->error( $errormsg );
  536. return FALSE;
  537. }
  538. }
  539. xml_parser_free( $parser );
  540. $this->normalize();
  541. }
  542. function change_key_case($array)
  543. {
  544. $new_array = array();
  545. foreach($array as $key => $value)
  546. {
  547. $new_array[strtolower($key)] = $value;
  548. }
  549. return $new_array;
  550. }
  551. function feed_start_element($p, $element, &$attrs) {
  552. $el = $element = strtolower($element);
  553. if ( ! function_exists('array_change_key_case'))
  554. {
  555. $attrs = $this->change_key_case($attrs);
  556. }
  557. else
  558. {
  559. $attrs = array_change_key_case($attrs, CASE_LOWER);
  560. }
  561. // check for a namespace, and split if found
  562. $ns = false;
  563. if ( strpos( $element, ':' ) ) {
  564. list($ns, $el) = explode( ':', $element, 2);
  565. }
  566. if ( $ns and $ns != 'rdf' ) {
  567. $this->current_namespace = $ns;
  568. }
  569. # if feed type isn't set, then this is first element of feed
  570. # identify feed from root element
  571. #
  572. if ( ! isset($this->feed_type) ) {
  573. if ( $el == 'rdf' ) {
  574. $this->feed_type = RSS;
  575. $this->feed_version = '1.0';
  576. }
  577. elseif ( $el == 'rss' ) {
  578. $this->feed_type = RSS;
  579. $this->feed_version = $attrs['version'];
  580. }
  581. elseif ( $el == 'feed' ) {
  582. $this->feed_type = ATOM;
  583. $this->feed_version = $attrs['version'];
  584. $this->inchannel = true;
  585. }
  586. return;
  587. }
  588. if ( $el == 'channel' )
  589. {
  590. $this->inchannel = true;
  591. }
  592. elseif ($el == 'item' or $el == 'entry' )
  593. {
  594. $this->initem = true;
  595. if ( isset($attrs['rdf:about']) ) {
  596. $this->current_item['about'] = $attrs['rdf:about'];
  597. }
  598. }
  599. // if we're in the default namespace of an RSS feed,
  600. // record textinput or image fields
  601. elseif (
  602. $this->feed_type == RSS and
  603. $this->current_namespace == '' and
  604. $el == 'textinput' )
  605. {
  606. $this->intextinput = true;
  607. }
  608. elseif (
  609. $this->feed_type == RSS and
  610. $this->current_namespace == '' and
  611. $el == 'image' )
  612. {
  613. $this->inimage = true;
  614. }
  615. # handle atom content constructs
  616. elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
  617. {
  618. // avoid clashing w/ RSS mod_content
  619. if ($el == 'content' ) {
  620. $el = 'atom_content';
  621. }
  622. $this->incontent = $el;
  623. }
  624. // if inside an Atom content construct (e.g. content or summary) field treat tags as text
  625. elseif ($this->feed_type == ATOM and $this->incontent )
  626. {
  627. // if tags are inlined, then flatten
  628. $attrs_str = join(' ',
  629. array_map('map_attrs',
  630. array_keys($attrs),
  631. array_values($attrs) ) );
  632. $this->append_content( "<$element $attrs_str>" );
  633. array_unshift( $this->stack, $el );
  634. }
  635. // Atom support many links per containging element.
  636. // Magpie treats link elements of type rel='alternate'
  637. // as being equivalent to RSS's simple link element.
  638. //
  639. elseif ($this->feed_type == ATOM and $el == 'link' )
  640. {
  641. if ( isset($attrs['rel']) and $attrs['rel'] == 'alternate' )
  642. {
  643. $link_el = 'link';
  644. }
  645. else {
  646. $link_el = 'link_' . $attrs['rel'];
  647. }
  648. $this->append($link_el, $attrs['href']);
  649. }
  650. // set stack[0] to current element
  651. else {
  652. array_unshift($this->stack, $el);
  653. }
  654. }
  655. function feed_cdata ($p, $text) {
  656. if ($this->feed_type == ATOM and $this->incontent)
  657. {
  658. $this->append_content( $text );
  659. }
  660. else {
  661. $current_el = join('_', array_reverse($this->stack));
  662. $this->append($current_el, $text);
  663. }
  664. }
  665. function feed_end_element ($p, $el) {
  666. $el = strtolower($el);
  667. if ( $el == 'item' or $el == 'entry' )
  668. {
  669. $this->items[] = $this->current_item;
  670. $this->current_item = array();
  671. $this->initem = false;
  672. }
  673. elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' )
  674. {
  675. $this->intextinput = false;
  676. }
  677. elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' )
  678. {
  679. $this->inimage = false;
  680. }
  681. elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
  682. {
  683. $this->incontent = false;
  684. }
  685. elseif ($el == 'channel' or $el == 'feed' )
  686. {
  687. $this->inchannel = false;
  688. }
  689. elseif ($this->feed_type == ATOM and $this->incontent ) {
  690. // balance tags properly
  691. // note: i don't think this is actually neccessary
  692. if ( $this->stack[0] == $el )
  693. {
  694. $this->append_content("</$el>");
  695. }
  696. else {
  697. $this->append_content("<$el />");
  698. }
  699. array_shift( $this->stack );
  700. }
  701. else {
  702. array_shift( $this->stack );
  703. }
  704. $this->current_namespace = false;
  705. }
  706. function concat (&$str1, $str2="") {
  707. if ( ! isset($str1) ) {
  708. $str1="";
  709. }
  710. $str1 .= $str2;
  711. }
  712. function append_content($text) {
  713. if ( $this->initem ) {
  714. $this->concat( $this->current_item[ $this->incontent ], $text );
  715. }
  716. elseif ( $this->inchannel ) {
  717. $this->concat( $this->channel[ $this->incontent ], $text );
  718. }
  719. }
  720. // smart append - field and namespace aware
  721. function append($el, $text) {
  722. if ( ! $el) {
  723. return;
  724. }
  725. if ( $this->current_namespace )
  726. {
  727. if ( $this->initem ) {
  728. $this->concat(
  729. $this->current_item[ $this->current_namespace ][ $el ], $text);
  730. }
  731. elseif ($this->inchannel) {
  732. $this->concat(
  733. $this->channel[ $this->current_namespace][ $el ], $text );
  734. }
  735. elseif ($this->intextinput) {
  736. $this->concat(
  737. $this->textinput[ $this->current_namespace][ $el ], $text );
  738. }
  739. elseif ($this->inimage) {
  740. $this->concat(
  741. $this->image[ $this->current_namespace ][ $el ], $text );
  742. }
  743. }
  744. else {
  745. if ( $this->initem ) {
  746. $this->concat(
  747. $this->current_item[ $el ], $text);
  748. }
  749. elseif ($this->intextinput) {
  750. $this->concat(
  751. $this->textinput[ $el ], $text );
  752. }
  753. elseif ($this->inimage) {
  754. $this->concat(
  755. $this->image[ $el ], $text );
  756. }
  757. elseif ($this->inchannel) {
  758. $this->concat(
  759. $this->channel[ $el ], $text );
  760. }
  761. }
  762. }
  763. function normalize () {
  764. // if atom populate rss fields
  765. if ( $this->is_atom() ) {
  766. $this->channel['descripton'] = ( ! isset($this->channel['tagline'])) ? '' : $this->channel['tagline'];
  767. for ( $i = 0; $i < count($this->items); $i++) {
  768. $item = $this->items[$i];
  769. if ( isset($item['summary']) )
  770. $item['description'] = $item['summary'];
  771. if ( isset($item['atom_content']))
  772. $item['content']['encoded'] = $item['atom_content'];
  773. $this->items[$i] = $item;
  774. }
  775. }
  776. elseif ( $this->is_rss() ) {
  777. $this->channel['tagline'] = ( ! isset($this->channel['description'])) ? '' : $this->channel['description'];
  778. for ( $i = 0; $i < count($this->items); $i++) {
  779. $item = $this->items[$i];
  780. if ( isset($item['description']))
  781. $item['summary'] = $item['description'];
  782. if ( isset($item['content']['encoded'] ) )
  783. $item['atom_content'] = $item['content']['encoded'];
  784. $this->items[$i] = $item;
  785. }
  786. }
  787. }
  788. function error ($errormsg, $lvl=E_USER_WARNING) {
  789. // append PHP's error message if track_errors enabled
  790. if ( isset($php_errormsg) )
  791. {
  792. $errormsg .= " ($php_errormsg)";
  793. }
  794. $this->ERROR = $errormsg;
  795. if (MAGPIE_DEBUG)
  796. {
  797. trigger_error($errormsg, $lvl);
  798. }
  799. else
  800. {
  801. error_log($errormsg, 0);
  802. }
  803. }
  804. function is_rss () {
  805. if ( $this->feed_type == RSS ) {
  806. return $this->feed_version;
  807. }
  808. else {
  809. return false;
  810. }
  811. }
  812. function is_atom() {
  813. if ( $this->feed_type == ATOM ) {
  814. return $this->feed_version;
  815. }
  816. else {
  817. return false;
  818. }
  819. }
  /**
   * Return an XML parser, and the possibly re-encoded source.
   *
   * Delegates to php5_create_parser() on PHP 5 and later, and to
   * php4_create_parser() on anything older. The parser's target encoding
   * is then set from the ExpressionEngine 'charset' config item when that
   * charset is one of iso-8859-1, us-ascii or utf-8.
   *
   * @param string $source  Raw feed XML
   * @param string $out_enc Accepted for signature compatibility; not read by this method
   * @param string $in_enc  Input encoding, passed on to the version-specific factory
   * @param bool   $detect  Whether the factory should detect the input encoding
   * @return array array(parser, source)
   */
  function create_parser($source, $out_enc, $in_enc, $detect)
  {
      // Compare the whole version number: looking only at the first character
      // of phpversion() treated PHP 7 and 8 as "not PHP 5" and pushed them
      // onto the PHP4 code path.
      if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
          $parser = $this->php5_create_parser($in_enc, $detect);
      }
      else {
          list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
      }

      $this->encoding = $this->EE->config->item('charset');

      // cast so a missing config value does not reach strtolower() as a non-string
      if (in_array(strtolower((string) $this->encoding), array('iso-8859-1', 'us-ascii', 'utf-8')))
      {
          xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $this->encoding);
      }

      return array($parser, $source);
  }
  /**
   * Instantiate an XML parser under PHP5
   *
   * An empty encoding string is handed to xml_parser_create() so that
   * PHP detects the input encoding itself. The caller-supplied encoding
   * is only used when detection is switched off and an encoding was given.
   *
   * @param string $in_enc Input encoding requested by the caller (may be empty)
   * @param bool   $detect True to let PHP detect the encoding
   * @return mixed Whatever xml_parser_create() returns
   */
  function php5_create_parser($in_enc, $detect) {
      $use_given_encoding = ( ! $detect && $in_enc);
      $encoding = $use_given_encoding ? $in_enc : '';
      return xml_parser_create($encoding);
  }
  /**
   * Instantiate an XML parser under PHP4
   *
   * PHP4's XML support copes poorly with character encodings. Documents
   * limited to the ISO-8859-1 character set parse fine; for anything the
   * parser does not know, the source is converted to UTF-8 first, with
   * iconv and then mbstring as the conversion back ends.
   *
   * The following code is based on SJM's work with FoF
   * @see http://minutillo.com/steve/channel/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
   *
   * @param string $source Raw feed XML
   * @param string $in_enc Input encoding; when empty it is read from the XML declaration, defaulting to UTF-8
   * @param bool   $detect False to skip all detection and create the parser with $in_enc as given
   * @return array array(parser, source); source is the UTF-8 converted text when a conversion happened
   */
  function php4_create_parser($source, $in_enc, $detect) {
      if ( ! $detect ) {
          return array(xml_parser_create($in_enc), $source);
      }

      if ( ! $in_enc) {
          // The "?" of "<?xml" is escaped so it is matched literally. Unescaped,
          // it only made the "<" optional and the pattern could match outside
          // the XML declaration.
          if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
              $in_enc = strtoupper($m[1]);
              $this->source_encoding = $in_enc;
          }
          else {
              $in_enc = 'UTF-8';
          }
      }

      if ($this->known_encoding($in_enc)) {
          return array(xml_parser_create($in_enc), $source);
      }

      // the detected encoding is not one of the simple encodings PHP knows
      // attempt to use the iconv extension to
      // cast the XML to a known encoding
      // @see http://php.net/iconv
      if (function_exists('iconv')) {
          $encoded_source = iconv($in_enc,'UTF-8', $source);
          if ($encoded_source) {
              return array(xml_parser_create('UTF-8'), $encoded_source);
          }
      }

      // iconv didn't work, try mb_convert_encoding
      // @see http://php.net/mbstring
      if (function_exists('mb_convert_encoding')) {
          $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
          if ($encoded_source) {
              return array(xml_parser_create('UTF-8'), $encoded_source);
          }
      }

      // nothing could convert the source: warn and parse it as-is
      $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                   "You may see strange artifacts, and mangled characters.",
                   E_USER_NOTICE);

      return array(xml_parser_create(), $source);
  }
  /**
   * Look an encoding name up in $this->_KNOWN_ENCODINGS.
   *
   * @param string $enc Encoding name in any letter case
   * @return mixed The upper-cased name when it is listed, false otherwise
   */
  function known_encoding($enc) {
      $upper = strtoupper($enc);
      return in_array($upper, $this->_KNOWN_ENCODINGS) ? $upper : false;
  }
  921. /*======================================================================*\
  922. EVERYTHING BELOW HERE IS FOR DEBUGGING PURPOSES
  923. \*======================================================================*/
  /**
   * Debug helper: echo every entry of $this->items inside an <ol>,
   * rendering each one with show_item().
   */
  function show_list () {
      echo "<ol>\n";
      foreach ($this->items as $entry) {
          echo "<li>";
          // show_item() prints its own markup; its return value is echoed as well
          echo $this->show_item( $entry );
      }
      echo "</ol>";
  }
  /**
   * Debug helper: echo every key/value pair of $this->channel as an
   * unordered list.
   */
  function show_channel () {
      echo "channel:<br>";
      echo "<ul>";
      // foreach instead of each(): each() was removed in PHP 8.0 and depended
      // on the array's internal pointer, so a second call printed nothing.
      foreach ( $this->channel as $key => $value ) {
          echo "<li> $key: $value";
      }
      echo "</ul>";
  }
  /**
   * Debug helper: echo one feed item as nested unordered lists.
   *
   * Scalar fields are printed as "key: value"; array fields are printed
   * as a bold heading followed by a nested list of their own pairs.
   *
   * @param array $item Item fields keyed by name; 'title' is used for the heading when present
   */
  function show_item ($item) {
      // a missing title prints as empty instead of raising an undefined-index notice
      $title = isset($item['title']) ? $item['title'] : '';
      echo "item: $title";
      echo "<ul>";
      // foreach instead of each(): each() was removed in PHP 8.0
      foreach ( $item as $key => $value ) {
          if ( is_array($value) ) {
              echo "<br><b>$key</b>";
              echo "<ul>";
              foreach ( $value as $ns_key => $ns_value ) {
                  echo "<li>$ns_key: $ns_value";
              }
              echo "</ul>";
          }
          else {
              echo "<li> $key: $value";
          }
      }
      echo "</ul>";
  }
  957. /*======================================================================*\
  958. END DEBUGGING FUNCTIONS
  959. \*======================================================================*/
  960. } # end class RSS
  /**
   * Format one attribute as name="value".
   *
   * @param string $k Attribute name
   * @param string $v Attribute value (inserted as-is, no escaping)
   * @return string
   */
  function map_attrs($k, $v) {
      return $k . '="' . $v . '"';
  }
  964. /*
  965. * Project: MagpieRSS: a simple RSS integration tool
  966. * File: rss_fetch.inc, a simple functional interface
  967. to fetching and parsing RSS files, via the
  968. function fetch_rss()
  969. * Author: Kellan Elliott-McCrea <kellan@protest.net>
  970. * License: GPL
  971. *
  972. * The latest version of MagpieRSS can be obtained from:
  973. * http://magpierss.sourceforge.net
  974. *
  975. * For questions, help, comments, discussion, etc., please join the
  976. * Magpie mailing list:
  977. * magpierss-general@lists.sourceforge.net
  978. *
  979. */
  980. // Setup MAGPIE_DIR for use on hosts that don't include
  981. // the current path in include_path.
  982. // with thanks to rajiv and smarty
  // Each constant is only defined when the including script has not set it already.
  defined('DIR_SEP') || define('DIR_SEP', DIRECTORY_SEPARATOR);
  // directory of this file, with a trailing separator
  defined('MAGPIE_DIR') || define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);
  989. /*
  990. * CONSTANTS - redefine these in your script to change the
  991. * behaviour of fetch_rss(); currently, most options affect the cache
  992. *
  993. * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
  994. * For me a built in cache was essential to creating a "PHP-like"
  995. * feel to Magpie, see rss_cache.inc for rationale
  996. *
  997. *
  998. * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
  999. * This should be a location that the webserver can write to. If this
  1000. * directory does not already exist Magpie will try to be smart and create
  1001. * it. This will often fail for permissions reasons.
  1002. *
  1003. *
  1004. * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
  1005. *
  1006. *
  1007. * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
  1008. * instead of returning stale object?
  1009. *
  1010. * MAGPIE_DEBUG - Display debugging notices?
  1011. *
  1012. */
  1013. /*=======================================================================*\
  1014. Function: fetch_rss:
  1015. Purpose: return RSS object for the given url
  1016. maintain the cache
  1017. Input: url of RSS file
  1018. Output: parsed RSS object (see rss_parse.inc)
  1019. NOTES ON CACHEING:
  1020. If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
  1021. NOTES ON RETRIEVING REMOTE FILES:
  1022. If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
  1023. return a cached object, and touch the cache object upon receiving a
  1024. 304.
  1025. NOTES ON FAILED REQUESTS:
  1026. If there is an HTTP error while fetching an RSS object, the cached
  1027. version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
  1028. \*=======================================================================*/
  // Last error message; error() and magpie_error() reach it through "global".
  $MAGPIE_ERROR = '';
  // Version string; init() embeds it in the default user agent.
  define('MAGPIE_VERSION', '0.61');
  /**
   * Return the parsed feed for $url, consulting and maintaining the disk cache.
   *
   * With the cache off the feed is fetched and parsed directly. With the
   * cache on, a fresh cached copy is returned as-is. Otherwise the feed is
   * fetched (conditionally when a stale copy carries validators), and on any
   * failure the stale copy is returned when one exists.
   *
   * @param string $url       Feed address
   * @param mixed  $cache_age Cache lifetime handed to RSSCache; '' falls back to MAGPIE_CACHE_AGE
   * @return mixed Parsed feed object, or false when nothing could be fetched or reused
   */
  function fetch_rss ($url, $cache_age = '') {
      // make sure every MAGPIE_* constant has a value
      init();

      if ( ! isset($url) ) {
          error("fetch_rss called without a url");
          return false;
      }

      // cache disabled: fetch, parse, done
      if ( !MAGPIE_CACHE_ON ) {
          $resp = _fetch_remote_file( $url );
          if ( is_success( $resp->status ) ) {
              return _response_to_rss( $resp );
          }
          error("Failed to fetch $url and cache is off");
          return false;
      }

      // cache enabled. Flow:
      //  1. check cache
      //  2. if there is a hit, make sure it is fresh
      //  3. if the cached object fails the freshness check, fetch remote
      //  4. if remote fails, return the stale object, or an error
      $cache = new RSSCache( MAGPIE_CACHE_DIR, ($cache_age != '') ? $cache_age : MAGPIE_CACHE_AGE );

      if (MAGPIE_DEBUG and $cache->ERROR) {
          debug($cache->ERROR, E_USER_WARNING);
      }

      $cache_status    = '';       // response of check_cache
      $request_headers = array();  // HTTP headers to send with fetch
      $rss             = 0;        // cached RSS object, if any
      $errormsg        = 0;        // errors, if any

      if ( ! $cache->ERROR) {
          // returns 'HIT', 'MISS', or 'STALE'
          $cache_status = $cache->check_cache( $url );
      }

      // Strict comparisons: before PHP 8, 0 == 'HIT' is true, so an unset
      // status used to fall into both branches below.
      if ( $cache_status === 'HIT' ) {
          $rss = $cache->get( $url );
          if ( isset($rss) and $rss ) {
              $rss->from_cache = 1;
              if ( MAGPIE_DEBUG > 1) {
                  debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
              }
              return $rss;
          }
      }

      // stale copy: attempt a conditional GET with whatever validators it has
      if ( $cache_status === 'STALE' ) {
          $rss = $cache->get( $url );
          // NOTE(review): values are trimmed in case the cached header text
          // still carries its line ending -- confirm against how headers are stored
          if (isset($rss->etag)) {
              $request_headers['If-None-Match'] = trim($rss->etag);
          }
          if (isset($rss->last_modified)) {
              // the standard request header is If-Modified-Since;
              // "If-Last-Modified" is not an HTTP header
              $request_headers['If-Modified-Since'] = trim($rss->last_modified);
          }
      }

      $resp = _fetch_remote_file( $url, $request_headers );

      if (isset($resp) and $resp) {
          if ($resp->status == '304' ) {
              // we have the most current copy
              if ( MAGPIE_DEBUG > 1) {
                  debug("Got 304 for $url");
              }
              // rewrite the cache entry on 304 so its timestamp is renewed
              $cache->set($url, $rss);
              return $rss;
          }
          elseif ( is_success( $resp->status ) ) {
              // kept in its own variable so a failed parse does not discard
              // the stale copy held in $rss
              $fresh = _response_to_rss( $resp );
              if ( $fresh ) {
                  if (MAGPIE_DEBUG > 1) {
                      debug("Fetch successful");
                  }
                  // add object to cache
                  $cache->set( $url, $fresh );
                  return $fresh;
              }
          }
          else {
              $errormsg = "Failed to fetch $url. ";
              if ( $resp->error ) {
                  // strip only the trailing newline/whitespace; cutting a fixed
                  // two characters also removed the last character of the message
                  $http_error = rtrim($resp->error);
                  $errormsg .= "(HTTP Error: $http_error)";
              }
              else {
                  $errormsg .= "(HTTP Response: " . $resp->response_code .')';
              }
          }
      }
      else {
          $errormsg = "Unable to retrieve RSS file for unknown reasons.";
      }

      // fetch failed: fall back to the cached object when there is one
      if ($rss) {
          return $rss;
      }

      // else we totally failed
      error( $errormsg );
      return false;
  } // end fetch_rss()
  /*=======================================================================*\
      Function:   error
      Purpose:    prefix the message with "MagpieRSS: ", store it in the
                  global $MAGPIE_ERROR, and report it: trigger_error() when
                  MAGPIE_DEBUG is truthy, error_log() otherwise
      Input:      the message, and the level used by trigger_error()
      Output:     nothing; an empty message is ignored
  \*=======================================================================*/
  function error ($errormsg, $lvl=E_USER_WARNING) {
      global $MAGPIE_ERROR;

      // $php_errormsg is only present when PHP's track_errors filled it in for this scope
      if ( isset($php_errormsg) ) {
          $errormsg .= " ($php_errormsg)";
      }

      // nothing to report
      if ( ! $errormsg ) {
          return;
      }

      $errormsg = "MagpieRSS: $errormsg";
      $MAGPIE_ERROR = $errormsg;

      if ( ! MAGPIE_DEBUG) {
          error_log($errormsg, 0);
          return;
      }

      trigger_error($errormsg, $lvl);
  }
  /*=======================================================================*\
      Function:   debug
      Purpose:    raise a "MagpieRSS [debug]" message through trigger_error()
      Input:      the message, and the error level (default E_USER_NOTICE)
  \*=======================================================================*/
  function debug ($debugmsg, $lvl=E_USER_NOTICE) {
      $tagged = 'MagpieRSS [debug] ' . $debugmsg;
      trigger_error($tagged, $lvl);
  }
  /*=======================================================================*\
      Function:   magpie_error
      Purpose:    accessor for the global $MAGPIE_ERROR
      Input:      optional message; a non-empty one replaces the stored error
      Output:     the stored error message
  \*=======================================================================*/
  function magpie_error ($errormsg="") {
      global $MAGPIE_ERROR;

      if ( ! empty($errormsg) ) {
          $MAGPIE_ERROR = $errormsg;
      }

      return $MAGPIE_ERROR;
  }
  /*=======================================================================*\
      Function:   _fetch_remote_file
      Purpose:    retrieve an arbitrary remote file with an M_Snoopy client
      Input:      url of the remote file
                  request headers as an array (optional; anything that is
                  not an array is ignored)
      Output:     the M_Snoopy client after the fetch; it carries the response
  \*=======================================================================*/
  function _fetch_remote_file ($url, $headers = "" ) {
      $http = new M_Snoopy();

      // client settings taken from the MAGPIE_* constants
      $settings = array(
          'agent'        => MAGPIE_USER_AGENT,
          'read_timeout' => MAGPIE_FETCH_TIME_OUT,
          'use_gzip'     => MAGPIE_USE_GZIP,
      );
      foreach ($settings as $property => $value) {
          $http->$property = $value;
      }

      if (is_array($headers) ) {
          $http->rawheaders = $headers;
      }

      // fetch() problems are read from the client by the caller
      @$http->fetch($url);
      return $http;
  }
  /*=======================================================================*\
      Function:   _response_to_rss
      Purpose:    parse an HTTP response object into an RSS object and copy
                  the ETag and Last-Modified response headers onto it
      Input:      an HTTP response object (results and headers are read)
      Output:     parsed RSS object, or false when parsing failed
  \*=======================================================================*/
  function _response_to_rss ($resp) {
      $rss = new MagpieRSS( $resp->results );

      // parsing failed: build the error message and give up
      if ( ! $rss or $rss->ERROR) {
          $errormsg = "Failed to parse RSS file.";
          if ($rss) {
              $errormsg .= " (" . $rss->ERROR . ")";
          }
          error($errormsg);
          return false;
      }

      // find ETag and Last-Modified
      foreach ($resp->headers as $h) {
          // lines without a colon (for example the status line) carry no field
          if (strpos($h, ':') === false) {
              continue;
          }

          list($field, $val) = explode(':', $h, 2);

          // header names are case-insensitive, and the value may be padded
          // with whitespace or still end with the line break
          $field = strtolower(trim($field));
          $val   = trim($val);

          if ( $field === 'etag' ) {
              $rss->etag = $val;
          }
          elseif ( $field === 'last-modified' ) {
              $rss->last_modified = $val;
          }
      }

      return $rss;
  }
  /*=======================================================================*\
      Function:   init
      Purpose:    give every MAGPIE_* constant a default value unless the
                  including script defined it already; runs only once
  \*=======================================================================*/
  function init () {
      // a second call is a no-op
      if ( defined('MAGPIE_INITALIZED') ) {
          return;
      }
      define('MAGPIE_INITALIZED', 1);

      // defaults that must exist before the user agent is built
      $defaults = array(
          'MAGPIE_CACHE_ON'         => 1,
          'MAGPIE_CACHE_DIR'        => './cache',
          'MAGPIE_CACHE_AGE'        => 60*60,   // one hour
          'MAGPIE_CACHE_FRESH_ONLY' => 0,
          'MAGPIE_DEBUG'            => 0,
      );
      foreach ($defaults as $name => $value) {
          if ( ! defined($name) ) {
              define($name, $value);
          }
      }

      // the default user agent says whether the cache is in use
      if ( ! defined('MAGPIE_USER_AGENT') ) {
          $tail = MAGPIE_CACHE_ON ? ')' : '; No cache)';
          define('MAGPIE_USER_AGENT', 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $tail);
      }

      // fetch options
      $fetch_defaults = array(
          'MAGPIE_FETCH_TIME_OUT' => 5,     // 5 second timeout
          'MAGPIE_USE_GZIP'       => true,  // use gzip encoding to fetch rss files if supported
      );
      foreach ($fetch_defaults as $name => $value) {
          if ( ! defined($name) ) {
              define($name, $value);
          }
      }
  }
  1279. // NOTE: the following code should really be in Snoopy, or at least
  1280. // somewhere other then rss_fetch!
  1281. /*=======================================================================*\
  1282. HTTP STATUS CODE PREDICATES
  1283. These functions attempt to classify an HTTP status code
  1284. based on RFC 2616 and RFC 2518.
  1285. All of them take an HTTP status code as input, and return true or false
  1286. All this code is adapted from LWP's HTTP::Status.
  1287. \*=======================================================================*/
  /*=======================================================================*\
      Function:   is_info
      Purpose:    return true for an Informational status code (100-199)
  \*=======================================================================*/
  function is_info ($sc) {
      if ($sc >= 100) {
          return $sc < 200;
      }
      return false;
  }
  /*=======================================================================*\
      Function:   is_success
      Purpose:    return true for a Successful status code (200-299)
  \*=======================================================================*/
  function is_success ($sc) {
      return ($sc < 300) && ($sc >= 200);
  }
  /*=======================================================================*\
      Function:   is_redirect
      Purpose:    return true for a Redirection status code (300-399)
  \*=======================================================================*/
  function is_redirect ($sc) {
      $at_least_300 = ($sc >= 300);
      $below_400    = ($sc < 400);
      return $at_least_300 && $below_400;
  }
  /*=======================================================================*\
      Function:   is_error
      Purpose:    return true for an Error status code, client or server
                  (400-599)
  \*=======================================================================*/
  function is_error ($sc) {
      return ($sc >= 400) ? ($sc < 600) : false;
  }
  /*=======================================================================*\
      Function:   is_client_error
      Purpose:    return true for a client Error status code (400-499)
  \*=======================================================================*/
  function is_client_error ($sc) {
      $result = false;
      if ($sc >= 400 && $sc < 500) {
          $result = true;
      }
      return $result;
  }
  /*=======================================================================*\
      Function:   is_server_error
      Purpose:    return true for a server Error status code (500-599)
  \*=======================================================================*/
  function is_server_error ($sc) {
      if ($sc >= 500) {
          if ($sc < 600) {
              return true;
          }
      }
      return false;
  }
  1330. /*
  1331. * Project: MagpieRSS: a simple RSS integration tool
  1332. * File: rss_cache.inc, a simple, rolling(no GC), cache
  1333. * for RSS objects, keyed on URL.
  1334. * Author: Kellan Elliott-McCrea <kellan@protest.net>
  1335. * Version: 0.51
  1336. * License: GPL
  1337. *
  1338. * The latest version of MagpieRSS can be obtained from:
  1339. * http://magpierss.sourceforge.net
  1340. *
  1341. * For questions, help, comments, discussion, etc., please join the
  1342. * Magpie mailing list:
  1343. * http://lists.sourceforge.net/lists/listinfo/magpierss-general
  1344. *
  1345. */
  /**
   * File-based cache for parsed RSS objects, keyed on the feed URL.
   *
   * Every entry is one file in $BASE_CACHE, named after the md5 of the URL
   * and holding the serialized object. Freshness is judged from the file's
   * modification time.
   */
  class RSSCache {
      var $BASE_CACHE = './cache';    // where the cache files are stored
      var $MAX_AGE    = 3600;         // when are files stale, default one hour
      var $ERROR      = "";           // last error message recorded by error()
      var $EE;                        // ExpressionEngine super object, set by the constructor

      /**
       * PHP 4 style constructor, kept so that explicit calls to RSSCache()
       * keep working. PHP 8 no longer treats a method named after its class
       * as the constructor, so the real work lives in __construct().
       */
      function RSSCache ($base='', $age='') {
          $this->__construct($base, $age);
      }

      /**
       * Set up the cache directory.
       *
       * @param string $base Cache directory; empty keeps the default $BASE_CACHE
       * @param mixed  $age  Maximum entry age in seconds; empty keeps the default $MAX_AGE
       */
      function __construct ($base='', $age='') {
          // Make a local reference of the ExpressionEngine super object
          $this->EE =& get_instance();

          if ( $base ) {
              $this->BASE_CACHE = $base;
          }
          if ( $age ) {
              $this->MAX_AGE = $age;
          }

          // attempt to make the cache directory
          if ( ! file_exists( $this->BASE_CACHE ) ) {
              $status = @mkdir( $this->BASE_CACHE, DIR_READ_MODE );
              @chmod($this->BASE_CACHE, DIR_WRITE_MODE);

              // if make failed
              if ( ! $status ) {
                  $this->error(
                      "Cache couldn't make dir '" . $this->BASE_CACHE . "'."
                  );
              }
          }
          else
          {
              // EE - make sure an existing cache directory is writable
              @chmod($this->BASE_CACHE, DIR_WRITE_MODE);
          }
      }

      /*=======================================================================*\
          Function:   set
          Purpose:    add an item to the cache, keyed on url
          Input:      url from which the rss file was fetched, and the object
                      to store
          Output:     path of the cache file on success, 0 when the file could
                      not be opened for writing
      \*=======================================================================*/
      function set ($url, $rss) {
          $this->ERROR = "";
          $cache_file = $this->file_name( $url );
          $fp = @fopen( $cache_file, 'w' );

          if ( ! $fp ) {
              $this->error(
                  "Cache unable to open file for writing: $cache_file"
              );
              return 0;
          }

          $data = serialize( $rss );
          fwrite( $fp, $data );
          fclose( $fp );

          @chmod($cache_file, FILE_WRITE_MODE);

          return $cache_file;
      }

      /*=======================================================================*\
          Function:   get
          Purpose:    fetch an item from the cache
          Input:      url from which the rss file was fetched
          Output:     the unserialized object, or 0 when the entry is missing,
                      unreadable or empty
      \*=======================================================================*/
      function get ($url) {
          $this->ERROR = "";
          $cache_file = $this->file_name( $url );

          if ( ! file_exists( $cache_file ) ) {
              $this->debug(
                  "Cache doesn't contain: $url (cache file: $cache_file)"
              );
              return 0;
          }

          $fp = @fopen($cache_file, 'r');
          if ( ! $fp ) {
              $this->error(
                  "Failed to open cache file for reading: $cache_file"
              );
              return 0;
          }

          $file_size = filesize($cache_file);
          $data = ($file_size > 0) ? fread( $fp, $file_size ) : '';

          // always release the handle, including for an empty file
          fclose( $fp );

          if ($data === '' || $data === false) {
              return 0;
          }

          // NOTE(review): unserialize() is only safe while nothing but set()
          // can write into the cache directory
          $rss = unserialize( $data );

          @chmod($cache_file, FILE_WRITE_MODE);

          return $rss;
      }

      /*=======================================================================*\
          Function:   check_cache
          Purpose:    check a url for membership in the cache
                      and whether the object is older than MAX_AGE (ie. STALE)
          Input:      url from which the rss file was fetched
          Output:     'HIT' for a current entry, 'STALE' for an old one,
                      'MISS' when there is no entry
      \*=======================================================================*/
      function check_cache ( $url ) {
          $this->ERROR = "";
          $filename = $this->file_name( $url );

          if ( ! file_exists( $filename ) ) {
              // object does not exist
              return 'MISS';
          }

          // find how long ago the file was added to the cache
          // and whether that is longer than MAX_AGE
          $mtime = filemtime( $filename );
          $age = time() - $mtime;

          if ( $this->MAX_AGE > $age ) {
              // object exists and is current
              return 'HIT';
          }

          // object exists but is old
          return 'STALE';
      }

      /*=======================================================================*\
          Function:   file_name
          Purpose:    map url to location in cache
          Input:      url from which the rss file was fetched
          Output:     a file name: BASE_CACHE joined with the md5 of the url
      \*=======================================================================*/
      function file_name ($url)
      {
          $filename = md5( $url );
          return $this->EE->functions->remove_double_slashes(join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename)));
      }

      /*=======================================================================*\
          Function:   error
          Purpose:    register error in $this->ERROR and report it through
                      trigger_error() (MAGPIE_DEBUG on) or error_log()
      \*=======================================================================*/
      function error ($errormsg, $lvl=E_USER_WARNING) {
          // append PHP's error message if track_errors enabled
          if ( isset($php_errormsg) ) {
              $errormsg .= " ($php_errormsg)";
          }

          $this->ERROR = $errormsg;

          if ( MAGPIE_DEBUG ) {
              trigger_error( $errormsg, $lvl);
          }
          else {
              error_log( $errormsg, 0);
          }
      }

      /*=======================================================================*\
          Function:   debug
          Purpose:    report a debug message through error(), only when
                      MAGPIE_DEBUG is on
      \*=======================================================================*/
      function debug ($debugmsg, $lvl=E_USER_NOTICE) {
          if ( MAGPIE_DEBUG ) {
              $this->error("MagpieRSS [debug] $debugmsg", $lvl);
          }
      }
  }
  1492. /*************************************************
  1493. Snoopy - the PHP net client
  1494. Author: Monte Ohrt <monte@ispi.net>
  1495. Copyright (c): 1999-2008 New Digital Group, all rights reserved
  1496. Version: 1.2.4
  1497. * This library is free software; you can redistribute it and/or
  1498. * modify it under the terms of the GNU Lesser General Public
  1499. * License as published by the Free Software Foundation; either
  1500. * version 2.1 of the License, or (at your option) any later version.
  1501. *
  1502. * This library is distributed in the hope that it will be useful,
  1503. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  1504. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  1505. * Lesser General Public License for more details.
  1506. *
  1507. * You should have received a copy of the GNU Lesser General Public
  1508. * License along with this library; if not, write to the Free Software
  1509. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  1510. You may contact the author of Snoopy by e-mail at:
  1511. monte@ohrt.com
  1512. The latest version of Snoopy can be obtained from:
  1513. http://snoopy.sourceforge.net/
  1514. *************************************************/
  1515. class M_Snoopy
  1516. {
  1517. /**** Public variables ****/
  1518. /* user definable vars */
  1519. var $host = "www.php.net"; // host name we are connecting to
  1520. var $port = 80; // port we are connecting to
  1521. var $proxy_host = ""; // proxy host to use
  1522. var $proxy_port = ""; // proxy port to use
  1523. var $proxy_user = ""; // proxy user to use
  1524. var $proxy_pass = ""; // proxy password to use
  1525. var $agent = "Snoopy v1.2.4"; // agent we masquerade as
  1526. var $referer = ""; // referer info to pass
  1527. var $cookies = array(); // array of cookies to pass
  1528. // $cookies["username"]="joe";
  1529. var $rawheaders = array(); // array of raw headers to send
  1530. // $rawheaders["Content-type"]="text/html";
  1531. var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
  1532. var $lastredirectaddr = ""; // contains address of last redirected address
  1533. var $offsiteok = true; // allows redirection off-site
  1534. var $maxframes = 0; // frame content depth maximum. 0 = disallow
  1535. var $expandlinks = true; // expand links to fully qualified URLs.
  1536. // this only applies to fetchlinks()
  1537. // submitlinks(), and submittext()
  1538. var $passcookies = true; // pass set cookies back through redirects
  1539. // NOTE: this currently does not respect
  1540. // dates, domains or paths.
  1541. var $user = ""; // user for http authentication
  1542. var $pass = ""; // password for http authentication
  1543. // http accept types
  1544. var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  1545. var $results = ""; // where the content is put
  1546. var $error = ""; // error messages sent here
  1547. var $response_code = ""; // response code returned from server
  1548. var $headers = array(); // headers returned from server sent here
  1549. var $maxlength = 500000; // max return data length (body)
  1550. var $read_timeout = 0; // timeout on read operations, in seconds
  1551. // supported only since PHP 4 Beta 4
  1552. // set to 0 to disallow timeouts
  1553. var $timed_out = false; // if a read operation timed out
  1554. var $status = 0; // http request status
  1555. var $temp_dir = "/tmp"; // temporary directory that the webserver
  1556. // has permission to write to.
  1557. // under Windows, this should be C:\temp
  1558. var $curl_path = "/usr/local/bin/curl";
  1559. // Snoopy will use cURL for fetching
  1560. // SSL content if a full system path to
  1561. // the cURL binary is supplied here.
  1562. // set to false if you do not have
  1563. // cURL installed. See http://curl.haxx.se
  1564. // for details on installing cURL.
  1565. // Snoopy does *not* use the cURL
  1566. // library functions built into php,
  1567. // as these functions are not stable
  1568. // as of this Snoopy release.
  1569. /**** Private variables ****/
  1570. var $_maxlinelen = 4096; // max line length (headers)
  1571. var $_httpmethod = "GET"; // default http request method
  1572. var $_httpversion = "HTTP/1.0"; // default http request version
  1573. var $_submit_method = "POST"; // default submit method
  1574. var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
  1575. var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
  1576. var $_redirectaddr = false; // will be set if page fetched is a redirect
  1577. var $_redirectdepth = 0; // increments on an http redirect
  1578. var $_frameurls = array(); // frame src urls
  1579. var $_framedepth = 0; // increments on frame depth
  1580. var $_isproxy = false; // set if using a proxy server
  1581. var $_fp_timeout = 30; // timeout for socket connection
  1582. /*======================================================================*\
  1583. Function: fetch
  1584. Purpose: fetch the contents of a web page
  1585. (and possibly other protocols in the
  1586. future like ftp, nntp, gopher, etc.)
  1587. Input: $URI the location of the page to fetch
  1588. Output: $this->results the output text from the fetch
  1589. \*======================================================================*/
  1590. function fetch($URI)
  1591. {
  1592. //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
  1593. $URI_PARTS = parse_url($URI);
  1594. if (!empty($URI_PARTS["user"]))
  1595. $this->user = $URI_PARTS["user"];
  1596. if (!empty($URI_PARTS["pass"]))
  1597. $this->pass = $URI_PARTS["pass"];
  1598. if (empty($URI_PARTS["query"]))
  1599. $URI_PARTS["query"] = '';
  1600. if (empty($URI_PARTS["path"]))
  1601. $URI_PARTS["path"] = '';
  1602. switch(strtolower($URI_PARTS["scheme"]))
  1603. {
  1604. case "http":
  1605. $this->host = $URI_PARTS["host"];
  1606. // Default to 80, cannot connect without a port
  1607. $this->port = empty($URI_PARTS["port"]) ? 80 : $URI_PARTS["port"];
  1608. if($this->_connect($fp))
  1609. {
  1610. // We needed port 80 to connect, but we can now switch to empty
  1611. // for the Host header. Some servers will try to redirect if the
  1612. // host header contains a port, which would previously create a loop.
  1613. if(empty($URI_PARTS["port"]))
  1614. {
  1615. $this->port = '';
  1616. }
  1617. if($this->_isproxy)
  1618. {
  1619. // using proxy, send entire URI
  1620. $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
  1621. }
  1622. else
  1623. {
  1624. $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
  1625. // no proxy, send only the path
  1626. $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
  1627. }
  1628. $this->_disconnect($fp);
  1629. if($this->_redirectaddr)
  1630. {
  1631. /* url was redirected, check if we've hit the max depth */
  1632. if($this->maxredirs > $this->_redirectdepth)
  1633. {
  1634. // only follow redirect if it's on this site, or offsiteok is true
  1635. if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
  1636. {
  1637. /* follow the redirect */
  1638. $this->_redirectdepth++;
  1639. $this->lastredirectaddr=$this->_redirectaddr;
  1640. $this->fetch($this->_redirectaddr);
  1641. }
  1642. }
  1643. }
  1644. if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
  1645. {
  1646. $frameurls = $this->_frameurls;
  1647. $this->_frameurls = array();
  1648. while(list(,$frameurl) = each($frameurls))
  1649. {
  1650. if($this->_framedepth < $this->maxframes)
  1651. {
  1652. $this->fetch($frameurl);
  1653. $this->_framedepth++;
  1654. }
  1655. else
  1656. break;
  1657. }
  1658. }
  1659. }
  1660. else
  1661. {
  1662. return false;
  1663. }
  1664. return true;
  1665. break;
  1666. case "https":
  1667. if(!$this->curl_path)
  1668. return false;
  1669. if(function_exists("is_executable"))
  1670. if (!is_executable($this->curl_path))
  1671. return false;
  1672. $this->host = $URI_PARTS["host"];
  1673. if(!empty($URI_PARTS["port"]))
  1674. $this->port = $URI_PARTS["port"];
  1675. if($this->_isproxy)
  1676. {
  1677. // using proxy, send entire URI
  1678. $this->_httpsrequest($URI,$URI,$this->_httpmethod);
  1679. }
  1680. else
  1681. {
  1682. $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
  1683. // no proxy, send only the path
  1684. $this->_httpsrequest($path, $URI, $this->_httpmethod);
  1685. }
  1686. if($this->_redirectaddr)
  1687. {
  1688. /* url was redirected, check if we've hit the max depth */
  1689. if($this->maxredirs > $this->_redirectdepth)
  1690. {
  1691. // only follow redirect if it's on this site, or offsiteok is true
  1692. if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
  1693. {
  1694. /* follow the redirect */
  1695. $this->_redirectdepth++;
  1696. $this->lastredirectaddr=$this->_redirectaddr;
  1697. $this->fetch($this->_redirectaddr);
  1698. }
  1699. }
  1700. }
  1701. if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
  1702. {
  1703. $frameurls = $this->_frameurls;
  1704. $this->_frameurls = array();
  1705. while(list(,$frameurl) = each($frameurls))
  1706. {
  1707. if($this->_framedepth < $this->maxframes)
  1708. {
  1709. $this->fetch($frameurl);
  1710. $this->_framedepth++;
  1711. }
  1712. else
  1713. break;
  1714. }
  1715. }
  1716. return true;
  1717. break;
  1718. default:
  1719. // not a valid protocol
  1720. $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
  1721. return false;
  1722. break;
  1723. }
  1724. return true;
  1725. }
  1726. /*======================================================================*\
  1727. Private functions
  1728. \*======================================================================*/
  1729. /*======================================================================*\
  1730. Function: _striplinks
  1731. Purpose: strip the hyperlinks from an html document
  1732. Input: $document document to strip.
  1733. Output: $match an array of the links
  1734. \*======================================================================*/
  1735. function _striplinks($document)
  1736. {
  1737. preg_match_all("'<\s*a\s.*?href\s*=\s* # find <a href=
  1738. ([\"\'])? # find single or double quote
  1739. (?(1) (.*?)\\1 | ([^\s\>]+)) # if quote found, match up to next matching
  1740. # quote, otherwise match up to next space
  1741. 'isx",$document,$links);
  1742. // catenate the non-empty matches from the conditional subpattern
  1743. while(list($key,$val) = each($links[2]))
  1744. {
  1745. if(!empty($val))
  1746. $match[] = $val;
  1747. }
  1748. while(list($key,$val) = each($links[3]))
  1749. {
  1750. if(!empty($val))
  1751. $match[] = $val;
  1752. }
  1753. // return the links
  1754. return $match;
  1755. }
  1756. /*======================================================================*\
  1757. Function: _stripform
  1758. Purpose: strip the form elements from an html document
  1759. Input: $document document to strip.
  1760. Output: $match an array of the links
  1761. \*======================================================================*/
  1762. function _stripform($document)
  1763. {
  1764. preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
  1765. // catenate the matches
  1766. $match = implode("\r\n",$elements[0]);
  1767. // return the links
  1768. return $match;
  1769. }
  1770. /*======================================================================*\
  1771. Function: _striptext
  1772. Purpose: strip the text from an html document
  1773. Input: $document document to strip.
  1774. Output: $text the resulting text
  1775. \*======================================================================*/
  1776. function _striptext($document)
  1777. {
  1778. // I didn't use preg eval (//e) since that is only available in PHP 4.0.
  1779. // so, list your entities one by one here. I included some of the
  1780. // more common ones.
  1781. $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript
  1782. "'<[\/\!]*?[^<>]*?>'si", // strip out html tags
  1783. "'([\r\n])[\s]+'", // strip out white space
  1784. "'&(quot|#34|#034|#x22);'i", // replace html entities
  1785. "'&(amp|#38|#038|#x26);'i", // added hexadecimal values
  1786. "'&(lt|#60|#060|#x3c);'i",
  1787. "'&(gt|#62|#062|#x3e);'i",
  1788. "'&(nbsp|#160|#xa0);'i",
  1789. "'&(iexcl|#161);'i",
  1790. "'&(cent|#162);'i",
  1791. "'&(pound|#163);'i",
  1792. "'&(copy|#169);'i",
  1793. "'&(reg|#174);'i",
  1794. "'&(deg|#176);'i",
  1795. "'&(#39|#039|#x27);'",
  1796. "'&(euro|#8364);'i", // europe
  1797. "'&a(uml|UML);'", // german
  1798. "'&o(uml|UML);'",
  1799. "'&u(uml|UML);'",
  1800. "'&A(uml|UML);'",
  1801. "'&O(uml|UML);'",
  1802. "'&U(uml|UML);'",
  1803. "'&szlig;'i",
  1804. );
  1805. $replace = array( "",
  1806. "",
  1807. "\\1",
  1808. "\"",
  1809. "&",
  1810. "<",
  1811. ">",
  1812. " ",
  1813. chr(161),
  1814. chr(162),
  1815. chr(163),
  1816. chr(169),
  1817. chr(174),
  1818. chr(176),
  1819. chr(39),
  1820. chr(128),
  1821. "ä",
  1822. "ö",
  1823. "ü",
  1824. "Ä",
  1825. "Ö",
  1826. "Ü",
  1827. "ß",
  1828. );
  1829. $text = preg_replace($search,$replace,$document);
  1830. return $text;
  1831. }
  1832. /*======================================================================*\
  1833. Function: _expandlinks
  1834. Purpose: expand each link into a fully qualified URL
  1835. Input: $links the links to qualify
  1836. $URI the full URI to get the base from
  1837. Output: $expandedLinks the expanded links
  1838. \*======================================================================*/
  1839. function _expandlinks($links,$URI)
  1840. {
  1841. preg_match("/^[^\?]+/",$URI,$match);
  1842. $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
  1843. $match = preg_replace("|/$|","",$match);
  1844. $match_part = parse_url($match);
  1845. $match_root =
  1846. $match_part["scheme"]."://".$match_part["host"];
  1847. $search = array( "|^http://".preg_quote($this->host)."|i",
  1848. "|^(\/)|i",
  1849. "|^(?!http://)(?!mailto:)|i",
  1850. "|/\./|",
  1851. "|/[^\/]+/\.\./|"
  1852. );
  1853. $replace = array( "",
  1854. $match_root."/",
  1855. $match."/",
  1856. "/",
  1857. "/"
  1858. );
  1859. $expandedLinks = preg_replace($search,$replace,$links);
  1860. return $expandedLinks;
  1861. }
  1862. /*======================================================================*\
  1863. Function: _httprequest
  1864. Purpose: go get the http data from the server
  1865. Input: $url the url to fetch
  1866. $fp the current open file pointer
  1867. $URI the full URI
  1868. $body body contents to send if any (POST)
  1869. Output:
  1870. \*======================================================================*/
  1871. function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
  1872. {
  1873. $cookie_headers = '';
  1874. if($this->passcookies && $this->_redirectaddr)
  1875. $this->setcookies();
  1876. $URI_PARTS = parse_url($URI);
  1877. if(empty($url))
  1878. $url = "/";
  1879. $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
  1880. if(!empty($this->agent))
  1881. $headers .= "User-Agent: ".$this->agent."\r\n";
  1882. if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
  1883. $headers .= "Host: ".$this->host;
  1884. if(!empty($this->port))
  1885. $headers .= ":".$this->port;
  1886. $headers .= "\r\n";
  1887. }
  1888. if(!empty($this->accept))
  1889. $headers .= "Accept: ".$this->accept."\r\n";
  1890. if(!empty($this->referer))
  1891. $headers .= "Referer: ".$this->referer."\r\n";
  1892. if(!empty($this->cookies))
  1893. {
  1894. if(!is_array($this->cookies))
  1895. $this->cookies = (array)$this->cookies;
  1896. reset($this->cookies);
  1897. if ( count($this->cookies) > 0 ) {
  1898. $cookie_headers .= 'Cookie: ';
  1899. foreach ( $this->cookies as $cookieKey => $cookieVal ) {
  1900. $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
  1901. }
  1902. $headers .= substr($cookie_headers,0,-2) . "\r\n";
  1903. }
  1904. }
  1905. if(!empty($this->rawheaders))
  1906. {
  1907. if(!is_array($this->rawheaders))
  1908. $this->rawheaders = (array)$this->rawheaders;
  1909. while(list($headerKey,$headerVal) = each($this->rawheaders))
  1910. $headers .= $headerKey.": ".$headerVal."\r\n";
  1911. }
  1912. if(!empty($content_type)) {
  1913. $headers .= "Content-type: $content_type";
  1914. if ($content_type == "multipart/form-data")
  1915. $headers .= "; boundary=".$this->_mime_boundary;
  1916. $headers .= "\r\n";
  1917. }
  1918. if(!empty($body))
  1919. $headers .= "Content-length: ".strlen($body)."\r\n";
  1920. if(!empty($this->user) || !empty($this->pass))
  1921. $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
  1922. //add proxy auth headers
  1923. if(!empty($this->proxy_user))
  1924. $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
  1925. $headers .= "\r\n";
  1926. // set the read timeout if needed
  1927. if ($this->read_timeout > 0)
  1928. socket_set_timeout($fp, $this->read_timeout);
  1929. $this->timed_out = false;
  1930. fwrite($fp,$headers.$body,strlen($headers.$body));
  1931. $this->_redirectaddr = false;
  1932. unset($this->headers);
  1933. while($currentHeader = fgets($fp,$this->_maxlinelen))
  1934. {
  1935. if ($this->read_timeout > 0 && $this->_check_timeout($fp))
  1936. {
  1937. $this->status=-100;
  1938. return false;
  1939. }
  1940. if($currentHeader == "\r\n")
  1941. break;
  1942. // if a header begins with Location: or URI:, set the redirect
  1943. if(preg_match("/^(Location:|URI:)/i",$currentHeader))
  1944. {
  1945. // get URL portion of the redirect
  1946. preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
  1947. // look for :// in the Location header to see if hostname is included
  1948. if(!preg_match("|\:\/\/|",$matches[2]))
  1949. {
  1950. // no host in the path, so prepend
  1951. $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
  1952. // eliminate double slash
  1953. if(!preg_match("|^/|",$matches[2]))
  1954. $this->_redirectaddr .= "/".$matches[2];
  1955. else
  1956. $this->_redirectaddr .= $matches[2];
  1957. }
  1958. else
  1959. $this->_redirectaddr = $matches[2];
  1960. }
  1961. if(preg_match("|^HTTP/|",$currentHeader))
  1962. {
  1963. if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
  1964. {
  1965. $this->status= $status[1];
  1966. }
  1967. $this->response_code = $currentHeader;
  1968. }
  1969. $this->headers[] = $currentHeader;
  1970. }
  1971. $results = '';
  1972. do {
  1973. $_data = fread($fp, $this->maxlength);
  1974. if (strlen($_data) == 0) {
  1975. break;
  1976. }
  1977. $results .= $_data;
  1978. } while(true);
  1979. if ($this->read_timeout > 0 && $this->_check_timeout($fp))
  1980. {
  1981. $this->status=-100;
  1982. return false;
  1983. }
  1984. // check if there is a a redirect meta tag
  1985. if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
  1986. {
  1987. $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
  1988. }
  1989. // have we hit our frame depth and is there frame src to fetch?
  1990. if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
  1991. {
  1992. $this->results[] = $results;
  1993. for($x=0; $x<count($match[1]); $x++)
  1994. $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
  1995. }
  1996. // have we already fetched framed content?
  1997. elseif(is_array($this->results))
  1998. $this->results[] = $results;
  1999. // no framed content
  2000. else
  2001. $this->results = $results;
  2002. return true;
  2003. }
  2004. /*======================================================================*\
  2005. Function: _httpsrequest
  2006. Purpose: go get the https data from the server using curl
  2007. Input: $url the url to fetch
  2008. $URI the full URI
  2009. $body body contents to send if any (POST)
  2010. Output:
  2011. \*======================================================================*/
  2012. function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
  2013. {
  2014. if($this->passcookies && $this->_redirectaddr)
  2015. $this->setcookies();
  2016. $headers = array();
  2017. $URI_PARTS = parse_url($URI);
  2018. if(empty($url))
  2019. $url = "/";
  2020. // GET ... header not needed for curl
  2021. //$headers[] = $http_method." ".$url." ".$this->_httpversion;
  2022. if(!empty($this->agent))
  2023. $headers[] = "User-Agent: ".$this->agent;
  2024. if(!empty($this->host))
  2025. if(!empty($this->port))
  2026. $headers[] = "Host: ".$this->host.":".$this->port;
  2027. else
  2028. $headers[] = "Host: ".$this->host;
  2029. if(!empty($this->accept))
  2030. $headers[] = "Accept: ".$this->accept;
  2031. if(!empty($this->referer))
  2032. $headers[] = "Referer: ".$this->referer;
  2033. if(!empty($this->cookies))
  2034. {
  2035. if(!is_array($this->cookies))
  2036. $this->cookies = (array)$this->cookies;
  2037. reset($this->cookies);
  2038. if ( count($this->cookies) > 0 ) {
  2039. $cookie_str = 'Cookie: ';
  2040. foreach ( $this->cookies as $cookieKey => $cookieVal ) {
  2041. $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
  2042. }
  2043. $headers[] = substr($cookie_str,0,-2);
  2044. }
  2045. }
  2046. if(!empty($this->rawheaders))
  2047. {
  2048. if(!is_array($this->rawheaders))
  2049. $this->rawheaders = (array)$this->rawheaders;
  2050. while(list($headerKey,$headerVal) = each($this->rawheaders))
  2051. $headers[] = $headerKey.": ".$headerVal;
  2052. }
  2053. if(!empty($content_type)) {
  2054. if ($content_type == "multipart/form-data")
  2055. $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
  2056. else
  2057. $headers[] = "Content-type: $content_type";
  2058. }
  2059. if(!empty($body))
  2060. $headers[] = "Content-length: ".strlen($body);
  2061. if(!empty($this->user) || !empty($this->pass))
  2062. $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
  2063. for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
  2064. $safer_header = strtr( $headers[$curr_header], "\"", " " );
  2065. $cmdline_params .= " -H \"".$safer_header."\"";
  2066. }
  2067. if(!empty($body))
  2068. $cmdline_params .= " -d \"$body\"";
  2069. if($this->read_timeout > 0)
  2070. $cmdline_params .= " -m ".$this->read_timeout;
  2071. $headerfile = tempnam($temp_dir, "sno");
  2072. exec($this->curl_path." -k -D \"$headerfile\"".$cmdline_params." \"".escapeshellcmd($URI)."\"",$results,$return);
  2073. if($return)
  2074. {
  2075. $this->error = "Error: cURL could not retrieve the document, error $return.";
  2076. return false;
  2077. }
  2078. $results = implode("\r\n",$results);
  2079. $result_headers = file("$headerfile");
  2080. $this->_redirectaddr = false;
  2081. unset($this->headers);
  2082. for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
  2083. {
  2084. // if a header begins with Location: or URI:, set the redirect
  2085. if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
  2086. {
  2087. // get URL portion of the redirect
  2088. preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
  2089. // look for :// in the Location header to see if hostname is included
  2090. if(!preg_match("|\:\/\/|",$matches[2]))
  2091. {
  2092. // no host in the path, so prepend
  2093. $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
  2094. // eliminate double slash
  2095. if(!preg_match("|^/|",$matches[2]))
  2096. $this->_redirectaddr .= "/".$matches[2];
  2097. else
  2098. $this->_redirectaddr .= $matches[2];
  2099. }
  2100. else
  2101. $this->_redirectaddr = $matches[2];
  2102. }
  2103. if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
  2104. $this->response_code = $result_headers[$currentHeader];
  2105. $this->headers[] = $result_headers[$currentHeader];
  2106. }
  2107. // check if there is a a redirect meta tag
  2108. if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
  2109. {
  2110. $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
  2111. }
  2112. // have we hit our frame depth and is there frame src to fetch?
  2113. if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
  2114. {
  2115. $this->results[] = $results;
  2116. for($x=0; $x<count($match[1]); $x++)
  2117. $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
  2118. }
  2119. // have we already fetched framed content?
  2120. elseif(is_array($this->results))
  2121. $this->results[] = $results;
  2122. // no framed content
  2123. else
  2124. $this->results = $results;
  2125. unlink("$headerfile");
  2126. return true;
  2127. }
  2128. /*======================================================================*\
  2129. Function: setcookies()
  2130. Purpose: set cookies for a redirection
  2131. \*======================================================================*/
  2132. function setcookies()
  2133. {
  2134. for($x=0; $x<count($this->headers); $x++)
  2135. {
  2136. if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
  2137. $this->cookies[$match[1]] = urldecode($match[2]);
  2138. }
  2139. }
  2140. /*======================================================================*\
  2141. Function: _check_timeout
  2142. Purpose: checks whether timeout has occurred
  2143. Input: $fp file pointer
  2144. \*======================================================================*/
  2145. function _check_timeout($fp)
  2146. {
  2147. if ($this->read_timeout > 0) {
  2148. $fp_status = socket_get_status($fp);
  2149. if ($fp_status["timed_out"]) {
  2150. $this->timed_out = true;
  2151. return true;
  2152. }
  2153. }
  2154. return false;
  2155. }
  2156. /*======================================================================*\
  2157. Function: _connect
  2158. Purpose: make a socket connection
  2159. Input: $fp file pointer
  2160. \*======================================================================*/
  2161. function _connect(&$fp)
  2162. {
  2163. if(!empty($this->proxy_host) && !empty($this->proxy_port))
  2164. {
  2165. $this->_isproxy = true;
  2166. $host = $this->proxy_host;
  2167. $port = $this->proxy_port;
  2168. }
  2169. else
  2170. {
  2171. $host = $this->host;
  2172. $port = $this->port;
  2173. }
  2174. $this->status = 0;
  2175. if($fp = fsockopen(
  2176. $host,
  2177. $port,
  2178. $errno,
  2179. $errstr,
  2180. $this->_fp_timeout
  2181. ))
  2182. {
  2183. // socket connection succeeded
  2184. return true;
  2185. }
  2186. else
  2187. {
  2188. // socket connection failed
  2189. $this->status = $errno;
  2190. switch($errno)
  2191. {
  2192. case -3:
  2193. $this->error="socket creation failed (-3)";
  2194. case -4:
  2195. $this->error="dns lookup failure (-4)";
  2196. case -5:
  2197. $this->error="connection refused or timed out (-5)";
  2198. default:
  2199. $this->error="connection failed (".$errno.")";
  2200. }
  2201. return false;
  2202. }
  2203. }
  2204. /*======================================================================*\
  2205. Function: _disconnect
  2206. Purpose: disconnect a socket connection
  2207. Input: $fp file pointer
  2208. \*======================================================================*/
  2209. function _disconnect($fp)
  2210. {
  2211. return(fclose($fp));
  2212. }
  2213. /*======================================================================*\
  2214. Function: _prepare_post_body
  2215. Purpose: Prepare post body according to encoding type
  2216. Input: $formvars - form variables
  2217. $formfiles - form upload files
  2218. Output: post body
  2219. \*======================================================================*/
  2220. function _prepare_post_body($formvars, $formfiles)
  2221. {
  2222. settype($formvars, "array");
  2223. settype($formfiles, "array");
  2224. $postdata = '';
  2225. if (count($formvars) == 0 && count($formfiles) == 0)
  2226. return;
  2227. switch ($this->_submit_type) {
  2228. case "application/x-www-form-urlencoded":
  2229. reset($formvars);
  2230. while(list($key,$val) = each($formvars)) {
  2231. if (is_array($val) || is_object($val)) {
  2232. while (list($cur_key, $cur_val) = each($val)) {
  2233. $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
  2234. }
  2235. } else
  2236. $postdata .= urlencode($key)."=".urlencode($val)."&";
  2237. }
  2238. break;
  2239. case "multipart/form-data":
  2240. $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
  2241. reset($formvars);
  2242. while(list($key,$val) = each($formvars)) {
  2243. if (is_array($val) || is_object($val)) {
  2244. while (list($cur_key, $cur_val) = each($val)) {
  2245. $postdata .= "--".$this->_mime_boundary."\r\n";
  2246. $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
  2247. $postdata .= "$cur_val\r\n";
  2248. }
  2249. } else {
  2250. $postdata .= "--".$this->_mime_boundary."\r\n";
  2251. $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
  2252. $postdata .= "$val\r\n";
  2253. }
  2254. }
  2255. reset($formfiles);
  2256. while (list($field_name, $file_names) = each($formfiles)) {
  2257. settype($file_names, "array");
  2258. while (list(, $file_name) = each($file_names)) {
  2259. if (!is_readable($file_name)) continue;
  2260. $fp = fopen($file_name, "r");
  2261. $file_content = fread($fp, filesize($file_name));
  2262. fclose($fp);
  2263. $base_name = basename($file_name);
  2264. $postdata .= "--".$this->_mime_boundary."\r\n";
  2265. $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
  2266. $postdata .= "$file_content\r\n";
  2267. }
  2268. }
  2269. $postdata .= "--".$this->_mime_boundary."--\r\n";
  2270. break;
  2271. }
  2272. return $postdata;
  2273. }
  2274. }
  2275. /* End of file pi.magpie.php */
  2276. /* Location: ./system/expressionengine/plugins/pi.magpie.php */