PageRenderTime 670ms CodeModel.GetById 152ms app.highlight 327ms RepoModel.GetById 117ms app.codeStats 9ms

/Feed/Reader.php

https://bitbucket.org/goldie/zend-framework1
PHP | 735 lines | 481 code | 60 blank | 194 comment | 64 complexity | 413ace99d1b8e7ddcd991429e8b346fa MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework
  4 *
  5 * LICENSE
  6 *
  7 * This source file is subject to the new BSD license that is bundled
  8 * with this package in the file LICENSE.txt.
  9 * It is also available through the world-wide-web at this URL:
 10 * http://framework.zend.com/license/new-bsd
 11 * If you did not receive a copy of the license and are unable to
 12 * obtain it through the world-wide-web, please send an email
 13 * to license@zend.com so we can send you a copy immediately.
 14 *
 15 * @category   Zend
 16 * @package    Zend_Feed_Reader
 17 * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
 18 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 19 * @version    $Id: Reader.php 23975 2011-05-03 16:43:46Z ralph $
 20 */
 21
 22/**
 23 * @see Zend_Feed
 24 */
 25require_once 'Zend/Feed.php';
 26
 27/**
 28 * @see Zend_Feed_Reader_Feed_Rss
 29 */
 30require_once 'Zend/Feed/Reader/Feed/Rss.php';
 31
 32/**
 33 * @see Zend_Feed_Reader_Feed_Atom
 34 */
 35require_once 'Zend/Feed/Reader/Feed/Atom.php';
 36
 37/**
 38 * @see Zend_Feed_Reader_FeedSet
 39 */
 40require_once 'Zend/Feed/Reader/FeedSet.php';
 41
 42/**
 43 * @category   Zend
 44 * @package    Zend_Feed_Reader
 45 * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
 46 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 47 */
 48class Zend_Feed_Reader
 49{
 50    /**
 51     * Namespace constants
 52     */
 53    const NAMESPACE_ATOM_03  = 'http://purl.org/atom/ns#';
 54    const NAMESPACE_ATOM_10  = 'http://www.w3.org/2005/Atom';
 55    const NAMESPACE_RDF      = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
 56    const NAMESPACE_RSS_090  = 'http://my.netscape.com/rdf/simple/0.9/';
 57    const NAMESPACE_RSS_10   = 'http://purl.org/rss/1.0/';
 58
 59    /**
 60     * Feed type constants
 61     */
 62    const TYPE_ANY              = 'any';
 63    const TYPE_ATOM_03          = 'atom-03';
 64    const TYPE_ATOM_10          = 'atom-10';
 65    const TYPE_ATOM_10_ENTRY    = 'atom-10-entry';
 66    const TYPE_ATOM_ANY         = 'atom';
 67    const TYPE_RSS_090          = 'rss-090';
 68    const TYPE_RSS_091          = 'rss-091';
 69    const TYPE_RSS_091_NETSCAPE = 'rss-091n';
 70    const TYPE_RSS_091_USERLAND = 'rss-091u';
 71    const TYPE_RSS_092          = 'rss-092';
 72    const TYPE_RSS_093          = 'rss-093';
 73    const TYPE_RSS_094          = 'rss-094';
 74    const TYPE_RSS_10           = 'rss-10';
 75    const TYPE_RSS_20           = 'rss-20';
 76    const TYPE_RSS_ANY          = 'rss';
 77
 78    /**
 79     * Cache instance
 80     *
 81     * @var Zend_Cache_Core
 82     */
 83    protected static $_cache = null;
 84
 85    /**
 86     * HTTP client object to use for retrieving feeds
 87     *
 88     * @var Zend_Http_Client
 89     */
 90    protected static $_httpClient = null;
 91
 92    /**
 93     * Override HTTP PUT and DELETE request methods?
 94     *
 95     * @var boolean
 96     */
 97    protected static $_httpMethodOverride = false;
 98
 99    protected static $_httpConditionalGet = false;
100
101    protected static $_pluginLoader = null;
102
103    protected static $_prefixPaths = array();
104
105    protected static $_extensions = array(
106        'feed' => array(
107            'DublinCore_Feed',
108            'Atom_Feed'
109        ),
110        'entry' => array(
111            'Content_Entry',
112            'DublinCore_Entry',
113            'Atom_Entry'
114        ),
115        'core' => array(
116            'DublinCore_Feed',
117            'Atom_Feed',
118            'Content_Entry',
119            'DublinCore_Entry',
120            'Atom_Entry'
121        )
122    );
123
124    /**
125     * Get the Feed cache
126     *
127     * @return Zend_Cache_Core
128     */
129    public static function getCache()
130    {
131        return self::$_cache;
132    }
133
134    /**
135     * Set the feed cache
136     *
137     * @param Zend_Cache_Core $cache
138     * @return void
139     */
140    public static function setCache(Zend_Cache_Core $cache)
141    {
142        self::$_cache = $cache;
143    }
144
145    /**
146     * Set the HTTP client instance
147     *
148     * Sets the HTTP client object to use for retrieving the feeds.
149     *
150     * @param  Zend_Http_Client $httpClient
151     * @return void
152     */
153    public static function setHttpClient(Zend_Http_Client $httpClient)
154    {
155        self::$_httpClient = $httpClient;
156    }
157
158
159    /**
160     * Gets the HTTP client object. If none is set, a new Zend_Http_Client will be used.
161     *
162     * @return Zend_Http_Client_Abstract
163     */
164    public static function getHttpClient()
165    {
166        if (!self::$_httpClient instanceof Zend_Http_Client) {
167            /**
168             * @see Zend_Http_Client
169             */
170            require_once 'Zend/Http/Client.php';
171            self::$_httpClient = new Zend_Http_Client();
172        }
173
174        return self::$_httpClient;
175    }
176
177    /**
178     * Toggle using POST instead of PUT and DELETE HTTP methods
179     *
180     * Some feed implementations do not accept PUT and DELETE HTTP
181     * methods, or they can't be used because of proxies or other
182     * measures. This allows turning on using POST where PUT and
183     * DELETE would normally be used; in addition, an
184     * X-Method-Override header will be sent with a value of PUT or
185     * DELETE as appropriate.
186     *
187     * @param  boolean $override Whether to override PUT and DELETE.
188     * @return void
189     */
190    public static function setHttpMethodOverride($override = true)
191    {
192        self::$_httpMethodOverride = $override;
193    }
194
195    /**
196     * Get the HTTP override state
197     *
198     * @return boolean
199     */
200    public static function getHttpMethodOverride()
201    {
202        return self::$_httpMethodOverride;
203    }
204
205    /**
206     * Set the flag indicating whether or not to use HTTP conditional GET
207     *
208     * @param  bool $bool
209     * @return void
210     */
211    public static function useHttpConditionalGet($bool = true)
212    {
213        self::$_httpConditionalGet = $bool;
214    }
215
216    /**
217     * Import a feed by providing a URL
218     *
219     * @param  string $url The URL to the feed
220     * @param  string $etag OPTIONAL Last received ETag for this resource
221     * @param  string $lastModified OPTIONAL Last-Modified value for this resource
222     * @return Zend_Feed_Reader_FeedInterface
223     */
224    public static function import($uri, $etag = null, $lastModified = null)
225    {
226        $cache       = self::getCache();
227        $feed        = null;
228        $responseXml = '';
229        $client      = self::getHttpClient();
230        $client->resetParameters();
231        $client->setHeaders('If-None-Match', null);
232        $client->setHeaders('If-Modified-Since', null);
233        $client->setUri($uri);
234        $cacheId = 'Zend_Feed_Reader_' . md5($uri);
235
236        if (self::$_httpConditionalGet && $cache) {
237            $data = $cache->load($cacheId);
238            if ($data) {
239                if ($etag === null) {
240                    $etag = $cache->load($cacheId.'_etag');
241                }
242                if ($lastModified === null) {
243                    $lastModified = $cache->load($cacheId.'_lastmodified');;
244                }
245                if ($etag) {
246                    $client->setHeaders('If-None-Match', $etag);
247                }
248                if ($lastModified) {
249                    $client->setHeaders('If-Modified-Since', $lastModified);
250                }
251            }
252            $response = $client->request('GET');
253            if ($response->getStatus() !== 200 && $response->getStatus() !== 304) {
254                require_once 'Zend/Feed/Exception.php';
255                throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
256            }
257            if ($response->getStatus() == 304) {
258                $responseXml = $data;
259            } else {
260                $responseXml = $response->getBody();
261                $cache->save($responseXml, $cacheId);
262                if ($response->getHeader('ETag')) {
263                    $cache->save($response->getHeader('ETag'), $cacheId.'_etag');
264                }
265                if ($response->getHeader('Last-Modified')) {
266                    $cache->save($response->getHeader('Last-Modified'), $cacheId.'_lastmodified');
267                }
268            }
269            if (empty($responseXml)) {
270                require_once 'Zend/Feed/Exception.php';
271                throw new Zend_Feed_Exception('Feed failed to load, got empty response body');
272            }
273            return self::importString($responseXml);
274        } elseif ($cache) {
275            $data = $cache->load($cacheId);
276            if ($data !== false) {
277                return self::importString($data);
278            }
279            $response = $client->request('GET');
280            if ($response->getStatus() !== 200) {
281                require_once 'Zend/Feed/Exception.php';
282                throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
283            }
284            $responseXml = $response->getBody();
285            $cache->save($responseXml, $cacheId);
286            if (empty($responseXml)) {
287                require_once 'Zend/Feed/Exception.php';
288                throw new Zend_Feed_Exception('Feed failed to load, got empty response body');
289            }
290            return self::importString($responseXml);
291        } else {
292            $response = $client->request('GET');
293            if ($response->getStatus() !== 200) {
294                require_once 'Zend/Feed/Exception.php';
295                throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
296            }
297            $responseXml = $response->getBody();
298            if (empty($responseXml)) {
299                require_once 'Zend/Feed/Exception.php';
300                throw new Zend_Feed_Exception('Feed failed to load, got empty response body');
301            }
302            $reader = self::importString($responseXml);
303            $reader->setOriginalSourceUri($uri);
304            return $reader;
305        }
306    }
307
308    /**
309     * Import a feed by providing a Zend_Feed_Abstract object
310     *
311     * @param  Zend_Feed_Abstract $feed A fully instantiated Zend_Feed object
312     * @return Zend_Feed_Reader_FeedInterface
313     */
314    public static function importFeed(Zend_Feed_Abstract $feed)
315    {
316        $dom  = $feed->getDOM()->ownerDocument;
317        $type = self::detectType($dom);
318        self::_registerCoreExtensions();
319        if (substr($type, 0, 3) == 'rss') {
320            $reader = new Zend_Feed_Reader_Feed_Rss($dom, $type);
321        } else {
322            $reader = new Zend_Feed_Reader_Feed_Atom($dom, $type);
323        }
324
325        return $reader;
326    }
327
328    /**
329     * Import a feed froma string
330     *
331     * @param  string $string
332     * @return Zend_Feed_Reader_FeedInterface
333     */
334    public static function importString($string)
335    {
336        
337        $libxml_errflag = libxml_use_internal_errors(true);
338        $dom = new DOMDocument;
339        $status = $dom->loadXML($string);
340        libxml_use_internal_errors($libxml_errflag);
341
342        if (!$status) {
343            // Build error message
344            $error = libxml_get_last_error();
345            if ($error && $error->message) {
346                $errormsg = "DOMDocument cannot parse XML: {$error->message}";
347            } else {
348                $errormsg = "DOMDocument cannot parse XML: Please check the XML document's validity";
349            }
350
351            require_once 'Zend/Feed/Exception.php';
352            throw new Zend_Feed_Exception($errormsg);
353        }
354
355        $type = self::detectType($dom);
356
357        self::_registerCoreExtensions();
358
359        if (substr($type, 0, 3) == 'rss') {
360            $reader = new Zend_Feed_Reader_Feed_Rss($dom, $type);
361        } elseif (substr($type, 8, 5) == 'entry') {
362            $reader = new Zend_Feed_Reader_Entry_Atom($dom->documentElement, 0, Zend_Feed_Reader::TYPE_ATOM_10);
363        } elseif (substr($type, 0, 4) == 'atom') {
364            $reader = new Zend_Feed_Reader_Feed_Atom($dom, $type);
365        } else {
366            require_once 'Zend/Feed/Exception.php';
367            throw new Zend_Feed_Exception('The URI used does not point to a '
368            . 'valid Atom, RSS or RDF feed that Zend_Feed_Reader can parse.');
369        }
370        return $reader;
371    }
372
373    /**
374     * Imports a feed from a file located at $filename.
375     *
376     * @param  string $filename
377     * @throws Zend_Feed_Exception
378     * @return Zend_Feed_Reader_FeedInterface
379     */
380    public static function importFile($filename)
381    {
382        @ini_set('track_errors', 1);
383        $feed = @file_get_contents($filename);
384        @ini_restore('track_errors');
385        if ($feed === false) {
386            /**
387             * @see Zend_Feed_Exception
388             */
389            require_once 'Zend/Feed/Exception.php';
390            throw new Zend_Feed_Exception("File could not be loaded: $php_errormsg");
391        }
392        return self::importString($feed);
393    }
394
395    public static function findFeedLinks($uri)
396    {
397        // Get the HTTP response from $uri and save the contents
398        $client = self::getHttpClient();
399        $client->setUri($uri);
400        $response = $client->request();
401        if ($response->getStatus() !== 200) {
402            /**
403             * @see Zend_Feed_Exception
404             */
405            require_once 'Zend/Feed/Exception.php';
406            throw new Zend_Feed_Exception("Failed to access $uri, got response code " . $response->getStatus());
407        }
408        $responseHtml = $response->getBody();
409        $libxml_errflag = libxml_use_internal_errors(true);
410        $dom = new DOMDocument;
411        $status = $dom->loadHTML($responseHtml);
412        libxml_use_internal_errors($libxml_errflag);
413        if (!$status) {
414            // Build error message
415            $error = libxml_get_last_error();
416            if ($error && $error->message) {
417                $errormsg = "DOMDocument cannot parse HTML: {$error->message}";
418            } else {
419                $errormsg = "DOMDocument cannot parse HTML: Please check the XML document's validity";
420            }
421
422            require_once 'Zend/Feed/Exception.php';
423            throw new Zend_Feed_Exception($errormsg);
424        }
425        $feedSet = new Zend_Feed_Reader_FeedSet;
426        $links = $dom->getElementsByTagName('link');
427        $feedSet->addLinks($links, $uri);
428        return $feedSet;
429    }
430
431    /**
432     * Detect the feed type of the provided feed
433     *
434     * @param  Zend_Feed_Abstract|DOMDocument|string $feed
435     * @return string
436     */
437    public static function detectType($feed, $specOnly = false)
438    {
439        if ($feed instanceof Zend_Feed_Reader_FeedInterface) {
440            $dom = $feed->getDomDocument();
441        } elseif($feed instanceof DOMDocument) {
442            $dom = $feed;
443        } elseif(is_string($feed) && !empty($feed)) {
444            @ini_set('track_errors', 1);
445            $dom = new DOMDocument;
446            $status = @$dom->loadXML($feed);
447            @ini_restore('track_errors');
448            if (!$status) {
449                if (!isset($php_errormsg)) {
450                    if (function_exists('xdebug_is_enabled')) {
451                        $php_errormsg = '(error message not available, when XDebug is running)';
452                    } else {
453                        $php_errormsg = '(error message not available)';
454                    }
455                }
456                require_once 'Zend/Feed/Exception.php';
457                throw new Zend_Feed_Exception("DOMDocument cannot parse XML: $php_errormsg");
458            }
459        } else {
460            require_once 'Zend/Feed/Exception.php';
461            throw new Zend_Feed_Exception('Invalid object/scalar provided: must'
462            . ' be of type Zend_Feed_Reader_FeedInterface, DomDocument or string');
463        }
464        $xpath = new DOMXPath($dom);
465
466        if ($xpath->query('/rss')->length) {
467            $type = self::TYPE_RSS_ANY;
468            $version = $xpath->evaluate('string(/rss/@version)');
469
470            if (strlen($version) > 0) {
471                switch($version) {
472                    case '2.0':
473                        $type = self::TYPE_RSS_20;
474                        break;
475
476                    case '0.94':
477                        $type = self::TYPE_RSS_094;
478                        break;
479
480                    case '0.93':
481                        $type = self::TYPE_RSS_093;
482                        break;
483
484                    case '0.92':
485                        $type = self::TYPE_RSS_092;
486                        break;
487
488                    case '0.91':
489                        $type = self::TYPE_RSS_091;
490                        break;
491                }
492            }
493
494            return $type;
495        }
496
497        $xpath->registerNamespace('rdf', self::NAMESPACE_RDF);
498
499        if ($xpath->query('/rdf:RDF')->length) {
500            $xpath->registerNamespace('rss', self::NAMESPACE_RSS_10);
501
502            if ($xpath->query('/rdf:RDF/rss:channel')->length
503                || $xpath->query('/rdf:RDF/rss:image')->length
504                || $xpath->query('/rdf:RDF/rss:item')->length
505                || $xpath->query('/rdf:RDF/rss:textinput')->length
506            ) {
507                return self::TYPE_RSS_10;
508            }
509
510            $xpath->registerNamespace('rss', self::NAMESPACE_RSS_090);
511
512            if ($xpath->query('/rdf:RDF/rss:channel')->length
513                || $xpath->query('/rdf:RDF/rss:image')->length
514                || $xpath->query('/rdf:RDF/rss:item')->length
515                || $xpath->query('/rdf:RDF/rss:textinput')->length
516            ) {
517                return self::TYPE_RSS_090;
518            }
519        }
520
521        $type = self::TYPE_ATOM_ANY;
522        $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_10);
523
524        if ($xpath->query('//atom:feed')->length) {
525            return self::TYPE_ATOM_10;
526        }
527
528        if ($xpath->query('//atom:entry')->length) {
529            if ($specOnly == true) {
530                return self::TYPE_ATOM_10;
531            } else {
532                return self::TYPE_ATOM_10_ENTRY;
533            }
534        }
535
536        $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_03);
537
538        if ($xpath->query('//atom:feed')->length) {
539            return self::TYPE_ATOM_03;
540        }
541
542        return self::TYPE_ANY;
543    }
544
545    /**
546     * Set plugin loader for use with Extensions
547     *
548     * @param  Zend_Loader_PluginLoader_Interface $loader
549     */
550    public static function setPluginLoader(Zend_Loader_PluginLoader_Interface $loader)
551    {
552        self::$_pluginLoader = $loader;
553    }
554
555    /**
556     * Get plugin loader for use with Extensions
557     *
558     * @return  Zend_Loader_PluginLoader_Interface $loader
559     */
560    public static function getPluginLoader()
561    {
562        if (!isset(self::$_pluginLoader)) {
563            require_once 'Zend/Loader/PluginLoader.php';
564            self::$_pluginLoader = new Zend_Loader_PluginLoader(array(
565                'Zend_Feed_Reader_Extension_' => 'Zend/Feed/Reader/Extension/',
566            ));
567        }
568        return self::$_pluginLoader;
569    }
570
571    /**
572     * Add prefix path for loading Extensions
573     *
574     * @param  string $prefix
575     * @param  string $path
576     * @return void
577     */
578    public static function addPrefixPath($prefix, $path)
579    {
580        $prefix = rtrim($prefix, '_');
581        $path   = rtrim($path, DIRECTORY_SEPARATOR);
582        self::getPluginLoader()->addPrefixPath($prefix, $path);
583    }
584
585    /**
586     * Add multiple Extension prefix paths at once
587     *
588     * @param  array $spec
589     * @return void
590     */
591    public static function addPrefixPaths(array $spec)
592    {
593        if (isset($spec['prefix']) && isset($spec['path'])) {
594            self::addPrefixPath($spec['prefix'], $spec['path']);
595        }
596        foreach ($spec as $prefixPath) {
597            if (isset($prefixPath['prefix']) && isset($prefixPath['path'])) {
598                self::addPrefixPath($prefixPath['prefix'], $prefixPath['path']);
599            }
600        }
601    }
602
603    /**
604     * Register an Extension by name
605     *
606     * @param  string $name
607     * @return void
608     * @throws Zend_Feed_Exception if unable to resolve Extension class
609     */
610    public static function registerExtension($name)
611    {
612        $feedName  = $name . '_Feed';
613        $entryName = $name . '_Entry';
614        if (self::isRegistered($name)) {
615            if (self::getPluginLoader()->isLoaded($feedName) ||
616                self::getPluginLoader()->isLoaded($entryName)) {
617                return;
618            }
619        }
620        try {
621            self::getPluginLoader()->load($feedName);
622            self::$_extensions['feed'][] = $feedName;
623        } catch (Zend_Loader_PluginLoader_Exception $e) {
624        }
625        try {
626            self::getPluginLoader()->load($entryName);
627            self::$_extensions['entry'][] = $entryName;
628        } catch (Zend_Loader_PluginLoader_Exception $e) {
629        }
630        if (!self::getPluginLoader()->isLoaded($feedName)
631            && !self::getPluginLoader()->isLoaded($entryName)
632        ) {
633            require_once 'Zend/Feed/Exception.php';
634            throw new Zend_Feed_Exception('Could not load extension: ' . $name
635                . 'using Plugin Loader. Check prefix paths are configured and extension exists.');
636        }
637    }
638
639    /**
640     * Is a given named Extension registered?
641     *
642     * @param  string $extensionName
643     * @return boolean
644     */
645    public static function isRegistered($extensionName)
646    {
647        $feedName  = $extensionName . '_Feed';
648        $entryName = $extensionName . '_Entry';
649        if (in_array($feedName, self::$_extensions['feed'])
650            || in_array($entryName, self::$_extensions['entry'])
651        ) {
652            return true;
653        }
654        return false;
655    }
656
657    /**
658     * Get a list of extensions
659     *
660     * @return array
661     */
662    public static function getExtensions()
663    {
664        return self::$_extensions;
665    }
666
667    /**
668     * Reset class state to defaults
669     *
670     * @return void
671     */
672    public static function reset()
673    {
674        self::$_cache              = null;
675        self::$_httpClient         = null;
676        self::$_httpMethodOverride = false;
677        self::$_httpConditionalGet = false;
678        self::$_pluginLoader       = null;
679        self::$_prefixPaths        = array();
680        self::$_extensions         = array(
681            'feed' => array(
682                'DublinCore_Feed',
683                'Atom_Feed'
684            ),
685            'entry' => array(
686                'Content_Entry',
687                'DublinCore_Entry',
688                'Atom_Entry'
689            ),
690            'core' => array(
691                'DublinCore_Feed',
692                'Atom_Feed',
693                'Content_Entry',
694                'DublinCore_Entry',
695                'Atom_Entry'
696            )
697        );
698    }
699
700    /**
701     * Register core (default) extensions
702     *
703     * @return void
704     */
705    protected static function _registerCoreExtensions()
706    {
707        self::registerExtension('DublinCore');
708        self::registerExtension('Content');
709        self::registerExtension('Atom');
710        self::registerExtension('Slash');
711        self::registerExtension('WellFormedWeb');
712        self::registerExtension('Thread');
713        self::registerExtension('Podcast');
714    }
715
716    /**
717     * Utility method to apply array_unique operation to a multidimensional
718     * array.
719     *
720     * @param array
721     * @return array
722     */
723    public static function arrayUnique(array $array)
724    {
725        foreach ($array as &$value) {
726            $value = serialize($value);
727        }
728        $array = array_unique($array);
729        foreach ($array as &$value) {
730            $value = unserialize($value);
731        }
732        return $array;
733    }
734
735}