PageRenderTime 23ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 0ms

/tools/pear_xml_parser/Parser/Type.php

https://gitlab.com/staging06/myproject
PHP | 475 lines | 233 code | 38 blank | 204 comment | 43 complexity | 6e312ad10e0c788dd693b87573d453fe MD5 | raw file
  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
  3. /**
  4. * Abstract class providing common methods for XML_Feed_Parser feeds.
  5. *
  6. * PHP version 5
  7. *
  8. * LICENSE: This source file is subject to version 3.0 of the PHP license
  9. * that is available through the world-wide-web at the following URI:
  10. * http://www.php.net/license/3_0.txt. If you did not receive a copy of
  11. * the PHP License and are unable to obtain it through the web, please
  12. * send a note to license@php.net so we can mail you a copy immediately.
  13. *
  14. * @category XML
  15. * @package XML_Feed_Parser
  16. * @author James Stewart <james@jystewart.net>
  17. * @copyright 2005 James Stewart <james@jystewart.net>
  18. * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
  19. * @version CVS: $Id: Type.php 6844 2011-06-03 14:46:51Z dMetzger $
  20. * @link http://pear.php.net/package/XML_Feed_Parser/
  21. */
  /**
   * Shared behaviour for every XML_Feed_Parser feed and entry type.
   *
   * The class offers magic method/property access to feed elements, xml:base
   * resolution for links, serialisation helpers for embedded XHTML, and a few
   * generic getters that behave identically for all feed formats.
   *
   * It reads several members that it does not declare itself and that the
   * concrete subclass is expected to provide: $map, $compatMap, $itemElement,
   * $itemClass and $relax, plus a getLink() method when links are mapped.
   *
   * @package XML_Feed_Parser
   * @author James Stewart <james@jystewart.net>
   * @version Release: @package_version@
   */
  abstract class XML_Feed_Parser_Type
  {
      /**
       * Where we store our DOM object for this feed
       * @var DOMDocument
       */
      public $model;

      /**
       * For iteration we'll want a count of the number of entries
       * @var int
       */
      public $numberEntries;

      /**
       * Where we store our entry objects once instantiated, keyed by offset
       * @var array
       */
      public $entries = array();

      /**
       * Lookup table keyed by entry ID.
       *
       * getEntryByOffset() stores the instantiated entry object under its ID.
       *
       * @var array
       */
      public $idMappings = array();

      /**
       * Proxy to allow use of element names as method names
       *
       * We are not going to provide methods for every entry type so this
       * function will allow for a lot of mapping. We rely pretty heavily
       * on this to handle our mappings between other feed types and atom.
       *
       * Resolution order: an alias in $compatMap is translated first (its last
       * item is the real element name, any preceding items are appended to the
       * arguments), then $map is consulted (falling back to a case-insensitive
       * match) to find the 'get<Type>' method that handles the element.
       *
       * @param string $call      - the method attempted
       * @param array  $arguments - arguments to that method
       * @return mixed the getter's result, or false when nothing is mapped
       */
      public function __call($call, $arguments = array())
      {
          if (! is_array($arguments)) {
              $arguments = array();
          }

          /* Both maps live in the subclass; tolerate their absence */
          $compatMap = (isset($this->compatMap) && is_array($this->compatMap))
              ? $this->compatMap
              : array();
          $map = (isset($this->map) && is_array($this->map))
              ? $this->map
              : array();

          if (isset($compatMap[$call])) {
              $extra = (array) $compatMap[$call];
              $target = array_pop($extra);
              /* Only the items left for THIS alias become extra arguments */
              if (! empty($extra)) {
                  $arguments = array_merge($arguments, $extra);
              }
              $call = $target;
          }

          /* To be helpful, we allow a case-insensitive search for this method */
          if (! isset($map[$call])) {
              foreach (array_keys($map) as $key) {
                  if (strcasecmp((string) $key, (string) $call) === 0) {
                      $call = $key;
                      break;
                  }
              }
          }

          if (empty($map[$call])) {
              return false;
          }

          $method = 'get' . $map[$call][0];

          /* Links take positional arguments rather than ($call, $arguments) */
          if ($method === 'getLink') {
              $offset = empty($arguments[0]) ? 0 : $arguments[0];
              $attribute = empty($arguments[1]) ? 'href' : $arguments[1];
              $params = isset($arguments[2]) ? $arguments[2] : array();
              return $this->getLink($offset, $attribute, $params);
          }

          if (method_exists($this, $method)) {
              return $this->$method($call, $arguments);
          }

          return false;
      }

      /**
       * Proxy to allow use of element names as attribute names
       *
       * For many elements variable-style access will be desirable. This function
       * provides for that by forwarding to __call() with no arguments.
       *
       * @param string $value - the variable required
       * @return mixed
       */
      public function __get($value)
      {
          return $this->__call($value, array());
      }

      /**
       * Utility function to help us resolve xml:base values
       *
       * Combines a base URI with a link found inside the feed:
       *  - a link with its own scheme (http://, mailto:, ...) is returned as is;
       *  - a link starting with // keeps only the scheme of the base;
       *  - a link starting with / replaces the whole path of the base;
       *  - any other link is appended to the directory part of the base, with
       *    leading ./ and ../ segments resolved against that directory and
       *    never climbing above the root of the base.
       *
       * @param string $base - the base to add the link to
       * @param string $link
       * @return string the combined URI
       */
      public function combineBases($base, $link)
      {
          $base = (string) $base;
          $link = (string) $link;

          /* Anything carrying its own scheme stands alone */
          if (preg_match('/^[A-Za-z][A-Za-z0-9+.\-]*:/', $link)) {
              return $link;
          }

          /* Split the base into "scheme://authority" and the path after it */
          $root = '';
          $path = $base;
          if (preg_match('/^([A-Za-z][A-Za-z0-9+.\-]*:\/\/[^\/?#]*)(.*)$/s', $base, $parts)) {
              $root = $parts[1];
              $path = $parts[2];
          }

          /* A query string or fragment is not part of the base's hierarchy */
          $path = preg_replace('/[?#].*$/s', '', $path);

          if (strncmp($link, '//', 2) === 0) {
              /* Network-path reference: only the scheme comes from the base */
              $colon = strpos($root, ':');
              $scheme = ($colon === false) ? '' : substr($root, 0, $colon + 1);
              return $scheme . $link;
          }

          if (strncmp($link, '/', 1) === 0) {
              /* Absolute path: hangs directly off the domain */
              return $root . $link;
          }

          /* Drop the last path segment (the document) to get its directory */
          $lastSlash = strrpos($path, '/');
          $directory = ($lastSlash === false) ? '' : substr($path, 0, $lastSlash + 1);
          if ($directory === '' && $root !== '') {
              /* "http://example.com" has an implicit root directory */
              $directory = '/';
          }

          if (! preg_match('/^\.\.?\//', $link)) {
              return $root . $directory . $link;
          }

          /* Step through leading ./ and ../ segments of the link */
          $rooted = ($directory !== '' && $directory[0] === '/');
          $segments = array();
          foreach (explode('/', $directory) as $segment) {
              if ($segment !== '') {
                  $segments[] = $segment;
              }
          }

          while (preg_match('/^(\.\.?)\/(.*)$/s', $link, $step)) {
              $link = $step[2];
              if ($step[1] === '.') {
                  continue;
              }
              if (! empty($segments) && end($segments) !== '..') {
                  array_pop($segments);
              } elseif (! $rooted) {
                  /* A relative base can keep climbing; a rooted one cannot */
                  $segments[] = '..';
              }
          }

          $directory = ($rooted ? '/' : '')
              . (empty($segments) ? '' : implode('/', $segments) . '/');

          return $root . $directory . $link;
      }

      /**
       * Determine whether we need to apply our xml:base rules
       *
       * A link that already carries a scheme is returned untouched; otherwise
       * it is combined with the baseURI of the given node.
       *
       * @param string  $link    the link as found in the feed
       * @param DOMNode $element the node whose baseURI applies to the link
       * @return string
       */
      public function addBase($link, $element)
      {
          if (preg_match('/^[A-Za-z][A-Za-z0-9+.\-]*:/', (string) $link)) {
              return $link;
          }

          return $this->combineBases($element->baseURI, $link);
      }

      /**
       * Get an entry by its position in the feed, starting from zero
       *
       * As well as allowing the items to be iterated over we want to allow
       * users to be able to access a specific entry. This is one of two ways of
       * doing that, the other being by ID.
       *
       * The entry is instantiated on first access (as an instance of the
       * subclass's $itemClass) and cached in $entries; when it exposes an id it
       * is also recorded in $idMappings.
       *
       * @param int $offset
       * @return object an instance of $this->itemClass
       * @throws XML_Feed_Parser_Exception when the feed has no entry at $offset
       */
      public function getEntryByOffset($offset)
      {
          if (! isset($this->entries[$offset])) {
              $entries = $this->model->getElementsByTagName($this->itemElement);

              /* item() returns null outside 0..length-1, so reject both ends */
              if ($offset < 0 || $entries->length <= $offset) {
                  throw new XML_Feed_Parser_Exception('No entries found');
              }

              $node = $entries->item($offset);
              $itemClass = $this->itemClass;
              $entry = new $itemClass($node, $this, $node->baseURI);
              $this->entries[$offset] = $entry;

              $id = $entry->id;
              if ($id) {
                  $this->idMappings[$id] = $entry;
              }
          }

          return $this->entries[$offset];
      }

      /**
       * Return a date in seconds since epoch.
       *
       * Get a date construct. We use PHP's strtotime to return it as a unix
       * datetime, which is the number of seconds since 1970-01-01 00:00:00.
       *
       * @link http://php.net/strtotime
       * @param string $method    The name of the date construct we want
       * @param array  $arguments Included for compatibility with our __call usage
       * @return int|false datetime, or false when the element is missing, empty
       *                   or cannot be parsed
       */
      protected function getDate($method, $arguments)
      {
          $nodes = $this->model->getElementsByTagName($method);
          if ($nodes->length == 0) {
              return false;
          }

          $value = $nodes->item(0)->nodeValue;
          if (empty($value)) {
              return false;
          }

          return strtotime($value);
      }

      /**
       * Get a text construct.
       *
       * @param string $method    The name of the text construct we want
       * @param array  $arguments Included for compatibility with our __call usage
       * @return string|false the first matching element's text, or false
       */
      protected function getText($method, $arguments = array())
      {
          $tags = $this->model->getElementsByTagName($method);
          if ($tags->length > 0) {
              return $tags->item(0)->nodeValue;
          }

          return false;
      }

      /**
       * Apply various rules to retrieve category data.
       *
       * There is no single way of declaring a category in RSS1/1.1 as there is
       * in RSS2 and Atom. Instead the usual approach is to use the dublin core
       * namespace to declare categories. For example delicious use both:
       * <dc:subject>PEAR</dc:subject> and: <taxo:topics><rdf:Bag>
       * <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
       * to declare a categorisation of 'PEAR'.
       *
       * This implementation reads the 'subject' elements only.
       *
       * @param string $call      for compatibility with our overloading
       * @param array  $arguments - arg 0 is the offset, arg 1 is whether to
       *                          return all categories as an array
       * @return string|array|false
       */
      protected function getCategory($call, $arguments)
      {
          $categories = $this->model->getElementsByTagName('subject');
          $offset = empty($arguments[0]) ? 0 : $arguments[0];
          $asArray = ! empty($arguments[1]);

          if ($offset < 0 || $categories->length <= $offset) {
              return false;
          }

          if ($asArray) {
              $list = array();
              foreach ($categories as $category) {
                  $list[] = $category->nodeValue;
              }
              return $list;
          }

          return $categories->item($offset)->nodeValue;
      }

      /**
       * Count occurrences of an element
       *
       * This function will tell us how many times the element $type
       * appears beneath the current model node.
       *
       * @param string $type the element we want to get a count of
       * @return int
       */
      protected function count($type)
      {
          $tags = $this->model->getElementsByTagName($type);

          return $tags ? $tags->length : 0;
      }

      /**
       * Part of our xml:base processing code
       *
       * We need a couple of methods to access XHTML content stored in feeds.
       * This is because we dereference all xml:base references before returning
       * the element. This method handles the attributes: 'src' and 'href' are
       * resolved against the applicable base, 'base' itself is dropped, and
       * every value is escaped for use inside a double-quoted attribute.
       *
       * The DOM is left untouched; only the returned string carries the
       * resolved links.
       *
       * @param DOMElement $node The DOM node we are iterating over
       * @return string the attributes with a leading space, or '' when none
       */
      public function processXHTMLAttributes($node)
      {
          if (! $node->hasAttributes()) {
              return '';
          }

          $pairs = array();
          foreach ($node->attributes as $attribute) {
              if ($attribute->name == 'base') {
                  continue;
              }

              $value = $attribute->value;
              if ($attribute->name == 'src' || $attribute->name == 'href') {
                  $value = $this->addBase($value, $attribute);
              }

              /* ENT_COMPAT escapes the double quote that delimits the value */
              $pairs[] = $attribute->name . '="'
                  . htmlentities($value, ENT_COMPAT, 'UTF-8') . '"';
          }

          if (empty($pairs)) {
              return '';
          }

          return ' ' . implode(' ', $pairs);
      }

      /**
       * Normalise the entities of a text node.
       *
       * The DOM extension always hands out node values as UTF-8, whatever the
       * encoding declared by the document, so no charset conversion is done
       * here. Existing entities are decoded first so that they are not encoded
       * twice, then the text is re-encoded. Quotes are left alone because the
       * result is used as element content, not inside an attribute.
       *
       * @param DOMNode $node the text node to process
       * @return string
       */
      public function processEntitiesForNodeValue($node)
      {
          $decoded = html_entity_decode((string) $node->nodeValue, ENT_NOQUOTES, 'UTF-8');

          return htmlentities($decoded, ENT_NOQUOTES, 'UTF-8');
      }

      /**
       * Part of our xml:base processing code
       *
       * We need a couple of methods to access XHTML content stored in feeds.
       * This is because we dereference all xml:base references before returning
       * the element. This method recurs through the tree descending from the
       * node and builds our string.
       *
       * @param DOMNode $node The DOM node we are processing
       * @return string
       */
      public function traverseNode($node)
      {
          $isElement = $node instanceof DOMElement;
          $content = '';

          /* Add the opening of this node to the content */
          if ($isElement) {
              $content .= '<' . $node->tagName
                  . $this->processXHTMLAttributes($node) . '>';
          }

          /* Process children */
          if ($node->hasChildNodes()) {
              foreach ($node->childNodes as $child) {
                  $content .= $this->traverseNode($child);
              }
          }

          if ($node instanceof DOMText) {
              $content .= $this->processEntitiesForNodeValue($node);
          }

          /* Add the closing of this node to the content */
          if ($isElement) {
              $content .= '</' . $node->tagName . '>';
          }

          return $content;
      }

      /**
       * Get content from RSS feeds (atom has its own implementation)
       *
       * The official way to include full content in an RSS1 entry is to use
       * the content module's element 'encoded', and RSS2 feeds often duplicate
       * that. Often, however, the 'description' element is used instead. We
       * will offer that as a fallback. Atom uses its own approach and overrides
       * this method.
       *
       * Text children contribute their value, element children their XML
       * serialisation; any other child (comment, processing instruction, ...)
       * is serialised through the owning document.
       *
       * @return string|false
       */
      protected function getContent()
      {
          foreach (array('encoded', 'description') as $element) {
              $matches = $this->model->getElementsByTagName($element);
              if ($matches->length == 0) {
                  continue;
              }

              $node = $matches->item(0);
              if (! $node->hasChildNodes()) {
                  return $node->nodeValue;
              }

              $value = '';
              foreach ($node->childNodes as $child) {
                  if ($child instanceof DOMText) {
                      $value .= $child->nodeValue;
                  } elseif ($child instanceof DOMElement) {
                      $value .= simplexml_import_dom($child)->asXML();
                  } else {
                      /* SimpleXML can only import elements */
                      $value .= $child->ownerDocument->saveXML($child);
                  }
              }

              return $value;
          }

          return false;
      }

      /**
       * Checks if this element has a particular child element.
       *
       * @param string $name   the element name to look for
       * @param int    $offset the zero-based occurrence that must exist
       * @return bool
       */
      public function hasKey($name, $offset = 0)
      {
          $search = $this->model->getElementsByTagName($name);

          return $search->length > $offset;
      }

      /**
       * Return an XML serialization of the feed, should it be required. Most
       * users however, will already have a serialization that they used when
       * instantiating the object.
       *
       * @return string XML serialization of element, '' when it cannot be built
       */
      public function __toString()
      {
          $simple = simplexml_import_dom($this->model);

          /* instanceof, not a truth test: an empty element casts to false */
          if (! ($simple instanceof SimpleXMLElement)) {
              return '';
          }

          $xml = $simple->asXML();

          return is_string($xml) ? $xml : '';
      }

      /**
       * Get directory holding RNG schemas. Method is based on that
       * found in Contact_AddressBook.
       *
       * @return string the XML_Feed_Parser/schemas folder under PEAR's data_dir
       * @access public
       * @static
       */
      public static function getSchemaDir()
      {
          require_once 'PEAR/Config.php';
          $config = new PEAR_Config();

          return $config->get('data_dir') . '/XML_Feed_Parser/schemas';
      }

      /**
       * Validate the model against the RelaxNG schema named by $this->relax.
       *
       * The schema file is looked up inside getSchemaDir().
       *
       * @return bool the result of the model's relaxNGValidate() call
       */
      public function relaxNGValidate()
      {
          $path = self::getSchemaDir() . '/' . $this->relax;

          return $this->model->relaxNGValidate($path);
      }
  }
  437. ?>