PageRenderTime 76ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 1ms

/owncloud/files/apps/news/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php

https://gitlab.com/thallian/freebsd-roles
PHP | 229 lines | 129 code | 24 blank | 76 comment | 9 complexity | 3542a3e1391b6280edc70d162bcd6ecf MD5 | raw file
  1. <?php
  2. namespace PicoFeed\Parser;
  3. use DomDocument;
  4. use SimpleXmlElement;
  5. use Exception;
  6. use ZendXml\Security;
  /**
   * XML parser class.
   *
   * Checks for XML eXternal Entity (XXE) and XML Entity Expansion (XEE) attacks on XML documents
   * by routing XML loading through ZendXml\Security::scan().
   *
   * @author Frederic Guillot
   */
  class XmlParser
  {
      /**
       * Get a SimpleXmlElement instance or return false.
       *
       * @static
       *
       * @param string $input XML content
       *
       * @return mixed Result of the security scan performed without a DOM document
       *
       * @throws XmlEntityException When the underlying scanner raises a runtime exception
       */
      public static function getSimpleXml($input)
      {
          return self::scan($input);
      }

      /**
       * Get a DomDocument instance or return false.
       *
       * @static
       *
       * @param string $input XML content
       *
       * @return \DOMDocument|false False for empty input or when the loaded document has no child node
       *
       * @throws XmlEntityException When the underlying scanner raises a runtime exception
       */
      public static function getDomDocument($input)
      {
          if (empty($input)) {
              return false;
          }

          $dom = self::scan($input, new DomDocument());

          // The document is empty, there is probably some parsing errors
          if ($dom && $dom->childNodes->length === 0) {
              return false;
          }

          return $dom;
      }

      /**
       * Small wrapper around ZendXml to turn their exceptions into picoFeed
       * exceptions.
       *
       * @static
       *
       * @param string            $input The XML to load
       * @param \DOMDocument|null $dom   Pass in a DOM document, or use null/omit if SimpleXml should be used
       *
       * @return mixed Whatever ZendXml\Security::scan() returns for the given arguments
       *
       * @throws XmlEntityException When ZendXml raises a RuntimeException
       */
      private static function scan($input, $dom = null)
      {
          try {
              return Security::scan($input, $dom);
          } catch (\ZendXml\Exception\RuntimeException $e) {
              throw new XmlEntityException($e->getMessage());
          }
      }

      /**
       * Load a HTML document by using a DomDocument instance.
       *
       * A DomDocument is always returned: it is left empty when the input is
       * empty, and the result of loadHTML() is not checked.
       *
       * @static
       *
       * @param string $input HTML content
       *
       * @return \DOMDocument
       */
      public static function getHtmlDocument($input)
      {
          $dom = new DomDocument();

          if (empty($input)) {
              return $dom;
          }

          // Keep libxml errors in its internal buffer (readable through
          // getErrors()) instead of emitting them as PHP warnings.
          libxml_use_internal_errors(true);

          // The options argument of loadHTML() is only passed on PHP >= 5.4.
          if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
              $dom->loadHTML($input, LIBXML_NONET);
          } else {
              $dom->loadHTML($input);
          }

          return $dom;
      }

      /**
       * Convert a HTML document to XML.
       *
       * Only the first <body> element of the parsed document is serialized.
       *
       * @static
       *
       * @param string $html HTML document
       *
       * @return string
       */
      public static function htmlToXml($html)
      {
          // NOTE(review): the XML-like prefix presumably nudges the HTML parser
          // towards UTF-8 decoding; confirm before changing it.
          $dom = self::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$html);

          return $dom->saveXML($dom->getElementsByTagName('body')->item(0));
      }

      /**
       * Get XML parser errors.
       *
       * Formats every entry of libxml's error buffer and joins them with ", ".
       *
       * @static
       *
       * @return string Empty string when no error is buffered
       */
      public static function getErrors()
      {
          $errors = array();

          foreach (libxml_get_errors() as $error) {
              $errors[] = sprintf('XML error: %s (Line: %d - Column: %d - Code: %d)',
                  $error->message,
                  $error->line,
                  $error->column,
                  $error->code
              );
          }

          return implode(', ', $errors);
      }

      /**
       * Get the encoding from a xml tag.
       *
       * Looks for `encoding=` followed by a quoted value in the text located
       * before the last "?>" of the input. Single and double quotes are both
       * accepted. Input without "<?xml", without "?>", without `encoding=` or
       * with a truncated value yields an empty string instead of an
       * out-of-range strpos() offset.
       *
       * @static
       *
       * @param string $data Input data
       *
       * @return string Lowercased encoding name, or an empty string when none is found
       */
      public static function getEncodingFromXmlTag($data)
      {
          if (strpos($data, '<?xml') === false) {
              return '';
          }

          $end = strrpos($data, '?>');

          if ($end === false) {
              return '';
          }

          // Normalize quotes so only the double quote has to be searched for.
          $data = str_replace("'", '"', substr($data, 0, $end));

          $p1 = strpos($data, 'encoding=');

          if ($p1 === false) {
              return '';
          }

          // Skip "encoding=" (9 bytes) and the opening quote.
          $start = $p1 + 10;

          // strpos() rejects an offset located past the end of the string.
          if ($start > strlen($data)) {
              return '';
          }

          $p2 = strpos($data, '"', $start);

          if ($p2 === false) {
              return '';
          }

          return strtolower(substr($data, $start, $p2 - $start));
      }

      /**
       * Get the charset from a meta tag.
       *
       * @static
       *
       * @param string $data Input data
       *
       * @return string Lowercased charset, or an empty string when no meta charset is found
       */
      public static function getEncodingFromMetaTag($data)
      {
          $encoding = '';

          if (preg_match('/<meta.*?charset\s*=\s*["\']?\s*([^"\'\s\/>;]+)/i', $data, $match) === 1) {
              $encoding = strtolower($match[1]);
          }

          return $encoding;
      }

      /**
       * Rewrite XPath query to use namespace-uri and local-name derived from prefix.
       *
       * Every `prefix:name` token is replaced, except for the special prefix
       * "xml" which is left untouched. A prefix missing from $ns is rewritten
       * with an empty namespace URI.
       *
       * @static
       *
       * @param string $query XPath query
       * @param array  $ns    Prefix to namespace URI mapping
       *
       * @return string
       */
      public static function replaceXPathPrefixWithNamespaceURI($query, array $ns)
      {
          return preg_replace_callback('/([A-Z0-9]+):([A-Z0-9]+)/iu', function ($matches) use ($ns) {
              // don't try to map the special prefix XML
              if (strtolower($matches[1]) === 'xml') {
                  return $matches[0];
              }

              // Explicit lookup: an unmapped prefix must not raise an
              // undefined index notice.
              $uri = isset($ns[$matches[1]]) ? $ns[$matches[1]] : '';

              return '*[namespace-uri()="'.$uri.'" and local-name()="'.$matches[2].'"]';
          }, $query);
      }

      /**
       * Get the result elements of a XPath query.
       *
       * When a namespace mapping is given, the query prefixes are first
       * rewritten with replaceXPathPrefixWithNamespaceURI().
       *
       * @static
       *
       * @param \SimpleXMLElement $xml   XML element
       * @param string            $query XPath query
       * @param array             $ns    Prefix to namespace URI mapping
       *
       * @return \SimpleXMLElement[]|false Value returned by SimpleXMLElement::xpath()
       */
      public static function getXPathResult(SimpleXMLElement $xml, $query, array $ns = array())
      {
          if (!empty($ns)) {
              $query = static::replaceXPathPrefixWithNamespaceURI($query, $ns);
          }

          return $xml->xpath($query);
      }
  }