PageRenderTime 50ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/sites/all/modules/service_container/lib/Drupal/Component/Utility/Html.php

https://gitlab.com/leoplanxxi/dr7-web-buap-2016
PHP | 379 lines | 122 code | 28 blank | 229 comment | 13 complexity | 5b9805ce1bbcfdc966340af12baa7021 MD5 | raw file
  1. <?php
  2. /**
  3. * @file
  4. * Contains \Drupal\Component\Utility\Html.
  5. */
  6. namespace Drupal\Component\Utility;
  7. /**
  8. * Provides DOMDocument helpers for parsing and serializing HTML strings.
  9. *
  10. * @ingroup utility
  11. */
  class Html {

    /**
     * An array of previously cleaned HTML classes.
     *
     * Keyed by the raw class string passed to getClass(); the value is the
     * cleaned class name.
     *
     * @var array
     */
    protected static $classes = array();

    /**
     * An array of the initial IDs used in one request.
     *
     * Built once, on the first call to getUniqueId(), from the value given to
     * setAjaxHtmlIds(). Keyed by base ID; the value is the highest counter
     * seen for that ID.
     *
     * @var array
     */
    protected static $seenIdsInit;

    /**
     * An array of IDs, including incremented versions when an ID is duplicated.
     *
     * Keyed by base ID; the value is the last counter handed out for that ID.
     *
     * @var array
     */
    protected static $seenIds;

    /**
     * Contains the current AJAX HTML IDs.
     *
     * A space-separated list, as parsed by getUniqueId().
     *
     * @var string
     */
    protected static $ajaxHTMLIDs;

    /**
     * Prepares a string for use as a valid class name.
     *
     * Do not pass one string containing multiple classes as they will be
     * incorrectly concatenated with dashes, i.e. "one two" will become "one-two".
     *
     * Results are memoized per input string in static::$classes.
     *
     * @param string $class
     *   The class name to clean.
     *
     * @return string
     *   The cleaned class name.
     */
    public static function getClass($class) {
      if (!isset(static::$classes[$class])) {
        static::$classes[$class] = static::cleanCssIdentifier(Unicode::strtolower($class));
      }
      return static::$classes[$class];
    }

    /**
     * Prepares a string for use as a CSS identifier (element, class, or ID name).
     *
     * http://www.w3.org/TR/CSS21/syndata.html#characters shows the syntax for
     * valid CSS identifiers (including element names, classes, and IDs in
     * selectors.)
     *
     * @param string $identifier
     *   The identifier to clean.
     * @param array $filter
     *   An array of string replacements to use on the identifier.
     *
     * @return string
     *   The cleaned identifier. An empty string is returned when $identifier is
     *   not valid UTF-8 and therefore cannot be processed.
     */
    public static function cleanCssIdentifier($identifier, array $filter = array(
      ' ' => '-',
      '_' => '-',
      '__' => '__',
      '/' => '-',
      '[' => '-',
      ']' => '',
    )) {
      // strtr() with an array tries the longest key first, so a double
      // underscore is kept as-is while a single underscore becomes a hyphen.
      $identifier = strtr($identifier, $filter);

      // Valid characters in a CSS identifier are:
      // - the hyphen (U+002D)
      // - 0-9 (U+0030 - U+0039)
      // - A-Z (U+0041 - U+005A)
      // - the underscore (U+005F)
      // - a-z (U+0061 - U+007A)
      // - ISO 10646 characters U+00A1 and higher
      // We strip out any character not in the above list.
      $identifier = preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);

      // With the /u modifier preg_replace() returns NULL when the subject is
      // not valid UTF-8. Return the empty string explicitly instead of passing
      // NULL on to the next preg_replace() call.
      if ($identifier === NULL) {
        return '';
      }

      // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
      $identifier = preg_replace(array(
        '/^[0-9]/',
        '/^(-[0-9])|^(--)/',
      ), array('_', '__'), $identifier);

      return $identifier;
    }

    /**
     * Sets the AJAX HTML IDs.
     *
     * The value is only read the first time getUniqueId() runs, when the
     * initial list of seen IDs is built.
     *
     * @param string $ajax_html_ids
     *   The AJAX HTML IDs, probably coming from the current request.
     */
    public static function setAjaxHtmlIds($ajax_html_ids = '') {
      static::$ajaxHTMLIDs = $ajax_html_ids;
    }

    /**
     * Prepares a string for use as a valid HTML ID and guarantees uniqueness.
     *
     * This function ensures that each passed HTML ID value only exists once on
     * the page. By tracking the already returned ids, this function enables
     * forms, blocks, and other content to be output multiple times on the same
     * page, without breaking (X)HTML validation.
     *
     * For already existing IDs, a counter is appended to the ID string.
     * Therefore, JavaScript and CSS code should not rely on any value that was
     * generated by this function and instead should rely on manually added CSS
     * classes or similarly reliable constructs.
     *
     * Two consecutive hyphens separate the counter from the original ID. To
     * manage uniqueness across multiple Ajax requests on the same page, Ajax
     * requests POST an array of all IDs currently present on the page, which are
     * used to prime this function's cache upon first invocation.
     *
     * To allow reverse-parsing of IDs submitted via Ajax, any multiple
     * consecutive hyphens in the originally passed $id are replaced with a
     * single hyphen.
     *
     * @param string $id
     *   The ID to clean.
     *
     * @return string
     *   The cleaned ID.
     */
    public static function getUniqueId($id) {
      // If this is an Ajax request, then content returned by this page request
      // will be merged with content already on the base page. The HTML IDs must
      // be unique for the fully merged content. Therefore, initialize the list
      // of seen IDs to take into account IDs already in use on the base page.
      if (!isset(static::$seenIdsInit)) {
        static::$seenIdsInit = array();

        if (!empty(static::$ajaxHTMLIDs)) {
          // The AJAX HTML IDs contain the ids as they were returned by this
          // function, potentially with the appended counter, so we parse them
          // to reconstruct the list of seen IDs. The list originates from the
          // request and is untrusted: only a counter made up purely of digits
          // is accepted, and it is cast to an integer before being stored.
          foreach (explode(' ', static::$ajaxHTMLIDs) as $seen_id) {
            // We rely on '--' being used solely for separating a base id from
            // the counter, which this function ensures when returning an id.
            $parts = explode('--', $seen_id, 2);
            $i = 1;
            if (isset($parts[1]) && preg_match('/^[0-9]+\z/', $parts[1]) && (int) $parts[1] > 0) {
              $seen_id = $parts[0];
              $i = (int) $parts[1];
            }
            // Remember the highest counter seen for each base id.
            if (!isset(static::$seenIdsInit[$seen_id]) || ($i > static::$seenIdsInit[$seen_id])) {
              static::$seenIdsInit[$seen_id] = $i;
            }
          }
        }
      }

      // resetSeenIds() clears only static::$seenIds, so after a reset we start
      // again from the initial list.
      if (!isset(static::$seenIds)) {
        static::$seenIds = static::$seenIdsInit;
      }

      $id = static::getId($id);

      // Ensure IDs are unique by appending a counter after the first occurrence.
      // The counter needs to be appended with a delimiter that does not exist in
      // the base ID. Requiring a unique delimiter helps ensure that we really do
      // return unique IDs and also helps us re-create the list of seen IDs
      // during Ajax requests.
      if (isset(static::$seenIds[$id])) {
        $id = $id . '--' . ++static::$seenIds[$id];
      }
      else {
        static::$seenIds[$id] = 1;
      }

      return $id;
    }

    /**
     * Prepares a string for use as a valid HTML ID.
     *
     * Only use this function when you want to intentionally skip the uniqueness
     * guarantee of self::getUniqueId().
     *
     * @param string $id
     *   The ID to clean.
     *
     * @return string
     *   The cleaned ID.
     *
     * @see self::getUniqueId()
     */
    public static function getId($id) {
      $id = strtr(Unicode::strtolower($id), array(' ' => '-', '_' => '-', '[' => '-', ']' => ''));

      // As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs can
      // only contain letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
      // colons (":"), and periods ("."). We strip out any character not in that
      // list. Note that the CSS spec doesn't allow colons or periods in identifiers
      // (http://www.w3.org/TR/CSS21/syndata.html#characters), so we strip those two
      // characters as well.
      $id = preg_replace('/[^A-Za-z0-9\-_]/', '', $id);

      // Removing multiple consecutive hyphens, so that '--' stays reserved as
      // the counter delimiter used by getUniqueId().
      $id = preg_replace('/\-+/', '-', $id);

      return $id;
    }

    /**
     * Resets the list of seen IDs.
     *
     * Only static::$seenIds is cleared; the initial list is kept and is copied
     * back on the next getUniqueId() call.
     */
    public static function resetSeenIds() {
      static::$seenIds = NULL;
    }

    /**
     * Normalizes an HTML snippet.
     *
     * This function is essentially \DOMDocument::normalizeDocument(), but
     * operates on an HTML string instead of a \DOMDocument.
     *
     * @param string $html
     *   The HTML string to normalize.
     *
     * @return string
     *   The normalized HTML string.
     */
    public static function normalize($html) {
      $document = static::load($html);
      return static::serialize($document);
    }

    /**
     * Parses an HTML snippet and returns it as a DOM object.
     *
     * This function loads the body part of a partial (X)HTML document and returns
     * a full \DOMDocument object that represents this document.
     *
     * Use \Drupal\Component\Utility\Html::serialize() to serialize this
     * \DOMDocument back to a string.
     *
     * @param string $html
     *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
     *   import.
     *
     * @return \DOMDocument
     *   A \DOMDocument that represents the loaded (X)HTML snippet.
     */
    public static function load($html) {
      // PHP's \DOMDocument serialization adds stray whitespace in case the
      // markup of the wrapping document contains newlines. The wrapper is
      // therefore assembled as one line, which also keeps it independent of the
      // line endings this source file happens to be stored with.
      $document = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        . '<html xmlns="http://www.w3.org/1999/xhtml">'
        . '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>'
        . '<body>!html</body>'
        . '</html>';

      // strtr() does not rescan inserted text, so $html is injected verbatim.
      $document = strtr($document, array('!html' => $html));

      $dom = new \DOMDocument();
      // Ignore warnings during HTML soup loading.
      @$dom->loadHTML($document);

      return $dom;
    }

    /**
     * Converts the body of a \DOMDocument back to an HTML snippet.
     *
     * The function serializes the body part of a \DOMDocument back to an (X)HTML
     * snippet. The resulting (X)HTML snippet will be properly formatted to be
     * compatible with HTML user agents.
     *
     * @param \DOMDocument $document
     *   A \DOMDocument object to serialize, only the tags below the first <body>
     *   node will be converted.
     *
     * @return string
     *   A valid (X)HTML snippet, as a string. An empty string is returned when
     *   the document has no <body> element.
     */
    public static function serialize(\DOMDocument $document) {
      $body_node = $document->getElementsByTagName('body')->item(0);
      $html = '';

      // item(0) yields NULL when there is no <body>; there is nothing to
      // serialize in that case.
      if ($body_node === NULL) {
        return $html;
      }

      foreach ($body_node->getElementsByTagName('script') as $node) {
        static::escapeCdataElement($node);
      }
      foreach ($body_node->getElementsByTagName('style') as $node) {
        static::escapeCdataElement($node, '/*', '*/');
      }
      foreach ($body_node->childNodes as $node) {
        $html .= $document->saveXML($node);
      }

      return $html;
    }

    /**
     * Adds comments around a <!CDATA section in a \DOMNode.
     *
     * \DOMDocument::loadHTML() in \Drupal\Component\Utility\Html::load() makes
     * CDATA sections from the contents of inline script and style tags. This can
     * cause HTML4 browsers to throw exceptions.
     *
     * This function attempts to solve the problem by creating a
     * \DOMDocumentFragment to comment the CDATA tag.
     *
     * @param \DOMNode $node
     *   The element potentially containing a CDATA node.
     * @param string $comment_start
     *   (optional) A string to use as a comment start marker to escape the CDATA
     *   declaration. Defaults to '//'.
     * @param string $comment_end
     *   (optional) A string to use as a comment end marker to escape the CDATA
     *   declaration. Defaults to an empty string.
     */
    public static function escapeCdataElement(\DOMNode $node, $comment_start = '//', $comment_end = '') {
      $embed_prefix = "\n<!--{$comment_start}--><![CDATA[{$comment_start} ><!--{$comment_end}\n";
      $embed_suffix = "\n{$comment_start}--><!]]>{$comment_end}\n";

      // $node->childNodes is a live list. Iterate over a snapshot so that
      // inserting and removing children below neither cuts the loop short nor
      // revisits the CDATA sections created here.
      $children = iterator_to_array($node->childNodes);

      foreach ($children as $child_node) {
        if (!($child_node instanceof \DOMCdataSection)) {
          continue;
        }

        // Prevent invalid cdata escaping as this would throw a DOM error.
        // This is the same behavior as found in libxml2.
        // Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection
        // Fix explanation: http://en.wikipedia.org/wiki/CDATA#Nesting
        $data = str_replace(']]>', ']]]]><![CDATA[>', $child_node->data);

        $fragment = $node->ownerDocument->createDocumentFragment();
        if (!$fragment->appendXML($embed_prefix . $data . $embed_suffix)) {
          // The wrapped data could not be parsed; keep the original section
          // rather than dropping its content.
          continue;
        }

        // Put the replacement where the original section was, then drop the
        // original.
        $node->insertBefore($fragment, $child_node);
        $node->removeChild($child_node);
      }
    }

    /**
     * Decodes all HTML entities including numerical ones to regular UTF-8 bytes.
     *
     * Double-escaped entities will only be decoded once ("&amp;lt;" becomes
     * "&lt;", not "<"). Be careful when using this function, as it will revert
     * previous sanitization efforts (&lt;script&gt; will become <script>).
     *
     * @param string $text
     *   The text to decode entities in.
     *
     * @return string
     *   The input $text, with all HTML entities decoded once.
     */
    public static function decodeEntities($text) {
      return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
    }

  }