/sites/all/modules/service_container/lib/Drupal/Component/Utility/Html.php
PHP | 379 lines | 122 code | 28 blank | 229 comment | 13 complexity | 5b9805ce1bbcfdc966340af12baa7021 MD5 | raw file
- <?php
- /**
- * @file
- * Contains \Drupal\Component\Utility\Html.
- */
- namespace Drupal\Component\Utility;
- /**
- * Provides DOMDocument helpers for parsing and serializing HTML strings.
- *
- * @ingroup utility
- */
class Html {

  /**
   * An array of previously cleaned HTML classes.
   *
   * Keyed by the raw class name passed to getClass(); the value is the
   * cleaned class name.
   *
   * @var array
   */
  protected static $classes = array();

  /**
   * An array of the initial IDs used in one request.
   *
   * Built once, on the first call to getUniqueId(), from the value handed to
   * setAjaxHtmlIds(). Keyed by base ID; the value is the highest counter seen.
   *
   * @var array
   */
  protected static $seenIdsInit;

  /**
   * An array of IDs, including incremented versions when an ID is duplicated.
   *
   * Keyed by base ID; the value is the last counter handed out for that ID.
   *
   * @var array
   */
  protected static $seenIds;

  /**
   * Contains the current AJAX HTML IDs.
   *
   * A space-separated list of IDs, as split by getUniqueId().
   *
   * @var string
   */
  protected static $ajaxHTMLIDs;

  /**
   * Prepares a string for use as a valid class name.
   *
   * Do not pass one string containing multiple classes as they will be
   * incorrectly concatenated with dashes, i.e. "one two" will become "one-two".
   *
   * Results are memoized per raw input in static::$classes.
   *
   * @param string $class
   *   The class name to clean.
   *
   * @return string
   *   The cleaned class name.
   */
  public static function getClass($class) {
    if (!isset(static::$classes[$class])) {
      static::$classes[$class] = static::cleanCssIdentifier(Unicode::strtolower($class));
    }
    return static::$classes[$class];
  }

  /**
   * Prepares a string for use as a CSS identifier (element, class, or ID name).
   *
   * http://www.w3.org/TR/CSS21/syndata.html#characters shows the syntax for
   * valid CSS identifiers (including element names, classes, and IDs in
   * selectors.)
   *
   * @param string $identifier
   *   The identifier to clean.
   * @param array $filter
   *   An array of string replacements to use on the identifier. The default
   *   maps '__' to itself so that double underscores survive the single
   *   underscore replacement (strtr() tries the longest key first).
   *
   * @return string
   *   The cleaned identifier.
   */
  public static function cleanCssIdentifier($identifier, array $filter = array(
    ' ' => '-',
    '_' => '-',
    '__' => '__',
    '/' => '-',
    '[' => '-',
    ']' => ''
  )) {
    $identifier = strtr($identifier, $filter);

    // Valid characters in a CSS identifier are:
    // - the hyphen (U+002D)
    // - 0-9 (U+0030 - U+0039)
    // - A-Z (U+0041 - U+005A)
    // - the underscore (U+005F)
    // - a-z (U+0061 - U+007A)
    // - ISO 10646 characters U+00A1 and higher (the pattern below accepts
    //   them up to U+FFFF)
    // We strip out any character not in the above list.
    $identifier = preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);

    // Identifiers cannot start with a digit, two hyphens, or a hyphen followed
    // by a digit. A leading digit is replaced by '_'; a leading '--' or
    // '-<digit>' is replaced by '__'.
    $identifier = preg_replace(array(
      '/^[0-9]/',
      '/^(-[0-9])|^(--)/'
    ), array('_', '__'), $identifier);

    return $identifier;
  }

  /**
   * Sets the AJAX HTML IDs.
   *
   * The value is only consulted by getUniqueId() while the initial list of
   * seen IDs has not been built yet.
   *
   * @param string $ajax_html_ids
   *   The AJAX HTML IDs, probably coming from the current request, as a
   *   space-separated list.
   */
  public static function setAjaxHtmlIds($ajax_html_ids = '') {
    static::$ajaxHTMLIDs = $ajax_html_ids;
  }

  /**
   * Prepares a string for use as a valid HTML ID and guarantees uniqueness.
   *
   * This function ensures that each passed HTML ID value only exists once on
   * the page. By tracking the already returned ids, this function enables
   * forms, blocks, and other content to be output multiple times on the same
   * page, without breaking (X)HTML validation.
   *
   * For already existing IDs, a counter is appended to the ID string.
   * Therefore, JavaScript and CSS code should not rely on any value that was
   * generated by this function and instead should rely on manually added CSS
   * classes or similarly reliable constructs.
   *
   * Two consecutive hyphens separate the counter from the original ID. To
   * manage uniqueness across multiple Ajax requests on the same page, the IDs
   * already present on the page can be supplied through setAjaxHtmlIds();
   * they are used to prime this function's cache upon first invocation.
   *
   * To allow reverse-parsing of IDs submitted via Ajax, any multiple
   * consecutive hyphens in the originally passed $id are replaced with a
   * single hyphen.
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   */
  public static function getUniqueId($id) {
    // If this is an Ajax request, then content returned by this page request
    // will be merged with content already on the base page. The HTML IDs must
    // be unique for the fully merged content. Therefore, initialize the seen
    // IDs to take into account IDs that are already in use on the base page.
    if (!isset(static::$seenIdsInit)) {
      $initial_ids = array();
      if (!empty(static::$ajaxHTMLIDs)) {
        // The supplied IDs are in the form this function returns them,
        // potentially with the appended counter, so parse them to reconstruct
        // the highest counter already used for each base ID.
        foreach (explode(' ', static::$ajaxHTMLIDs) as $seen_id) {
          // We rely on '--' being used solely for separating a base id from
          // the counter, which this function ensures when returning an id.
          $parts = explode('--', $seen_id, 2);
          // Only a plain run of digits is a counter. Accepting any numeric
          // string (e.g. "1.5" or "1e3") would later be incremented into a
          // non-integer suffix, so such IDs are treated as opaque base IDs.
          if (!empty($parts[1]) && preg_match('/^[0-9]+\z/', $parts[1])) {
            $seen_id = $parts[0];
            $counter = (int) $parts[1];
          }
          else {
            $counter = 1;
          }
          if (!isset($initial_ids[$seen_id]) || $counter > $initial_ids[$seen_id]) {
            $initial_ids[$seen_id] = $counter;
          }
        }
      }
      static::$seenIdsInit = $initial_ids;
    }
    if (!isset(static::$seenIds)) {
      static::$seenIds = static::$seenIdsInit;
    }

    $id = static::getId($id);

    // Ensure IDs are unique by appending a counter after the first occurrence.
    // The counter needs to be appended with a delimiter that does not exist in
    // the base ID. Requiring a unique delimiter helps ensure that we really do
    // return unique IDs and also helps us re-create the seen IDs during Ajax
    // requests.
    if (isset(static::$seenIds[$id])) {
      $id = $id . '--' . ++static::$seenIds[$id];
    }
    else {
      static::$seenIds[$id] = 1;
    }
    return $id;
  }

  /**
   * Prepares a string for use as a valid HTML ID.
   *
   * Only use this function when you want to intentionally skip the uniqueness
   * guarantee of self::getUniqueId().
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   *
   * @see self::getUniqueId()
   */
  public static function getId($id) {
    $id = strtr(Unicode::strtolower($id), array(' ' => '-', '_' => '-', '[' => '-', ']' => ''));

    // As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs can
    // only contain letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
    // colons (":"), and periods ("."). We strip out any character not in that
    // list. Note that the CSS spec doesn't allow colons or periods in identifiers
    // (http://www.w3.org/TR/CSS21/syndata.html#characters), so we strip those two
    // characters as well.
    $id = preg_replace('/[^A-Za-z0-9\-_]/', '', $id);

    // Collapse runs of hyphens so that '--' stays reserved as the counter
    // delimiter used by getUniqueId().
    $id = preg_replace('/\-+/', '-', $id);
    return $id;
  }

  /**
   * Resets the list of seen IDs.
   *
   * Only the working list is cleared; the initial list built from the AJAX
   * HTML IDs is kept and is copied back on the next getUniqueId() call.
   */
  public static function resetSeenIds() {
    static::$seenIds = NULL;
  }

  /**
   * Normalizes an HTML snippet.
   *
   * This function is essentially \DOMDocument::normalizeDocument(), but
   * operates on an HTML string instead of a \DOMDocument. It is implemented
   * as a load() / serialize() round trip.
   *
   * @param string $html
   *   The HTML string to normalize.
   *
   * @return string
   *   The normalized HTML string.
   */
  public static function normalize($html) {
    $document = static::load($html);
    return static::serialize($document);
  }

  /**
   * Parses an HTML snippet and returns it as a DOM object.
   *
   * This function loads the body part of a partial (X)HTML document and returns
   * a full \DOMDocument object that represents this document.
   *
   * Use \Drupal\Component\Utility\Html::serialize() to serialize this
   * \DOMDocument back to a string.
   *
   * @param string $html
   *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
   *   import.
   *
   * @return \DOMDocument
   *   A \DOMDocument that represents the loaded (X)HTML snippet.
   */
  public static function load($html) {
    $document = <<<EOD
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>
<body>!html</body>
</html>
EOD;
    // PHP's \DOMDocument serialization adds straw whitespace in case the markup
    // of the wrapping document contains newlines, so ensure to remove all
    // newlines before injecting the actual HTML body to process. Both
    // replacements happen in a single strtr() pass, so newlines inside $html
    // itself are left untouched.
    $document = strtr($document, array("\n" => '', '!html' => $html));

    $dom = new \DOMDocument();
    // Ignore warnings during HTML soup loading.
    @$dom->loadHTML($document);

    return $dom;
  }

  /**
   * Converts the body of a \DOMDocument back to an HTML snippet.
   *
   * The function serializes the body part of a \DOMDocument back to an (X)HTML
   * snippet. The resulting (X)HTML snippet will be properly formatted to be
   * compatible with HTML user agents.
   *
   * @param \DOMDocument $document
   *   A \DOMDocument object to serialize, only the tags below the first <body>
   *   node will be converted.
   *
   * @return string
   *   A valid (X)HTML snippet, as a string. An empty string is returned when
   *   the document contains no <body> element.
   */
  public static function serialize(\DOMDocument $document) {
    $body_node = $document->getElementsByTagName('body')->item(0);
    // A document without a <body> has nothing to serialize; bail out instead
    // of calling a method on NULL.
    if ($body_node === NULL) {
      return '';
    }

    foreach ($body_node->getElementsByTagName('script') as $node) {
      static::escapeCdataElement($node);
    }
    foreach ($body_node->getElementsByTagName('style') as $node) {
      static::escapeCdataElement($node, '/*', '*/');
    }

    $html = '';
    foreach ($body_node->childNodes as $node) {
      $html .= $document->saveXML($node);
    }
    return $html;
  }

  /**
   * Adds comments around a <!CDATA section in a \DOMNode.
   *
   * \DOMDocument::loadHTML() in \Drupal\Component\Utility\Html::load() makes
   * CDATA sections from the contents of inline script and style tags. This can
   * cause HTML4 browsers to throw exceptions.
   *
   * This function attempts to solve the problem by creating a
   * \DOMDocumentFragment to comment the CDATA tag.
   *
   * @param \DOMNode $node
   *   The element potentially containing a CDATA node.
   * @param string $comment_start
   *   (optional) A string to use as a comment start marker to escape the CDATA
   *   declaration. Defaults to '//'.
   * @param string $comment_end
   *   (optional) A string to use as a comment end marker to escape the CDATA
   *   declaration. Defaults to an empty string.
   */
  public static function escapeCdataElement(\DOMNode $node, $comment_start = '//', $comment_end = '') {
    // Take a snapshot of the children first: the loop below appends and
    // removes children of $node, and the appended fragment itself contains a
    // CDATA section that must not be visited (and wrapped) again.
    $child_nodes = array();
    foreach ($node->childNodes as $child_node) {
      $child_nodes[] = $child_node;
    }

    foreach ($child_nodes as $child_node) {
      if (!($child_node instanceof \DOMCdataSection)) {
        continue;
      }
      $embed_prefix = "\n<!--{$comment_start}--><![CDATA[{$comment_start} ><!--{$comment_end}\n";
      $embed_suffix = "\n{$comment_start}--><!]]>{$comment_end}\n";

      // Prevent invalid cdata escaping as this would throw a DOM error.
      // This is the same behavior as found in libxml2.
      // Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection
      // Fix explanation: http://en.wikipedia.org/wiki/CDATA#Nesting
      $data = str_replace(']]>', ']]]]><![CDATA[>', $child_node->data);

      $fragment = $node->ownerDocument->createDocumentFragment();
      $fragment->appendXML($embed_prefix . $data . $embed_suffix);
      $node->appendChild($fragment);
      $node->removeChild($child_node);
    }
  }

  /**
   * Decodes all HTML entities including numerical ones to regular UTF-8 bytes.
   *
   * Double-escaped entities will only be decoded once ("&amp;lt;" becomes
   * "&lt;", not "<"). Be careful when using this function, as it will revert
   * previous sanitization efforts (&lt;script&gt; will become <script>).
   *
   * @param string $text
   *   The text to decode entities in.
   *
   * @return string
   *   The input $text, with all HTML entities decoded once.
   */
  public static function decodeEntities($text) {
    return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
  }

}