PageRenderTime 46ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/core/modules/editor/src/EditorXssFilter/Standard.php

https://gitlab.com/reasonat/test8
PHP | 173 lines | 58 code | 20 blank | 95 comment | 9 complexity | 4265a936497d6b1d7cd6629ca9187191 MD5 | raw file
  1. <?php
  2. namespace Drupal\editor\EditorXssFilter;
  3. use Drupal\Component\Utility\Html;
  4. use Drupal\Component\Utility\Xss;
  5. use Drupal\filter\FilterFormatInterface;
  6. use Drupal\editor\EditorXssFilterInterface;
  7. /**
  8. * Defines the standard text editor XSS filter.
  9. */
  10. class Standard extends Xss implements EditorXssFilterInterface {
  11. /**
  12. * {@inheritdoc}
  13. */
  14. public static function filterXss($html, FilterFormatInterface $format, FilterFormatInterface $original_format = NULL) {
  15. // Apply XSS filtering, but blacklist the <script>, <style>, <link>, <embed>
  16. // and <object> tags.
  17. // The <script> and <style> tags are blacklisted because their contents
  18. // can be malicious (and therefor they are inherently unsafe), whereas for
  19. // all other tags, only their attributes can make them malicious. Since
  20. // \Drupal\Component\Utility\Xss::filter() protects against malicious
  21. // attributes, we take no blacklisting action.
  22. // The exceptions to the above rule are <link>, <embed> and <object>:
  23. // - <link> because the href attribute allows the attacker to import CSS
  24. // using the HTTP(S) protocols which Xss::filter() considers safe by
  25. // default. The imported remote CSS is applied to the main document, thus
  26. // allowing for the same XSS attacks as a regular <style> tag.
  27. // - <embed> and <object> because these tags allow non-HTML applications or
  28. // content to be embedded using the src or data attributes, respectively.
  29. // This is safe in the case of HTML documents, but not in the case of
  30. // Flash objects for example, that may access/modify the main document
  31. // directly.
  32. // <iframe> is considered safe because it only allows HTML content to be
  33. // embedded, hence ensuring the same origin policy always applies.
  34. $dangerous_tags = array('script', 'style', 'link', 'embed', 'object');
  35. // Simply blacklisting these five dangerous tags would bring safety, but
  36. // also user frustration: what if a text format is configured to allow
  37. // <embed>, for example? Then we would strip that tag, even though it is
  38. // allowed, thereby causing data loss!
  39. // Therefor, we want to be smarter still. We want to take into account which
  40. // HTML tags are allowed and forbidden by the text format we're filtering
  41. // for, and if we're switching from another text format, we want to take
  42. // that format's allowed and forbidden tags into account as well.
  43. // In other words: we only expect markup allowed in both the original and
  44. // the new format to continue to exist.
  45. $format_restrictions = $format->getHtmlRestrictions();
  46. if ($original_format !== NULL) {
  47. $original_format_restrictions = $original_format->getHtmlRestrictions();
  48. }
  49. // Any tags that are explicitly blacklisted by the text format must be
  50. // appended to the list of default dangerous tags: if they're explicitly
  51. // forbidden, then we must respect that configuration.
  52. // When switching from another text format, we must use the union of
  53. // forbidden tags: if either text format is more restrictive, then the
  54. // safety expectations of *both* text formats apply.
  55. $forbidden_tags = self::getForbiddenTags($format_restrictions);
  56. if ($original_format !== NULL) {
  57. $forbidden_tags = array_merge($forbidden_tags, self::getForbiddenTags($original_format_restrictions));
  58. }
  59. // Any tags that are explicitly whitelisted by the text format must be
  60. // removed from the list of default dangerous tags: if they're explicitly
  61. // allowed, then we must respect that configuration.
  62. // When switching from another format, we must use the intersection of
  63. // allowed tags: if either format is more restrictive, then the safety
  64. // expectations of *both* formats apply.
  65. $allowed_tags = self::getAllowedTags($format_restrictions);
  66. if ($original_format !== NULL) {
  67. $allowed_tags = array_intersect($allowed_tags, self::getAllowedTags($original_format_restrictions));
  68. }
  69. // Don't blacklist dangerous tags that are explicitly allowed in both text
  70. // formats.
  71. $blacklisted_tags = array_diff($dangerous_tags, $allowed_tags);
  72. // Also blacklist tags that are explicitly forbidden in either text format.
  73. $blacklisted_tags = array_merge($blacklisted_tags, $forbidden_tags);
  74. $output = static::filter($html, $blacklisted_tags);
  75. // Since data-attributes can contain encoded HTML markup that could be
  76. // decoded and interpreted by editors, we need to apply XSS filtering to
  77. // their contents.
  78. return static::filterXssDataAttributes($output);
  79. }
  80. /**
  81. * Applies a very permissive XSS/HTML filter to data-attributes.
  82. *
  83. * @param string $html
  84. * The string to apply the data-attributes filtering to.
  85. *
  86. * @return string
  87. * The filtered string.
  88. */
  89. protected static function filterXssDataAttributes($html) {
  90. if (stristr($html, 'data-') !== FALSE) {
  91. $dom = Html::load($html);
  92. $xpath = new \DOMXPath($dom);
  93. foreach ($xpath->query('//@*[starts-with(name(.), "data-")]') as $node) {
  94. // The data-attributes contain an HTML-encoded value, so we need to
  95. // decode the value, apply XSS filtering and then re-save as encoded
  96. // value. There is no need to explicitly decode $node->value, since the
  97. // DOMAttr::value getter returns the decoded value.
  98. $value = Xss::filterAdmin($node->value);
  99. $node->value = Html::escape($value);
  100. }
  101. $html = Html::serialize($dom);
  102. }
  103. return $html;
  104. }
  105. /**
  106. * Get all allowed tags from a restrictions data structure.
  107. *
  108. * @param array|FALSE $restrictions
  109. * Restrictions as returned by FilterInterface::getHTMLRestrictions().
  110. *
  111. * @return array
  112. * An array of allowed HTML tags.
  113. *
  114. * @see \Drupal\filter\Plugin\Filter\FilterInterface::getHTMLRestrictions()
  115. */
  116. protected static function getAllowedTags($restrictions) {
  117. if ($restrictions === FALSE || !isset($restrictions['allowed'])) {
  118. return array();
  119. }
  120. $allowed_tags = array_keys($restrictions['allowed']);
  121. // Exclude the wildcard tag, which is used to set attribute restrictions on
  122. // all tags simultaneously.
  123. $allowed_tags = array_diff($allowed_tags, array('*'));
  124. return $allowed_tags;
  125. }
  126. /**
  127. * Get all forbidden tags from a restrictions data structure.
  128. *
  129. * @param array|FALSE $restrictions
  130. * Restrictions as returned by FilterInterface::getHTMLRestrictions().
  131. *
  132. * @return array
  133. * An array of forbidden HTML tags.
  134. *
  135. * @see \Drupal\filter\Plugin\Filter\FilterInterface::getHTMLRestrictions()
  136. */
  137. protected static function getForbiddenTags($restrictions) {
  138. if ($restrictions === FALSE || !isset($restrictions['forbidden_tags'])) {
  139. return array();
  140. }
  141. else {
  142. return $restrictions['forbidden_tags'];
  143. }
  144. }
  145. /**
  146. * {@inheritdoc}
  147. */
  148. protected static function needsRemoval($html_tags, $elem) {
  149. // See static::filterXss() about how this class uses blacklisting instead
  150. // of the normal whitelisting.
  151. return !parent::needsRemoval($html_tags, $elem);
  152. }
  153. }