/src/Symfony/Component/HtmlSanitizer/HtmlSanitizerConfig.php

https://github.com/FabienD/symfony · PHP · 501 lines · 223 code · 75 blank · 203 comment · 6 complexity · 4934ea5bdbccf30e8a014c7e66140208 MD5 · raw file

  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\HtmlSanitizer;
  11. use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
  12. use Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer\AttributeSanitizerInterface;
  13. /**
  14. * @author Titouan Galopin <galopintitouan@gmail.com>
  15. *
  16. * @experimental
  17. */
  18. class HtmlSanitizerConfig
  19. {
  20. /**
  21. * Elements that should be removed but their children should be retained.
  22. *
  23. * @var array<string, true>
  24. */
  25. private array $blockedElements = [];
  26. /**
  27. * Elements that should be retained, with their allowed attributes.
  28. *
  29. * @var array<string, array<string, true>>
  30. */
  31. private array $allowedElements = [];
  32. /**
  33. * Attributes that should always be added to certain elements.
  34. *
  35. * @var array<string, array<string, string>>
  36. */
  37. private array $forcedAttributes = [];
  38. /**
  39. * Links schemes that should be retained, other being dropped.
  40. *
  41. * @var list<string>
  42. */
  43. private array $allowedLinkSchemes = ['http', 'https', 'mailto', 'tel'];
  44. /**
  45. * Links hosts that should be retained (by default, all hosts are allowed).
  46. *
  47. * @var list<string>|null
  48. */
  49. private ?array $allowedLinkHosts = null;
  50. /**
  51. * Should the sanitizer allow relative links (by default, they are dropped).
  52. */
  53. private bool $allowRelativeLinks = false;
  54. /**
  55. * Image/Audio/Video schemes that should be retained, other being dropped.
  56. *
  57. * @var list<string>
  58. */
  59. private array $allowedMediaSchemes = ['http', 'https', 'data'];
  60. /**
  61. * Image/Audio/Video hosts that should be retained (by default, all hosts are allowed).
  62. *
  63. * @var list<string>|null
  64. */
  65. private ?array $allowedMediaHosts = null;
  66. /**
  67. * Should the sanitizer allow relative media URL (by default, they are dropped).
  68. */
  69. private bool $allowRelativeMedias = false;
  70. /**
  71. * Should the URL in the sanitized document be transformed to HTTPS if they are using HTTP.
  72. */
  73. private bool $forceHttpsUrls = false;
  74. /**
  75. * Sanitizers that should be applied to specific attributes in addition to standard sanitization.
  76. *
  77. * @var list<AttributeSanitizerInterface>
  78. */
  79. private array $attributeSanitizers;
  80. private int $maxInputLength = 20_000;
  81. public function __construct()
  82. {
  83. $this->attributeSanitizers = [
  84. new Visitor\AttributeSanitizer\UrlAttributeSanitizer(),
  85. ];
  86. }
  87. /**
  88. * Allows all static elements and attributes from the W3C Sanitizer API standard.
  89. *
  90. * All scripts will be removed but the output may still contain other dangerous
  91. * behaviors like CSS injection (click-jacking), CSS expressions, ...
  92. */
  93. public function allowStaticElements(): static
  94. {
  95. $elements = array_merge(
  96. array_keys(W3CReference::HEAD_ELEMENTS),
  97. array_keys(W3CReference::BODY_ELEMENTS)
  98. );
  99. $clone = clone $this;
  100. foreach ($elements as $element) {
  101. $clone = $clone->allowElement($element, '*');
  102. }
  103. return $clone;
  104. }
  105. /**
  106. * Allows "safe" elements and attributes.
  107. *
  108. * All scripts will be removed, as well as other dangerous behaviors like CSS injection.
  109. */
  110. public function allowSafeElements(): static
  111. {
  112. $attributes = [];
  113. foreach (W3CReference::ATTRIBUTES as $attribute => $isSafe) {
  114. if ($isSafe) {
  115. $attributes[] = $attribute;
  116. }
  117. }
  118. $clone = clone $this;
  119. foreach (W3CReference::HEAD_ELEMENTS as $element => $isSafe) {
  120. if ($isSafe) {
  121. $clone = $clone->allowElement($element, $attributes);
  122. }
  123. }
  124. foreach (W3CReference::BODY_ELEMENTS as $element => $isSafe) {
  125. if ($isSafe) {
  126. $clone = $clone->allowElement($element, $attributes);
  127. }
  128. }
  129. return $clone;
  130. }
  131. /**
  132. * Allows only a given list of schemes to be used in links href attributes.
  133. *
  134. * All other schemes will be dropped.
  135. *
  136. * @param list<string> $allowLinkSchemes
  137. */
  138. public function allowLinkSchemes(array $allowLinkSchemes): static
  139. {
  140. $clone = clone $this;
  141. $clone->allowedLinkSchemes = $allowLinkSchemes;
  142. return $clone;
  143. }
  144. /**
  145. * Allows only a given list of hosts to be used in links href attributes.
  146. *
  147. * All other hosts will be dropped. By default all hosts are allowed
  148. * ($allowedLinkHosts = null).
  149. *
  150. * @param list<string>|null $allowLinkHosts
  151. */
  152. public function allowLinkHosts(?array $allowLinkHosts): static
  153. {
  154. $clone = clone $this;
  155. $clone->allowedLinkHosts = $allowLinkHosts;
  156. return $clone;
  157. }
  158. /**
  159. * Allows relative URLs to be used in links href attributes.
  160. */
  161. public function allowRelativeLinks(bool $allowRelativeLinks = true): static
  162. {
  163. $clone = clone $this;
  164. $clone->allowRelativeLinks = $allowRelativeLinks;
  165. return $clone;
  166. }
  167. /**
  168. * Allows only a given list of schemes to be used in media source attributes (img, audio, video, ...).
  169. *
  170. * All other schemes will be dropped.
  171. *
  172. * @param list<string> $allowMediaSchemes
  173. */
  174. public function allowMediaSchemes(array $allowMediaSchemes): static
  175. {
  176. $clone = clone $this;
  177. $clone->allowedMediaSchemes = $allowMediaSchemes;
  178. return $clone;
  179. }
  180. /**
  181. * Allows only a given list of hosts to be used in media source attributes (img, audio, video, ...).
  182. *
  183. * All other hosts will be dropped. By default all hosts are allowed
  184. * ($allowMediaHosts = null).
  185. *
  186. * @param list<string>|null $allowMediaHosts
  187. */
  188. public function allowMediaHosts(?array $allowMediaHosts): static
  189. {
  190. $clone = clone $this;
  191. $clone->allowedMediaHosts = $allowMediaHosts;
  192. return $clone;
  193. }
  194. /**
  195. * Allows relative URLs to be used in media source attributes (img, audio, video, ...).
  196. */
  197. public function allowRelativeMedias(bool $allowRelativeMedias = true): static
  198. {
  199. $clone = clone $this;
  200. $clone->allowRelativeMedias = $allowRelativeMedias;
  201. return $clone;
  202. }
  203. /**
  204. * Transforms URLs using the HTTP scheme to use the HTTPS scheme instead.
  205. */
  206. public function forceHttpsUrls(bool $forceHttpsUrls = true): static
  207. {
  208. $clone = clone $this;
  209. $clone->forceHttpsUrls = $forceHttpsUrls;
  210. return $clone;
  211. }
  212. /**
  213. * Configures the given element as allowed.
  214. *
  215. * Allowed elements are elements the sanitizer should retain from the input.
  216. *
  217. * A list of allowed attributes for this element can be passed as a second argument.
  218. * Passing "*" will allow all standard attributes on this element. By default, no
  219. * attributes are allowed on the element.
  220. *
  221. * @param list<string>|string $allowedAttributes
  222. */
  223. public function allowElement(string $element, array|string $allowedAttributes = []): static
  224. {
  225. $clone = clone $this;
  226. // Unblock the element is necessary
  227. unset($clone->blockedElements[$element]);
  228. $clone->allowedElements[$element] = [];
  229. $attrs = ('*' === $allowedAttributes) ? array_keys(W3CReference::ATTRIBUTES) : (array) $allowedAttributes;
  230. foreach ($attrs as $allowedAttr) {
  231. $clone->allowedElements[$element][$allowedAttr] = true;
  232. }
  233. return $clone;
  234. }
  235. /**
  236. * Configures the given element as blocked.
  237. *
  238. * Blocked elements are elements the sanitizer should remove from the input, but retain
  239. * their children.
  240. */
  241. public function blockElement(string $element): static
  242. {
  243. $clone = clone $this;
  244. // Disallow the element is necessary
  245. unset($clone->allowedElements[$element]);
  246. $clone->blockedElements[$element] = true;
  247. return $clone;
  248. }
  249. /**
  250. * Configures the given element as dropped.
  251. *
  252. * Dropped elements are elements the sanitizer should remove from the input, including
  253. * their children.
  254. *
  255. * Note: when using an empty configuration, all unknown elements are dropped
  256. * automatically. This method let you drop elements that were allowed earlier
  257. * in the configuration.
  258. */
  259. public function dropElement(string $element): static
  260. {
  261. $clone = clone $this;
  262. unset($clone->allowedElements[$element], $clone->blockedElements[$element]);
  263. return $clone;
  264. }
  265. /**
  266. * Configures the given attribute as allowed.
  267. *
  268. * Allowed attributes are attributes the sanitizer should retain from the input.
  269. *
  270. * A list of allowed elements for this attribute can be passed as a second argument.
  271. * Passing "*" will allow all currently allowed elements to use this attribute.
  272. *
  273. * @param list<string>|string $allowedElements
  274. */
  275. public function allowAttribute(string $attribute, array|string $allowedElements): static
  276. {
  277. $clone = clone $this;
  278. $allowedElements = ('*' === $allowedElements) ? array_keys($clone->allowedElements) : (array) $allowedElements;
  279. // For each configured element ...
  280. foreach ($clone->allowedElements as $element => $attrs) {
  281. if (\in_array($element, $allowedElements, true)) {
  282. // ... if the attribute should be allowed, add it
  283. $clone->allowedElements[$element][$attribute] = true;
  284. } else {
  285. // ... if the attribute should not be allowed, remove it
  286. unset($clone->allowedElements[$element][$attribute]);
  287. }
  288. }
  289. return $clone;
  290. }
  291. /**
  292. * Configures the given attribute as dropped.
  293. *
  294. * Dropped attributes are attributes the sanitizer should remove from the input.
  295. *
  296. * A list of elements on which to drop this attribute can be passed as a second argument.
  297. * Passing "*" will drop this attribute from all currently allowed elements.
  298. *
  299. * Note: when using an empty configuration, all unknown attributes are dropped
  300. * automatically. This method let you drop attributes that were allowed earlier
  301. * in the configuration.
  302. *
  303. * @param list<string>|string $droppedElements
  304. */
  305. public function dropAttribute(string $attribute, array|string $droppedElements): static
  306. {
  307. $clone = clone $this;
  308. $droppedElements = ('*' === $droppedElements) ? array_keys($clone->allowedElements) : (array) $droppedElements;
  309. foreach ($droppedElements as $element) {
  310. if (isset($clone->allowedElements[$element][$attribute])) {
  311. unset($clone->allowedElements[$element][$attribute]);
  312. }
  313. }
  314. return $clone;
  315. }
  316. /**
  317. * Forcefully set the value of a given attribute on a given element.
  318. *
  319. * The attribute will be created on the nodes if it didn't exist.
  320. */
  321. public function forceAttribute(string $element, string $attribute, string $value): static
  322. {
  323. $clone = clone $this;
  324. $clone->forcedAttributes[$element][$attribute] = $value;
  325. return $clone;
  326. }
  327. /**
  328. * Registers a custom attribute sanitizer.
  329. */
  330. public function withAttributeSanitizer(AttributeSanitizerInterface $sanitizer): static
  331. {
  332. $clone = clone $this;
  333. $clone->attributeSanitizers[] = $sanitizer;
  334. return $clone;
  335. }
  336. /**
  337. * Unregisters a custom attribute sanitizer.
  338. */
  339. public function withoutAttributeSanitizer(AttributeSanitizerInterface $sanitizer): static
  340. {
  341. $clone = clone $this;
  342. $clone->attributeSanitizers = array_values(array_filter(
  343. $this->attributeSanitizers,
  344. static fn ($current) => $current !== $sanitizer
  345. ));
  346. return $clone;
  347. }
  348. public function withMaxInputLength(int $maxInputLength): static
  349. {
  350. $clone = clone $this;
  351. $clone->maxInputLength = $maxInputLength;
  352. return $clone;
  353. }
  354. public function getMaxInputLength(): int
  355. {
  356. return $this->maxInputLength;
  357. }
  358. /**
  359. * @return array<string, array<string, true>>
  360. */
  361. public function getAllowedElements(): array
  362. {
  363. return $this->allowedElements;
  364. }
  365. /**
  366. * @return array<string, true>
  367. */
  368. public function getBlockedElements(): array
  369. {
  370. return $this->blockedElements;
  371. }
  372. /**
  373. * @return array<string, array<string, string>>
  374. */
  375. public function getForcedAttributes(): array
  376. {
  377. return $this->forcedAttributes;
  378. }
  379. /**
  380. * @return list<string>
  381. */
  382. public function getAllowedLinkSchemes(): array
  383. {
  384. return $this->allowedLinkSchemes;
  385. }
  386. /**
  387. * @return list<string>|null
  388. */
  389. public function getAllowedLinkHosts(): ?array
  390. {
  391. return $this->allowedLinkHosts;
  392. }
  393. public function getAllowRelativeLinks(): bool
  394. {
  395. return $this->allowRelativeLinks;
  396. }
  397. /**
  398. * @return list<string>
  399. */
  400. public function getAllowedMediaSchemes(): array
  401. {
  402. return $this->allowedMediaSchemes;
  403. }
  404. /**
  405. * @return list<string>|null
  406. */
  407. public function getAllowedMediaHosts(): ?array
  408. {
  409. return $this->allowedMediaHosts;
  410. }
  411. public function getAllowRelativeMedias(): bool
  412. {
  413. return $this->allowRelativeMedias;
  414. }
  415. public function getForceHttpsUrls(): bool
  416. {
  417. return $this->forceHttpsUrls;
  418. }
  419. /**
  420. * @return list<AttributeSanitizerInterface>
  421. */
  422. public function getAttributeSanitizers(): array
  423. {
  424. return $this->attributeSanitizers;
  425. }
  426. }