PageRenderTime 60ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 2ms

/framework/vendors/htmlpurifier/HTMLPurifier.standalone.php

https://bitbucket.org/gencer/yii
PHP | 15832 lines | 8290 code | 1913 blank | 5629 comment | 1329 complexity | cea3a5f66e6653741b92440cd6d59233 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /**
  3. * @file
  4. * This file was auto-generated by generate-includes.php and includes all of
  5. * the core files required by HTML Purifier. Use this if performance is a
  6. * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
  7. * FILE, changes will be overwritten the next time the script is run.
  8. *
  9. * @version 4.6.0
  10. *
  11. * @warning
  12. * You must *not* include any other HTML Purifier files before this file,
  13. * because 'require' not 'require_once' is used.
  14. *
  15. * @warning
  16. * This file requires that the include path contains the HTML Purifier
  17. * library directory; this is not auto-set.
  18. */
  19. /*! @mainpage
  20. *
  21. * HTML Purifier is an HTML filter that will take an arbitrary snippet of
  22. * HTML and rigorously test, validate and filter it into a version that
  23. * is safe for output onto webpages. It achieves this by:
  24. *
  25. * -# Lexing (parsing into tokens) the document,
  26. * -# Executing various strategies on the tokens:
  27. * -# Removing all elements not in the whitelist,
  28. * -# Making the tokens well-formed,
  29. * -# Fixing the nesting of the nodes, and
  30. * -# Validating attributes of the nodes; and
  31. * -# Generating HTML from the purified tokens.
  32. *
  33. * However, most users will only need to interface with the HTMLPurifier
  34. * and HTMLPurifier_Config.
  35. */
  36. /*
  37. HTML Purifier 4.6.0 - Standards Compliant HTML Filtering
  38. Copyright (C) 2006-2008 Edward Z. Yang
  39. This library is free software; you can redistribute it and/or
  40. modify it under the terms of the GNU Lesser General Public
  41. License as published by the Free Software Foundation; either
  42. version 2.1 of the License, or (at your option) any later version.
  43. This library is distributed in the hope that it will be useful,
  44. but WITHOUT ANY WARRANTY; without even the implied warranty of
  45. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  46. Lesser General Public License for more details.
  47. You should have received a copy of the GNU Lesser General Public
  48. License along with this library; if not, write to the Free Software
  49. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  50. */
  /**
   * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
   *
   * @note Configuration can be supplied in two places. From lowest to highest
   *       precedence:
   *          -# Instance: new HTMLPurifier($config)
   *          -# Invocation: purify($html, $config)
   *       The two are independent: a configuration passed to purify()
   *       replaces the instance configuration for that call, it is not
   *       merged with it.
   *
   * @todo We need an easier way to inject strategies using the configuration
   *       object.
   */
  class HTMLPurifier
  {
      /**
       * Version of HTML Purifier.
       * @type string
       */
      public $version = '4.6.0';

      /**
       * Constant with version of HTML Purifier.
       */
      const VERSION = '4.6.0';

      /**
       * Global configuration object.
       * @type HTMLPurifier_Config
       */
      public $config;

      /**
       * Extra filter objects added through the deprecated addFilter(),
       * kept for backwards compatibility.
       * @type HTMLPurifier_Filter[]
       */
      private $filters = array();

      /**
       * Single instance of HTML Purifier handed out by instance().
       * @type HTMLPurifier
       */
      private static $instance;

      /**
       * @type HTMLPurifier_Strategy_Core
       */
      protected $strategy;

      /**
       * @type HTMLPurifier_Generator
       */
      protected $generator;

      /**
       * Resultant context of the last purification run.
       * Is an array of contexts if the last called method was purifyArray().
       * @type HTMLPurifier_Context
       */
      public $context;

      /**
       * Initializes the purifier.
       *
       * @param HTMLPurifier_Config $config Optional configuration shared by all
       *                uses of this purifier. It is handed to
       *                HTMLPurifier_Config::create(), so any type that method
       *                supports is accepted. It can be overridden on a
       *                per-call basis in purify().
       */
      public function __construct($config = null)
      {
          $this->config = HTMLPurifier_Config::create($config);
          $this->strategy = new HTMLPurifier_Strategy_Core();
      }

      /**
       * Adds a filter to process the output. First come first serve.
       *
       * Deprecated: raises an E_USER_WARNING on every call, then still
       * records the filter.
       *
       * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
       */
      public function addFilter($filter)
      {
          $message = 'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
              ' in the Filter namespace or Filter.Custom';
          trigger_error($message, E_USER_WARNING);
          $this->filters[] = $filter;
      }

      /**
       * Filters an HTML snippet/document to be XSS-free and standards-compliant.
       *
       * @param string $html String of HTML to purify
       * @param HTMLPurifier_Config $config Config object for this operation.
       *                When omitted (or falsy) the configuration given at
       *                construction is used. Otherwise the value is passed
       *                through HTMLPurifier_Config::create().
       *
       * @return string Purified HTML
       */
      public function purify($html, $config = null)
      {
          // :TODO: make the config merge in, instead of replace
          if ($config) {
              $config = HTMLPurifier_Config::create($config);
          } else {
              $config = $this->config;
          }

          // the lexer implementation is partially environment dependent,
          // partially configuration dependent
          $lexer = HTMLPurifier_Lexer::create($config);
          $context = new HTMLPurifier_Context();

          // set up the HTML generator and publish it through the context
          $this->generator = new HTMLPurifier_Generator($config, $context);
          $context->register('Generator', $this->generator);

          // error collection is opt-in; may get moved out if other
          // facilities start using it
          if ($config->get('Core.CollectErrors')) {
              $language_factory = HTMLPurifier_LanguageFactory::instance();
              $language = $language_factory->create($config, $context);
              $context->register('Locale', $language);

              $error_collector = new HTMLPurifier_ErrorCollector($context);
              $context->register('ErrorCollector', $error_collector);
          }

          // the ID accumulator lives in the context because AttrValidator
          // can be called from many places
          $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
          $context->register('IDAccumulator', $id_accumulator);

          $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

          // collect filters: enabled built-ins, then Filter.Custom entries,
          // then anything registered through addFilter()
          $filter_flags = $config->getBatch('Filter');
          $custom_filters = $filter_flags['Custom'];
          unset($filter_flags['Custom']);

          $filters = array();
          foreach ($filter_flags as $filter => $flag) {
              // skip disabled filters and dotted (sub-directive) keys
              if (!$flag || strpos($filter, '.') !== false) {
                  continue;
              }
              $class = "HTMLPurifier_Filter_$filter";
              $filters[] = new $class;
          }
          foreach ($custom_filters as $filter) {
              // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
              $filters[] = $filter;
          }
          $filters = array_merge($filters, $this->filters);

          // maybe prepare(), but later
          foreach ($filters as $active_filter) {
              $html = $active_filter->preFilter($html, $config, $context);
          }

          // un-purified HTML -> un-purified tokens -> purified tokens -> HTML
          $tokens = $lexer->tokenizeHTML($html, $config, $context);
          $tokens = $this->strategy->execute($tokens, $config, $context);
          $html = $this->generator->generateFromTokens($tokens);

          // post-filters run in the reverse order of the pre-filters
          foreach (array_reverse($filters) as $active_filter) {
              $html = $active_filter->postFilter($html, $config, $context);
          }

          $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
          $this->context =& $context;
          return $html;
      }

      /**
       * Filters an array of HTML snippets.
       *
       * Keys are preserved. Afterwards $this->context holds an array of the
       * per-snippet contexts, indexed by the same keys.
       *
       * @param string[] $array_of_html Array of html snippets
       * @param HTMLPurifier_Config $config Optional config object for this operation.
       *                See HTMLPurifier::purify() for more details.
       *
       * @return string[] Array of purified HTML
       */
      public function purifyArray($array_of_html, $config = null)
      {
          $contexts = array();
          foreach ($array_of_html as $key => $snippet) {
              $array_of_html[$key] = $this->purify($snippet, $config);
              $contexts[$key] = $this->context;
          }
          $this->context = $contexts;
          return $array_of_html;
      }

      /**
       * Singleton for enforcing just one HTML Purifier in your system.
       *
       * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
       *                   HTMLPurifier instance to overload singleton with,
       *                   or HTMLPurifier_Config instance to configure the
       *                   generated version with.
       *
       * @return HTMLPurifier
       */
      public static function instance($prototype = null)
      {
          // reuse the existing singleton unless the caller wants to replace it
          if (self::$instance && !$prototype) {
              return self::$instance;
          }

          if ($prototype instanceof HTMLPurifier) {
              self::$instance = $prototype;
          } elseif ($prototype) {
              self::$instance = new HTMLPurifier($prototype);
          } else {
              self::$instance = new HTMLPurifier();
          }
          return self::$instance;
      }

      /**
       * Singleton for enforcing just one HTML Purifier in your system.
       *
       * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
       *                   HTMLPurifier instance to overload singleton with,
       *                   or HTMLPurifier_Config instance to configure the
       *                   generated version with.
       *
       * @return HTMLPurifier
       * @note Backwards compatibility, see instance()
       */
      public static function getInstance($prototype = null)
      {
          return self::instance($prototype);
      }
  }
  /**
   * Converts a stream of HTMLPurifier_Token into an HTMLPurifier_Node,
   * and back again.
   *
   * @note This transformation is not an equivalence. We mutate the input
   *       token stream to make it so; see all [MUT] markers in code.
   */
  class HTMLPurifier_Arborize
  {
      /**
       * Builds a node tree out of a flat token list.
       *
       * A synthetic root is created from the definition's info_parent; every
       * Start token opens a new level, every End token closes the innermost
       * open node and copies its position/armor information onto that node.
       *
       * @param HTMLPurifier_Token[] $tokens Tokens to convert (mutated, see [MUT])
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return HTMLPurifier_Node Root node of the resulting tree
       */
      public static function arborize($tokens, $config, $context)
      {
          $definition = $config->getHTMLDefinition();
          $root = new HTMLPurifier_Token_Start($definition->info_parent);
          // stack of currently open nodes; the last entry is the innermost one
          $open = array($root->toNode());

          foreach ($tokens as $token) {
              $token->skip = null; // [MUT]
              $token->carryover = null; // [MUT]

              if (!($token instanceof HTMLPurifier_Token_End)) {
                  // attach to the innermost open node; start tags also open
                  // a new level
                  $node = $token->toNode();
                  $open[count($open) - 1]->children[] = $node;
                  if ($token instanceof HTMLPurifier_Token_Start) {
                      $open[] = $node;
                  }
                  continue;
              }

              // end tag: close the innermost node and record where it ended
              $token->start = null; // [MUT]
              $closed = array_pop($open);
              assert($closed->name === $token->name);
              assert(empty($token->attr));
              $closed->endCol = $token->col;
              $closed->endLine = $token->line;
              $closed->endArmor = $token->armor;
          }

          // only the synthetic root may remain open
          assert(count($open) == 1);
          return $open[0];
      }

      /**
       * Turns a node tree back into a flat token list.
       *
       * The start token of the node passed in (level 0) is not emitted.
       *
       * @param HTMLPurifier_Node $node Root of the tree to flatten
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return HTMLPurifier_Token[]
       */
      public static function flatten($node, $config, $context)
      {
          $depth = 0;
          // one queue of pending nodes per nesting level
          $pending = array($depth => new HTMLPurifier_Queue(array($node)));
          // end tokens waiting to be emitted, grouped by nesting level
          $closers = array();
          $result = array();

          do {
              while (!$pending[$depth]->isEmpty()) {
                  $node = $pending[$depth]->shift(); // FIFO
                  list($startToken, $endToken) = $node->toTokenPair();
                  if ($depth > 0) {
                      $result[] = $startToken;
                  }
                  if ($endToken !== null) {
                      $closers[$depth][] = $endToken;
                  }
                  if ($node instanceof HTMLPurifier_Node_Element) {
                      // descend: queue up the children one level deeper
                      $depth++;
                      $pending[$depth] = new HTMLPurifier_Queue();
                      foreach ($node->children as $child) {
                          $pending[$depth]->push($child);
                      }
                  }
              }

              // current level exhausted: step back up and emit the end
              // tokens recorded for that level (never for level 0)
              $depth--;
              if ($depth != 0 && isset($closers[$depth])) {
                  while ($closer = array_pop($closers[$depth])) {
                      $result[] = $closer;
                  }
              }
          } while ($depth > 0);

          return $result;
      }
  }
  /**
   * Defines common attribute collections that modules reference
   */
  class HTMLPurifier_AttrCollections
  {
      /**
       * Associative array of attribute collections, indexed by name.
       * @type array
       */
      public $info = array();

      /**
       * Gathers the attribute collections declared by every module, then
       * resolves inclusions and string identifiers so that other code can
       * use the result directly.
       *
       * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
       * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
       */
      public function __construct($attr_types, $modules)
      {
          // load extensions from the modules
          foreach ($modules as $module) {
              foreach ($module->attr_collections as $coll_i => $coll) {
                  if (!isset($this->info[$coll_i])) {
                      $this->info[$coll_i] = array();
                  }
                  foreach ($coll as $attr_i => $attr) {
                      // index 0 holds the list of included collections; when
                      // one is already present the lists are concatenated
                      // instead of overwritten
                      $mergeIncludes = ($attr_i === 0 && isset($this->info[$coll_i][$attr_i]));
                      if ($mergeIncludes) {
                          $this->info[$coll_i][$attr_i] = array_merge(
                              $this->info[$coll_i][$attr_i],
                              $attr
                          );
                      } else {
                          $this->info[$coll_i][$attr_i] = $attr;
                      }
                  }
              }
          }

          // perform internal expansions and inclusions
          foreach (array_keys($this->info) as $name) {
              // merge attribute collections that include others
              $this->performInclusions($this->info[$name]);
              // replace string identifiers with actual attribute objects
              $this->expandIdentifiers($this->info[$name], $attr_types);
          }
      }

      /**
       * Takes a reference to an attribute associative array and performs
       * all inclusions specified by the zero index. The zero index is
       * removed afterwards.
       *
       * @param array &$attr Reference to attribute array
       */
      public function performInclusions(&$attr)
      {
          if (!isset($attr[0])) {
              return;
          }

          $queue = $attr[0];
          $visited = array(); // recursion guard

          // the queue may grow while we walk it (nested inclusions)
          for ($i = 0; isset($queue[$i]); $i++) {
              $name = $queue[$i];
              if (isset($visited[$name])) {
                  continue;
              }
              $visited[$name] = true;

              if (!isset($this->info[$name])) {
                  continue;
              }
              $included = $this->info[$name];

              // copy over every attribute that is not already defined;
              // this also skips the inclusion list at index 0
              foreach ($included as $key => $value) {
                  if (!isset($attr[$key])) {
                      $attr[$key] = $value;
                  }
              }

              if (isset($included[0])) {
                  // recursion: schedule the inclusions of the inclusion
                  $queue = array_merge($queue, $included[0]);
              }
          }

          unset($attr[0]);
      }

      /**
       * Expands all string identifiers in an attribute array by replacing
       * them with the appropriate values inside HTMLPurifier_AttrTypes.
       *
       * A '*' in a key marks the attribute as required; the key is stored
       * without it. Entries whose value is false, or whose type lookup
       * yields a falsy result, are removed.
       *
       * @param array &$attr Reference to attribute array
       * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
       */
      public function expandIdentifiers(&$attr, $attr_types)
      {
          // keys renamed below are re-added to the array, so remember what
          // has been handled to avoid processing an attribute twice
          $processed = array();

          foreach ($attr as $def_i => $def) {
              // skip inclusions and anything already handled
              if ($def_i === 0 || isset($processed[$def_i])) {
                  continue;
              }

              // determine whether or not attribute is required
              $required = (strpos($def_i, '*') !== false);
              if ($required) {
                  // rename the definition
                  unset($attr[$def_i]);
                  $def_i = trim($def_i, '*');
                  $attr[$def_i] = $def;
              }
              $processed[$def_i] = true;

              // if we've already got a literal object, move on
              if (is_object($def)) {
                  // preserve previous required
                  $attr[$def_i]->required = ($required || $attr[$def_i]->required);
                  continue;
              }

              // an explicit false removes the attribute
              if ($def === false) {
                  unset($attr[$def_i]);
                  continue;
              }

              $resolved = $attr_types->get($def);
              if ($resolved) {
                  $attr[$def_i] = $resolved;
                  $attr[$def_i]->required = $required;
              } else {
                  unset($attr[$def_i]);
              }
          }
      }
  }
  /**
   * Base class for all validating attribute definitions.
   *
   * This family of classes forms the core for not only HTML attribute validation,
   * but also any sort of string that needs to be validated or cleaned (which
   * means CSS properties and composite definitions are defined here too).
   * Besides defining (through code) what precisely makes the string valid,
   * subclasses are also responsible for cleaning the code if possible.
   */
  abstract class HTMLPurifier_AttrDef
  {
      /**
       * Tells us whether or not an HTML attribute is minimized.
       * Has no meaning in other contexts.
       * @type bool
       */
      public $minimized = false;

      /**
       * Tells us whether or not an HTML attribute is required.
       * Has no meaning in other contexts
       * @type bool
       */
      public $required = false;

      /**
       * Validates and cleans passed string according to a definition.
       *
       * @param string $string String to be validated and cleaned.
       * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
       * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
       */
      abstract public function validate($string, $config, $context);

      /**
       * Convenience method that parses a string as if it were CDATA.
       *
       * The string is trimmed, then every remaining line feed, tab and
       * carriage return is replaced by a single space. This follows the
       * processing described at
       * <http://www.w3.org/TR/html4/types.html#h-6.2>. While most useful for
       * HTML attributes specified as CDATA, it can also be applied to most
       * CSS values.
       *
       * @note This method is not entirely standards compliant, as trim() removes
       *       more types of whitespace than specified in the spec. In practice,
       *       this is rarely a problem, as those extra characters usually have
       *       already been removed by HTMLPurifier_Encoder.
       *
       * @warning This processing is inconsistent with XML's whitespace handling
       *          as specified by section 3.3.3 and referenced XHTML 1.0 section
       *          4.7.  However, note that we are NOT necessarily
       *          parsing XML, thus, this behavior may still be correct. We
       *          assume that newlines have been normalized.
       *
       * @param string $string
       * @return string
       */
      public function parseCDATA($string)
      {
          return str_replace(array("\n", "\t", "\r"), ' ', trim($string));
      }

      /**
       * Factory method for creating this class from a string.
       *
       * The default implementation ignores $string and returns this very
       * object (a flyweight). Subclasses whose result depends on $string
       * need to override it and are better off cloning or instantiating a
       * new copy (instantiation is safer).
       *
       * @param string $string String construction info
       * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
       */
      public function make($string)
      {
          return $this;
      }

      /**
       * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
       * properly. THIS IS A HACK!
       *
       * @param string $string a CSS colour definition
       * @return string
       */
      protected function mungeRgb($string)
      {
          $pattern = '/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/';
          return preg_replace($pattern, 'rgb(\1,\2,\3)', $string);
      }

      /**
       * Parses a possibly escaped CSS string and returns the "pure"
       * version of it.
       *
       * @param string $string
       * @return string
       */
      protected function expandCSSEscape($string)
      {
          // flexibly parse it
          $out = '';
          $len = strlen($string);

          for ($pos = 0; $pos < $len; $pos++) {
              // ordinary character: copy through
              if ($string[$pos] !== '\\') {
                  $out .= $string[$pos];
                  continue;
              }

              // step onto the character following the backslash
              $pos++;
              if ($pos >= $len) {
                  // lone trailing backslash is kept literally
                  $out .= '\\';
                  break;
              }

              if (ctype_xdigit($string[$pos])) {
                  // hexadecimal escape: collect up to six hex digits
                  $hex = $string[$pos];
                  $pos++;
                  for ($digits = 1; $pos < $len && $digits < 6; $pos++, $digits++) {
                      if (!ctype_xdigit($string[$pos])) {
                          break;
                      }
                      $hex .= $string[$pos];
                  }
                  // We have to be extremely careful when adding
                  // new characters, to make sure we're not breaking
                  // the encoding.
                  $char = HTMLPurifier_Encoder::unichr(hexdec($hex));
                  if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
                      continue;
                  }
                  $out .= $char;
                  // a single whitespace character after the escape is
                  // consumed; anything else must be looked at again
                  if ($pos < $len && trim($string[$pos]) !== '') {
                      $pos--;
                  }
                  continue;
              }

              // backslash-newline is dropped entirely
              if ($string[$pos] === "\n") {
                  continue;
              }

              // any other escaped character stands for itself
              $out .= $string[$pos];
          }

          return $out;
      }
  }
  /**
   * Processes an entire attribute array for corrections needing multiple values.
   *
   * Occasionally, a certain attribute will need to be removed and popped onto
   * another value.  Instead of creating a complex return syntax for
   * HTMLPurifier_AttrDef, we just pass the whole attribute array to a
   * specialized object and have that do the special work.  That is the
   * family of HTMLPurifier_AttrTransform.
   *
   * An attribute transformation can be assigned to run before or after
   * HTMLPurifier_AttrDef validation.  See HTMLPurifier_HTMLDefinition for
   * more details.
   */
  abstract class HTMLPurifier_AttrTransform
  {
      /**
       * Abstract: makes changes to the attributes dependent on multiple values.
       *
       * @param array $attr Assoc array of attributes, usually from
       *              HTMLPurifier_Token_Tag::$attr
       * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
       * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
       * @return array Processed attribute array.
       */
      abstract public function transform($attr, $config, $context);

      /**
       * Prepends CSS properties to the style attribute, creating the
       * attribute if it doesn't exist.
       *
       * @param array &$attr Attribute array to process (passed by reference)
       * @param string $css CSS to prepend
       */
      public function prependCSS(&$attr, $css)
      {
          $existing = isset($attr['style']) ? $attr['style'] : '';
          $attr['style'] = $css . $existing;
      }

      /**
       * Retrieves and removes an attribute.
       *
       * @param array &$attr Attribute array to process (passed by reference)
       * @param mixed $key Key of attribute to confiscate
       * @return mixed The removed value, or null when the key is not set
       */
      public function confiscateAttr(&$attr, $key)
      {
          if (isset($attr[$key])) {
              $taken = $attr[$key];
              unset($attr[$key]);
              return $taken;
          }
          return null;
      }
  }
  /**
   * Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
   */
  class HTMLPurifier_AttrTypes
  {
      /**
       * Lookup array of attribute string identifiers to concrete implementations.
       * @type HTMLPurifier_AttrDef[]
       */
      protected $info = array();

      /**
       * Constructs the info array, supplying default implementations for attribute
       * types.
       */
      public function __construct()
      {
          // XXX This is kind of poor, since we don't actually /clone/
          // instances; instead, we use the supplied make() attribute. So,
          // the underlying class must know how to deal with arguments.
          // With the old implementation of Enum, that ignored its
          // arguments when handling a make dispatch, the IAlign
          // definition wouldn't work.

          // pseudo-types, must be instantiated via shorthand
          $this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
          $this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();

          // regular types
          $this->info['CDATA'] = new HTMLPurifier_AttrDef_Text();
          $this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID();
          $this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length();
          $this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
          $this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
          $this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels();
          $this->info['Text'] = new HTMLPurifier_AttrDef_Text();
          $this->info['URI'] = new HTMLPurifier_AttrDef_URI();
          $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
          $this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();

          // fixed enumerations
          $this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right');
          $this->info['LAlign'] = self::makeEnum('top,bottom,left,right');
          $this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();

          // unimplemented aliases, all treated as free text
          $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
          $this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text();
          $this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
          $this->info['Character'] = new HTMLPurifier_AttrDef_Text();

          // "proprietary" types
          $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();

          // number is really a positive integer (one or more digits)
          // FIXME: ^^ not always, see start and value of list items
          $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
      }

      /**
       * Builds an enumeration definition from a comma separated list of
       * allowed values, wrapped in HTMLPurifier_AttrDef_Clone.
       *
       * @param string $in Comma separated allowed values
       * @return HTMLPurifier_AttrDef_Clone
       */
      private static function makeEnum($in)
      {
          $allowed = explode(',', $in);
          $enum = new HTMLPurifier_AttrDef_Enum($allowed);
          return new HTMLPurifier_AttrDef_Clone($enum);
      }

      /**
       * Retrieves a type.
       *
       * Anything after the first '#' in $type is split off and handed to
       * the registered implementation's make() method; without a '#' an
       * empty string is passed. Requesting an unregistered type raises an
       * E_USER_ERROR.
       *
       * @param string $type String type name
       * @return HTMLPurifier_AttrDef Object AttrDef for type
       */
      public function get($type)
      {
          // determine if there is any extra info tacked on
          $pieces = explode('#', $type, 2);
          $type = $pieces[0];
          $string = isset($pieces[1]) ? $pieces[1] : '';

          if (isset($this->info[$type])) {
              return $this->info[$type]->make($string);
          }

          trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
          return;
      }

      /**
       * Sets a new implementation for a type
       *
       * @param string $type String type name
       * @param HTMLPurifier_AttrDef $impl Object AttrDef for type
       */
      public function set($type, $impl)
      {
          $this->info[$type] = $impl;
      }
  }
  /**
   * Validates the attributes of a token. Doesn't manage required attributes
   * very well. The only reason we factored this out was because RemoveForeignElements
   * also needed it besides ValidateAttributes.
   */
  class HTMLPurifier_AttrValidator
  {
      /**
       * Validates the attributes of a token, mutating it as necessary.
       *
       * Only start and empty tags are processed; any other token is left
       * untouched. For a tag, the attribute array runs through the global
       * and element-local pre-transforms, each attribute is validated
       * against the element-local definition (falling back to the global
       * one), and finally the global and element-local post-transforms are
       * applied. The token's attr property is written once, at the very end.
       *
       * While it runs, the method makes sure the context has an
       * 'IDAccumulator' (created and left in place if missing), a
       * 'CurrentToken' (registered only if missing, and then destroyed
       * again before returning) and a 'CurrentAttr' entry (always
       * registered and destroyed here).
       *
       * @param HTMLPurifier_Token $token Token to validate.
       * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
       * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
       */
      public function validateToken($token, $config, $context)
      {
          $definition = $config->getHTMLDefinition();
          $e =& $context->get('ErrorCollector', true);

          // initialize IDAccumulator if necessary
          $ok =& $context->get('IDAccumulator', true);
          if (!$ok) {
              $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
              $context->register('IDAccumulator', $id_accumulator);
          }

          // Only tags carry attributes. This check has to happen BEFORE
          // CurrentToken is registered: returning after the registration
          // would leave a stale CurrentToken behind in the context, since
          // the matching destroy() sits at the end of this method.
          if (!$token instanceof HTMLPurifier_Token_Start &&
              !$token instanceof HTMLPurifier_Token_Empty
          ) {
              return;
          }

          // initialize CurrentToken if necessary
          $current_token =& $context->get('CurrentToken', true);
          if (!$current_token) {
              $context->register('CurrentToken', $token);
          }

          // create alias to global definition array, see also $defs
          // DEFINITION CALL
          $d_defs = $definition->info_global_attr;

          // don't update token until the very end, to ensure an atomic update
          $attr = $token->attr;

          // do global transformations (pre)
          // nothing currently utilizes this
          foreach ($definition->info_attr_transform_pre as $transform) {
              $attr = $transform->transform($o = $attr, $config, $context);
              if ($e && $attr != $o) {
                  $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
              }
          }

          // do local transformations only applicable to this element (pre)
          // ex. <p align="right"> to <p style="text-align:right;">
          foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
              $attr = $transform->transform($o = $attr, $config, $context);
              if ($e && $attr != $o) {
                  $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
              }
          }

          // create alias to this element's attribute definition array, see
          // also $d_defs (global attribute definition array)
          // DEFINITION CALL
          $defs = $definition->info[$token->name]->attr;

          // $attr_key is handed to the context here and then reused as the
          // loop key below, so the variable name must stay the same
          $attr_key = false;
          $context->register('CurrentAttr', $attr_key);

          // iterate through all the attribute keypairs
          // Watch out for name collisions: $key has previously been used
          foreach ($attr as $attr_key => $value) {
              // call the definition
              if (isset($defs[$attr_key])) {
                  // there is a local definition defined
                  if ($defs[$attr_key] === false) {
                      // We've explicitly been told not to allow this element.
                      // This is usually when there's a global definition
                      // that must be overridden.
                      // Theoretically speaking, we could have a
                      // AttrDef_DenyAll, but this is faster!
                      $result = false;
                  } else {
                      // validate according to the element's definition
                      $result = $defs[$attr_key]->validate(
                          $value,
                          $config,
                          $context
                      );
                  }
              } elseif (isset($d_defs[$attr_key])) {
                  // there is a global definition defined, validate according
                  // to the global definition
                  $result = $d_defs[$attr_key]->validate(
                      $value,
                      $config,
                      $context
                  );
              } else {
                  // system never heard of the attribute? DELETE!
                  $result = false;
              }

              // put the results into effect
              if ($result === false || $result === null) {
                  // this is a generic error message that should be replaced
                  // with more specific ones when possible
                  if ($e) {
                      $e->send(E_ERROR, 'AttrValidator: Attribute removed');
                  }
                  // remove the attribute
                  unset($attr[$attr_key]);
              } elseif (is_string($result)) {
                  // generally, if a substitution is happening, there
                  // was some sort of implicit correction going on. We'll
                  // delegate it to the attribute classes to say exactly what.
                  // simple substitution
                  $attr[$attr_key] = $result;
              }
              // any other result (e.g. true): the value is kept as is

              // we'd also want slightly more complicated substitution
              // involving an array as the return value,
              // although we're not sure how colliding attributes would
              // resolve (certain ones would be completely overridden,
              // others would prepend themselves).
          }

          $context->destroy('CurrentAttr');

          // post transforms

          // global (error reporting untested)
          foreach ($definition->info_attr_transform_post as $transform) {
              $attr = $transform->transform($o = $attr, $config, $context);
              if ($e && $attr != $o) {
                  $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
              }
          }

          // local (error reporting untested)
          foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
              $attr = $transform->transform($o = $attr, $config, $context);
              if ($e && $attr != $o) {
                  $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
              }
          }

          $token->attr = $attr;

          // destroy CurrentToken if we made it ourselves
          if (!$current_token) {
              $context->destroy('CurrentToken');
          }
      }
  }
  // constants are slow, so we use as few as possible
  if (!defined('HTMLPURIFIER_PREFIX')) {
      // Point the prefix at the "standalone" directory beside this file and
      // put that directory first on the include path.
      define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
      set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());
  }

  // accommodations for versions earlier than 5.0.2
  // borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
  if (!defined('PHP_EOL')) {
      // Match the native constant: CRLF on Windows, LF on everything else.
      // PHP_OS "Darwin" (Mac OS X) deliberately lands in the LF group; a bare
      // "\r" is the classic Mac OS convention and does not apply to it.
      define('PHP_EOL', strtoupper(substr(PHP_OS, 0, 3)) === 'WIN' ? "\r\n" : "\n");
  }
  /**
   * Bootstrap helper for HTML Purifier: maps class names to files below
   * HTMLPURIFIER_PREFIX and installs the autoloader on the SPL stack.
   *
   * @note
   *      This class may be used without any other files from HTML Purifier.
   */
  class HTMLPurifier_Bootstrap
  {
      /**
       * Autoload callback for HTML Purifier classes.
       * @param string $class Name of the class that is being requested
       * @return bool True if a file was included, false if the class name
       *              does not resolve to an existing file.
       */
      public static function autoload($class)
      {
          $relative = HTMLPurifier_Bootstrap::getPath($class);
          if ($relative) {
              // A plain 'require' should be enough and cheaper, but Antonio
              // Parraga reports that Zend extensions such as Zend debugger
              // and APC can break that invariant, so pay for require_once.
              require_once HTMLPURIFIER_PREFIX . '/' . $relative;
              return true;
          }
          return false;
      }

      /**
       * Resolves a class name to its file path relative to
       * HTMLPURIFIER_PREFIX.
       * @param string $class Class name to resolve
       * @return string|false Relative path, or false when the name does not
       *                      start with "HTMLPurifier" or the file is missing.
       */
      public static function getPath($class)
      {
          if (strncmp('HTMLPurifier', $class, 12) !== 0) {
              return false;
          }

          if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
              // Language classes live in one directory and use dashes where
              // the class name has underscores.
              $languageCode = str_replace('_', '-', substr($class, 22));
              $relative = 'HTMLPurifier/Language/classes/' . $languageCode . '.php';
          } else {
              // Everything else: underscores become directory separators.
              $relative = str_replace('_', '/', $class) . '.php';
          }

          return file_exists(HTMLPURIFIER_PREFIX . '/' . $relative) ? $relative : false;
      }

      /**
       * "Pre-registers" our autoloader on the SPL stack, i.e. places it in
       * front of any loaders that are already registered.
       */
      public static function registerAutoload()
      {
          $ours = array('HTMLPurifier_Bootstrap', 'autoload');
          $existing = spl_autoload_functions();

          if ($existing === false) {
              // Nothing registered yet, so there is nothing to get ahead of.
              spl_autoload_register($ours);
              return;
          }
          if (!function_exists('spl_autoload_unregister')) {
              return;
          }
          if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
              // prepend flag exists, no need for shenanigans
              spl_autoload_register($ours, true, true);
              return;
          }

          // Older PHP: take every loader off the stack, add ours, then put
          // the previous loaders back in their original order.
          $buggy = version_compare(PHP_VERSION, '5.2.11', '<');
          $compat = version_compare(PHP_VERSION, '5.1.0', '>=') &&
              version_compare(PHP_VERSION, '5.1.2', '<=');

          foreach ($existing as $loader) {
              if ($buggy && is_array($loader)) {
                  // :TRICKY: There are some compatibility issues and some
                  // places where we need to error out
                  $method = new ReflectionMethod($loader[0], $loader[1]);
                  if (!$method->isStatic()) {
                      throw new Exception(
                          'HTML Purifier autoloader registrar is not compatible
with non-static object methods due to PHP Bug #44144;
Please do not use HTMLPurifier.autoload.php (or any
file that includes this file); instead, place the code:
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
after your own autoloaders.'
                      );
                  }
                  // Surprisingly, spl_autoload_register supports the
                  // Class::staticMethod callback format, although
                  // call_user_func doesn't
                  if ($compat) {
                      $loader = implode('::', $loader);
                  }
              }
              spl_autoload_unregister($loader);
          }

          spl_autoload_register($ours);
          foreach ($existing as $loader) {
              spl_autoload_register($loader);
          }
      }
  }
  /**
   * Base class for definition objects. Provides the run-once setup()
   * entry point; concrete definitions supply doSetup().
   */
  abstract class HTMLPurifier_Definition
  {
      /**
       * Set to true as soon as setup() has run.
       * @type bool
       */
      public $setup = false;

      /**
       * If true, write out the final definition object to the cache after
       * setup. This will be true only if all invocations to get a raw
       * definition object are also optimized. This does not cause file
       * system thrashing because on subsequent calls the cached object
       * is used and any writes to the raw definition object are short
       * circuited. See enduser-customize.html for the high-level
       * picture.
       * @type bool
       */
      public $optimized = null;

      /**
       * Kind of definition this object represents.
       * @type string
       */
      public $type;

      /**
       * Builds the definition into its final form; this work is left out
       * of the constructor on purpose.
       * @param HTMLPurifier_Config $config
       */
      abstract protected function doSetup($config);

      /**
       * Runs doSetup() the first time it is called and does nothing on
       * later calls. The flag is raised before doSetup() executes.
       * @param HTMLPurifier_Config $config
       */
      public function setup($config)
      {
          if (!$this->setup) {
              $this->setup = true;
              $this->doSetup($config);
          }
      }
  }
  /**
   * Registry of the CSS properties that are allowed, each mapped to the
   * attribute definition used to validate its value.
   * @see HTMLPurifier_HTMLDefinition
   */
  class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
  {
      public $type = 'CSS';

      /**
       * Assoc array of attribute name to definition object.
       * @type HTMLPurifier_AttrDef[]
       */
      public $info = array();

      /**
       * Fills $info with the built-in property set, adds the optional
       * groups enabled through configuration, wraps every entry in the
       * !important decorator and finally applies the allow/forbid lists.
       *
       * Statement order matters: the shorthand validators built with
       * $config (list-style, background, font, border) are created only
       * after the longhand properties they relate to.
       *
       * @param HTMLPurifier_Config $config
       */
      protected function doSetup($config)
      {
          $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
              array('left', 'right', 'center', 'justify'),
              false
          );

          // One shared validator for the four per-side border styles.
          $borderStyleNames = array(
              'none',
              'hidden',
              'dotted',
              'dashed',
              'solid',
              'double',
              'groove',
              'ridge',
              'inset',
              'outset'
          );
          $borderStyle =
              $this->info['border-bottom-style'] =
              $this->info['border-right-style'] =
              $this->info['border-left-style'] =
              $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum($borderStyleNames, false);
          $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($borderStyle);

          $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
              array('none', 'left', 'right', 'both'),
              false
          );
          $this->info['float'] = new HTMLPurifier_AttrDef_Enum(
              array('none', 'left', 'right'),
              false
          );
          $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
              array('normal', 'italic', 'oblique'),
              false
          );
          $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
              array('normal', 'small-caps'),
              false
          );

          // Accepts the keyword "none" or a CSS URI; reused for images below.
          $uriOrNone = new HTMLPurifier_AttrDef_CSS_Composite(
              array(
                  new HTMLPurifier_AttrDef_Enum(array('none')),
                  new HTMLPurifier_AttrDef_CSS_URI()
              )
          );

          $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
              array('inside', 'outside'),
              false
          );
          $listStyleTypes = array(
              'disc',
              'circle',
              'square',
              'decimal',
              'lower-roman',
              'upper-roman',
              'lower-alpha',
              'upper-alpha',
              'none'
          );
          $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum($listStyleTypes, false);
          $this->info['list-style-image'] = $uriOrNone;
          $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);

          $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
              array('capitalize', 'uppercase', 'lowercase', 'none'),
              false
          );
          $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();

          $this->info['background-image'] = $uriOrNone;
          $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
              array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
          );
          $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
              array('scroll', 'fixed')
          );
          $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();

          // "transparent" or a color; shared by the border sides and the
          // background color.
          $borderColor =
              $this->info['border-top-color'] =
              $this->info['border-bottom-color'] =
              $this->info['border-left-color'] =
              $this->info['border-right-color'] =
              $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(
                  array(
                      new HTMLPurifier_AttrDef_Enum(array('transparent')),
                      new HTMLPurifier_AttrDef_CSS_Color()
                  )
              );
          $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
          $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($borderColor);

          $borderWidth =
              $this->info['border-top-width'] =
              $this->info['border-bottom-width'] =
              $this->info['border-left-width'] =
              $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(
                  array(
                      new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
                      new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
                  )
              );
          $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($borderWidth);

          $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
              array(
                  new HTMLPurifier_AttrDef_Enum(array('normal')),
                  new HTMLPurifier_AttrDef_CSS_Length()
              )
          );
          $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
              array(
                  new HTMLPurifier_AttrDef_Enum(array('normal')),
                  new HTMLPurifier_AttrDef_CSS_Length()
              )
          );

          $fontSizeKeywords = array(
              'xx-small',
              'x-small',
              'small',
              'medium',
              'large',
              'x-large',
              'xx-large',
              'larger',
              'smaller'
          );
          $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(
              array(
                  new HTMLPurifier_AttrDef_Enum($fontSizeKeywords),
                  new HTMLPurifier_AttrDef_CSS_Percentage(),
                  new HTMLPurifier_AttrDef_CSS_Length()
              )
          );
          $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(
              array(
                  new HTMLPurifier_AttrDef_Enum(array('normal')),
                  new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
                  new HTMLPurifier_AttrDef_CSS_Length('0'),
                  new HTMLPurifier_AttrDef_CSS_Percentage(true)
              )
          );

          $marginSide =
              $this->info['margin-top'] =
              $this->info['margin-bottom'] =
              $this->info['margin-left'] =
              $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
                  array(
                      new HTMLPurifier_AttrDef_CSS_Length(),
                      new HTMLPurifier_AttrDef_CSS_Percentage(),
                      new HTMLPurifier_AttrDef_Enum(array('auto'))
                  )
              );
          $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($marginSide);

          // non-negative
          $paddingSide =
              $this->info['padding-top'] =
              $this->info['padding-bottom'] =
              $this->info['padding-left'] =
              $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
                  array(
                      new HTMLPurifier_AttrDef_CSS_Length('0'),
                      new HTMLPurifier_AttrDef_CSS_Percentage(true)
                  )
              );
          $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($paddingSide);

          $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(
              array(
                  new HTMLPurifier_AttrDef_CSS_Length(),
                  new HTMLPurifier_AttrDef_CSS_Percentage()
              )
          );

          // Width/height validator used when no maximum image length is set,
          // and for every non-img element when one is.
          $trustedDimension = new HTMLPurifier_AttrDef_CSS_Composite(
              array(
                  new HTMLPurifier_AttrDef_CSS_Length('0'),
                  new HTMLPurifier_AttrDef_CSS_Percentage(true),
                  new HTMLPurifier_AttrDef_Enum(array('auto'))
              )
          );
          $maxImgLength = $config->get('CSS.MaxImgLength');
          if ($maxImgLength === null) {
              $dimension = $trustedDimension;
          } else {
              $dimension = new HTMLPurifier_AttrDef_Switch(
                  'img',
                  // For img tags:
                  new HTMLPurifier_AttrDef_CSS_Composite(
                      array(
                          new HTMLPurifier_AttrDef_CSS_Length('0', $maxImgLength),
                          new HTMLPurifier_AttrDef_Enum(array('auto'))
                      )
                  ),
                  // For everyone else:
                  $trustedDimension
              );
          }
          $this->info['width'] =
              $this->info['height'] = $dimension;

          $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
          $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();

          // this could use specialized code
          $fontWeights = array(
              'normal',
              'bold',
              'bolder',
              'lighter',
              '100',
              '200',
              '300',
              '400',
              '500',
              '600',
              '700',
              '800',
              '900'
          );
          $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum($fontWeights, false);

          // MUST be called after other font properties, as it references
          // a CSSDefinition object
          $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);

          // same here
          $this->info['border'] =
              $this->info['border-bottom'] =
              $this->info['border-top'] =
              $this->info['border-left'] =
              $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);

          $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(
              array('collapse', 'separate')
          );
          $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(
              array('top', 'bottom')
          );
          $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(
              array('auto', 'fixed')
          );

          $verticalAlignKeywords = array(
              'baseline',
              'sub',
              'super',
              'top',
              'text-top',
              'middle',
              'bottom',
              'text-bottom'
          );
          $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(
              array(
                  new HTMLPurifier_AttrDef_Enum($verticalAlignKeywords),
                  new HTMLPurifier_AttrDef_CSS_Length(),
                  new HTMLPurifier_AttrDef_CSS_Percentage()
              )
          );
          $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(
              new HTMLPurifier_AttrDef_CSS_Length(),
              2
          );

          // These CSS properties don't work on many browsers, but we live
          // in THE FUTURE!
          $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(
              array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line')
          );

          // Optional property groups, each switched on by its own directive.
          if ($config->get('CSS.Proprietary')) {
              $this->doSetupProprietary($config);
          }
          if ($config->get('CSS.AllowTricky')) {
              $this->doSetupTricky($config);
          }
          if ($config->get('CSS.Trusted')) {
              $this->doSetupTrusted($config);
          }

          // wrap all attr-defs with decorator that handles !important
          $allowImportant = $config->get('CSS.AllowImportant');
          foreach (array_keys($this->info) as $property) {
              $this->info[$property] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator(
                  $this->info[$property],
                  $allowImportant
              );
          }

          $this->setupConfigStuff($config);
      }

      /**
       * Registers the properties enabled by the CSS.Proprietary directive.
       * @param HTMLPurifier_Config $config
       */
      protected function doSetupProprietary($config)
      {
          // Internet Explorer only scrollbar colors
          $this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
          $this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color();
          $this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
          $this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color();
          $this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color();
          $this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();

          // technically not proprietary, but CSS3, and no one supports it
          $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
          $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
          $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();

          // only opacity, for now
          $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();

          // more CSS3
          $pageBreakValues = array(
              'auto',
              'always',
              'avoid',
              'left',
              'right'
          );
          $this->info['page-break-after'] =
              $this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum($pageBreakValues);
          $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));
      }

      /**
       * Registers the properties enabled by the CSS.AllowTricky directive.
       * @param HTMLPurifier_Config $config
       */
      protected function doSetupTricky($config)
      {
          $displayValues = array(
              'inline',
              'block',
              'list-item',
              'run-in',
              'compact',
              'marker',
              'table',
              'inline-block',
              'inline-table',
              'table-row-group',
              'table-header-group',
              'table-footer-group',
              'table-row',
              'table-column-group',
              'table-column',
              'table-cell',
              'table-caption',
              'none'
          );
          $this->info['display'] = new HTMLPurifier_AttrDef_Enum($displayValues);
          $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(
              array('visible', 'hidden', 'collapse')
          );
          $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(
              array('visible', 'hidden', 'auto', 'scroll')
          );
      }

      /**
       * Registers the properties enabled by the CSS.Trusted directive.
       * @param HTMLPurifier_Config $config
       */
      protected function doSetupTrusted($config)
      {
          $this->info['position'] = new HTMLPurifier_AttrDef_Enum(
              array('static', 'relative', 'absolute', 'fixed')
          );

          // The four offsets share a single validator instance.
          $this->info['top'] =
              $this->info['left'] =
              $this->info['right'] =
              $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(
                  array(
                      new HTMLPurifier_AttrDef_CSS_Length(),
                      new HTMLPurifier_AttrDef_CSS_Percentage(),
                      new HTMLPurifier_AttrDef_Enum(array('auto')),
                  )
              );

          $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(
              array(
                  new HTMLPurifier_AttrDef_Integer(),
                  new HTMLPurifier_AttrDef_Enum(array('auto')),
              )
          );
      }

      /**
       * Applies the CSS.AllowedProperties and CSS.ForbiddenProperties
       * directives to $info. Based off of HTMLPurifier_HTMLDefinition.
       *
       * With an allow-list, every property not named in it is removed and
       * a warning is raised for each listed name that is not a known
       * property. With a forbid-list, every named property is removed.
       *
       * @param HTMLPurifier_Config $config
       * @todo Refactor duplicate elements into common class (probably using
       *       composition, not inheritance).
       */
      protected function setupConfigStuff($config)
      {
          // setup allowed elements
          $support = "(for information on implementing this, see the " .
              "support forums) ";

          $allowed = $config->get('CSS.AllowedProperties');
          if ($allowed !== null) {
              foreach (array_keys($this->info) as $property) {
                  if (!isset($allowed[$property])) {
                      unset($this->info[$property]);
                  }
                  // Tick the name off; whatever remains afterwards was
                  // requested but is not a known property.
                  unset($allowed[$property]);
              }
              // emit errors
              foreach (array_keys($allowed) as $name) {
                  // :TODO: Is this htmlspecialchars() call really necessary?
                  $name = htmlspecialchars($name);
                  trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
              }
          }

          $forbidden = $config->get('CSS.ForbiddenProperties');
          if ($forbidden !== null) {
              foreach (array_keys($this->info) as $property) {
                  if (isset($forbidden[$property])) {
                      unset($this->info[$property]);
                  }
              }
          }
      }
  }
  /**
   * Describes which child nodes an element may contain and validates a
   * list of children against that description.
   */
  abstract class HTMLPurifier_ChildDef
  {
      /**
       * Type of child definition, usually right-most part of class name
       * lowercase. Used occasionally in terms of context.
       * @type string
       */
      public $type;

      /**
       * Whether an empty list of children is acceptable.
       *
       * Needed for re-checking: a change that affects a child node may
       * leave its parent node in a state that is no longer allowed.
       * @type bool
       */
      public $allow_empty;

      /**
       * Lookup array of all elements that this definition could possibly
       * allow.
       * @type array
       */
      public $elements = array();

      /**
       * Get lookup of tag names that should not close this element
       * automatically; all other elements will do so. This base
       * implementation returns $elements unchanged and ignores $config.
       * @param HTMLPurifier_Config $config HTMLPurifier_Config object
       * @return array
       */
      public function getAllowedElements($config)
      {
          $lookup = $this->elements;
          return $lookup;
      }

      /**
       * Validates nodes according to definition and returns modification.
       *
       * @param HTMLPurifier_Node[] $children Array of HTMLPurifier_Node
       * @param HTMLPurifier_Config $config HTMLPurifier_Config object
       * @param HTMLPurifier_Context $context HTMLPurifier_Context object
       * @return bool|array true to leave nodes as is, false to remove
       *                    parent node, array of replacement children
       */
      abstract public function validateChildren($children, $config, $context);
  }
  1504. /**
  1505. * Configuration object that triggers customizable behavior.
  1506. *
  1507. * @warning This class is strongly defined: that means that the class
  1508. * will fail if an undefined directive is retrieved or set.
  1509. *
  1510. * @note Many classes that could (although many times don't) use the
  1511. * configuration object make it a mandatory parameter. This is
  1512. * because a configuration object should always be forwarded,
  1513. * otherwise, you run the risk of missing a parameter and then
  1514. * being stumped when a configuration directive doesn't work.
  1515. *
  1516. * @todo Reconsider some of the public member variables
  1517. */
  1518. class HTMLPurifier_Config
  1519. {
  1520. /**
  1521. * HTML Purifier's version
  1522. * @type string
  1523. */
  1524. public $version = '4.6.0';
  1525. /**
  1526. * Whether or not to automatically finalize
  1527. * the object if a read operation is done.
  1528. * @type bool
  1529. */
  1530. public $autoFinalize = true;
  1531. // protected member variables
  1532. /**
  1533. * Namespace indexed array of serials for specific namespaces.
  1534. * @see getSerial() for more info.
  1535. * @type string[]
  1536. */
  1537. protected $serials = array();
  1538. /**
  1539. * Serial for entire configuration object.
  1540. * @type string
  1541. */
  1542. protected $serial;
  1543. /**
  1544. * Parser for variables.
  1545. * @type HTMLPurifier_VarParser_Flexible
  1546. */
  1547. protected $parser = null;
  1548. /**
  1549. * Reference HTMLPurifier_ConfigSchema for value checking.
  1550. * @type HTMLPurifier_ConfigSchema
  1551. * @note This is public for introspective purposes. Please don't
  1552. * abuse!
  1553. */
  1554. public $def;
  1555. /**
  1556. * Indexed array of definitions.
  1557. * @type HTMLPurifier_Definition[]
  1558. */
  1559. protected $definitions;
  1560. /**
  1561. * Whether or not config is finalized.
  1562. * @type bool
  1563. */
  1564. protected $finalized = false;
  1565. /**
  1566. * Property list containing configuration directives.
  1567. * @type array
  1568. */
  1569. protected $plist;
  1570. /**
  1571. * Whether or not a set is taking place due to an alias lookup.
  1572. * @type bool
  1573. */
  1574. private $aliasMode;
  1575. /**
  1576. * Set to false if you do not want line and file numbers in errors.
  1577. * (useful when unit testing). This will also compress some errors
  1578. * and exceptions.
  1579. * @type bool
  1580. */
  1581. public $chatty = true;
  1582. /**
  1583. * Current lock; only gets to this namespace are allowed.
  1584. * @type string
  1585. */
  1586. private $lock;
  /**
   * Constructor
   * @param HTMLPurifier_ConfigSchema $definition ConfigSchema that defines
   *      what directives are allowed.
   * @param HTMLPurifier_PropertyList $parent Property list to build on;
   *      the schema's defaultPlist is used when this is empty.
   */
  public function __construct($definition, $parent = null)
  {
      if (!$parent) {
          $parent = $definition->defaultPlist;
      }
      $this->plist = new HTMLPurifier_PropertyList($parent);
      // keep a copy around for checking
      $this->def = $definition;
      $this->parser = new HTMLPurifier_VarParser_Flexible();
  }
  /**
   * Convenience constructor that creates a config object based on a mixed var
   * @param mixed $config Variable that defines the state of the config
   *                      object. Can be: a HTMLPurifier_Config() object
   *                      (returned unchanged), an array of directives
   *                      based on loadArray(), or a string filename of
   *                      an ini file. Any other value yields an object
   *                      with nothing loaded into it.
   * @param HTMLPurifier_ConfigSchema $schema Schema object; when empty the
   *                      object comes from createDefault().
   * @return HTMLPurifier_Config Configured object
   */
  public static function create($config, $schema = null)
  {
      // pass-through
      if ($config instanceof HTMLPurifier_Config) {
          return $config;
      }

      $ret = $schema
          ? new HTMLPurifier_Config($schema)
          : HTMLPurifier_Config::createDefault();

      if (is_string($config)) {
          $ret->loadIni($config);
      } elseif (is_array($config)) {
          $ret->loadArray($config);
      }
      return $ret;
  }
  /**
   * Creates a new config object that inherits from a previous one: it
   * uses the same schema and takes the given object's property list as
   * the parent of its own.
   * @param HTMLPurifier_Config $config Configuration object to inherit from.
   * @return HTMLPurifier_Config object with $config as its parent.
   */
  public static function inherit(HTMLPurifier_Config $config)
  {
      $schema = $config->def;
      $parentPlist = $config->plist;
      return new HTMLPurifier_Config($schema, $parentPlist);
  }
  /**
   * Convenience constructor that creates a default configuration object,
   * built on the schema returned by HTMLPurifier_ConfigSchema::instance().
   * @return HTMLPurifier_Config default object.
   */
  public static function createDefault()
  {
      return new HTMLPurifier_Config(HTMLPurifier_ConfigSchema::instance());
  }
  /**
   * Retrieves a value from the configuration.
   *
   * An error is triggered and nothing is returned when the directive is
   * undefined, is an alias, or lies outside the namespace of the active
   * lock.
   *
   * @param string $key String key in "Namespace.Directive" form
   * @param mixed $a Deprecated: directive name when $key holds only the
   *                 namespace; triggers a warning when used.
   *
   * @return mixed
   */
  public function get($key, $a = null)
  {
      if ($a !== null) {
          $this->triggerError(
              "Using deprecated API: use \$config->get('$key.$a') instead",
              E_USER_WARNING
          );
          $key = "$key.$a";
      }

      if (!$this->finalized) {
          $this->autoFinalize();
      }

      if (!isset($this->def->info[$key])) {
          // can't add % due to SimpleTest bug
          $this->triggerError(
              'Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
              E_USER_WARNING
          );
          return;
      }

      $directive = $this->def->info[$key];
      if (isset($directive->isAlias)) {
          $this->triggerError(
              'Cannot get value from aliased directive, use real name ' . $directive->key,
              E_USER_ERROR
          );
          return;
      }

      if ($this->lock) {
          // Only the namespace part (text before the first dot) is compared.
          $segments = explode('.', $key);
          $ns = $segments[0];
          if ($ns !== $this->lock) {
              $this->triggerError(
                  'Cannot get value of namespace ' . $ns . ' when lock for ' .
                  $this->lock .
                  ' is active, this probably indicates a Definition setup method ' .
                  'is accessing directives that are not within its namespace',
                  E_USER_ERROR
              );
              return;
          }
      }

      return $this->plist->get($key);
  }
  /**
   * Retrieves an array of directives to values from a given namespace
   *
   * @param string $namespace String namespace
   *
   * @return array Directive => value pairs; nothing is returned (and a
   *               warning is triggered) when the namespace is unknown.
   */
  public function getBatch($namespace)
  {
      if (!$this->finalized) {
          $this->autoFinalize();
      }

      $byNamespace = $this->getAll();
      if (isset($byNamespace[$namespace])) {
          return $byNamespace[$namespace];
      }

      $this->triggerError(
          'Cannot retrieve undefined namespace ' .
          htmlspecialchars($namespace),
          E_USER_WARNING
      );
      return;
  }
  /**
   * Returns a SHA-1 signature of a segment of the configuration object
   * that uniquely identifies that particular configuration. The result
   * is kept in $serials and reused on later calls.
   *
   * @param string $namespace Namespace to get serial for
   *
   * @return string
   * @note Revision is handled specially and is removed from the batch
   *       before processing!
   */
  public function getBatchSerial($namespace)
  {
      if (!empty($this->serials[$namespace])) {
          return $this->serials[$namespace];
      }

      $directives = $this->getBatch($namespace);
      unset($directives['DefinitionRev']);
      $this->serials[$namespace] = sha1(serialize($directives));

      return $this->serials[$namespace];
  }
  /**
   * Returns a SHA-1 signature for the entire configuration object
   * that uniquely identifies that particular configuration. The value
   * is computed from getAll() once and then reused from $serial.
   *
   * @return string
   */
  public function getSerial()
  {
      if (!empty($this->serial)) {
          return $this->serial;
      }

      $this->serial = sha1(serialize($this->getAll()));
      return $this->serial;
  }
  /**
   * Retrieves all directives, organized by namespace
   *
   * @return array Two-level array: namespace => directive => value, where
   *               each squashed name is split at its first dot.
   *
   * @warning This is a pretty inefficient function, avoid if you can
   */
  public function getAll()
  {
      if (!$this->finalized) {
          $this->autoFinalize();
      }

      $grouped = array();
      $flat = $this->plist->squash();
      foreach ($flat as $fullName => $value) {
          list($namespace, $directive) = explode('.', $fullName, 2);
          $grouped[$namespace][$directive] = $value;
      }
      return $grouped;
  }
  /**
   * Sets a value to configuration.
   *
   * Preferred form is set('Namespace.Directive', $value). A key without a
   * dot is treated as the deprecated three-argument form
   * set($namespace, $directive, $value) and raises an E_USER_NOTICE.
   *
   * Problems (finalized object, undefined directive, invalid type,
   * unsupported value, double alias) are reported through triggerError()
   * and leave the configuration unchanged.
   *
   * @param string $key   Directive key ("Namespace.Directive"), or the
   *                      namespace when using the deprecated form
   * @param mixed  $value Value to set, or the directive name when using
   *                      the deprecated form
   * @param mixed  $a     Value to set when using the deprecated form
   */
  public function set($key, $value, $a = null)
  {
      if (strpos($key, '.') === false) {
          // Deprecated API: shift the arguments into the modern layout.
          $namespace = $key;
          $directive = $value;
          $value = $a;
          $key = "$key.$directive";
          $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
      } else {
          list($namespace) = explode('.', $key);
      }
      if ($this->isFinalized('Cannot set directive after finalization')) {
          return;
      }
      if (!isset($this->def->info[$key])) {
          $this->triggerError(
              'Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
              E_USER_WARNING
          );
          return;
      }
      $def = $this->def->info[$key];

      if (isset($def->isAlias)) {
          if ($this->aliasMode) {
              // An alias resolved to another alias; the schema is broken.
              $this->triggerError(
                  'Double-aliases not allowed, please fix ' .
                  'ConfigSchema bug with ' . $key,
                  E_USER_ERROR
              );
              return;
          }
          // Re-dispatch to the real directive; aliasMode guards against
          // alias chains during the nested call.
          $this->aliasMode = true;
          $this->set($def->key, $value);
          $this->aliasMode = false;
          $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
          return;
      }

      // Raw type might be negative when using the fully optimized form
      // of stdclass, which indicates allow_null == true
      $rtype = is_int($def) ? $def : $def->type;
      if ($rtype < 0) {
          $type = -$rtype;
          $allow_null = true;
      } else {
          $type = $rtype;
          $allow_null = isset($def->allow_null);
      }

      try {
          $value = $this->parser->parse($value, $type, $allow_null);
      } catch (HTMLPurifier_VarParserException $e) {
          $this->triggerError(
              'Value for ' . $key . ' is of invalid type, should be ' .
              HTMLPurifier_VarParser::getTypeName($type),
              E_USER_WARNING
          );
          return;
      }
      if (is_string($value) && is_object($def)) {
          // resolve value alias if defined
          if (isset($def->aliases[$value])) {
              $value = $def->aliases[$value];
          }
          // check to see if the value is allowed
          if (isset($def->allowed) && !isset($def->allowed[$value])) {
              $this->triggerError(
                  'Value not supported, valid values are: ' .
                  $this->_listify($def->allowed),
                  E_USER_WARNING
              );
              return;
          }
      }
      $this->plist->set($key, $value);

      // reset definitions if the directives they depend on changed
      // this is a very costly process, so it's discouraged
      // with finalization
      if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
          $this->definitions[$namespace] = null;
      }

      // Invalidate both cached fingerprints: the per-namespace one and the
      // whole-configuration one memoized by getSerial().
      $this->serials[$namespace] = false;
      $this->serial = null;
  }
  /**
   * Error-reporting helper: renders the keys of a lookup array as a
   * comma-separated list.
   *
   * @param array $lookup Lookup array; only its keys are used
   *
   * @return string Keys joined with ", "
   */
  private function _listify($lookup)
  {
      $names = array();
      foreach (array_keys($lookup) as $name) {
          $names[] = $name;
      }
      return implode(', ', $names);
  }
  /**
   * Retrieves object reference to the HTML definition.
   *
   * Shorthand for getDefinition('HTML', $raw, $optimized).
   *
   * @param bool $raw Return a copy that has not been setup yet. Must be
   *             called before it's been setup, otherwise won't work.
   * @param bool $optimized If true, this method may return null, to
   *             indicate that a cached version of the modified
   *             definition object is available and no further edits
   *             are necessary. Consider using
   *             maybeGetRawHTMLDefinition, which is more explicitly
   *             named, instead.
   *
   * @return HTMLPurifier_HTMLDefinition
   */
  public function getHTMLDefinition($raw = false, $optimized = false)
  {
      $definition = $this->getDefinition('HTML', $raw, $optimized);
      return $definition;
  }
  /**
   * Retrieves object reference to the CSS definition.
   *
   * Shorthand for getDefinition('CSS', $raw, $optimized).
   *
   * @param bool $raw Return a copy that has not been setup yet. Must be
   *             called before it's been setup, otherwise won't work.
   * @param bool $optimized If true, this method may return null, to
   *             indicate that a cached version of the modified
   *             definition object is available and no further edits
   *             are necessary. Consider using
   *             maybeGetRawCSSDefinition, which is more explicitly
   *             named, instead.
   *
   * @return HTMLPurifier_CSSDefinition
   */
  public function getCSSDefinition($raw = false, $optimized = false)
  {
      $definition = $this->getDefinition('CSS', $raw, $optimized);
      return $definition;
  }
  /**
   * Retrieves object reference to the URI definition.
   *
   * Shorthand for getDefinition('URI', $raw, $optimized).
   *
   * @param bool $raw Return a copy that has not been setup yet. Must be
   *             called before it's been setup, otherwise won't work.
   * @param bool $optimized If true, this method may return null, to
   *             indicate that a cached version of the modified
   *             definition object is available and no further edits
   *             are necessary. Consider using
   *             maybeGetRawURIDefinition, which is more explicitly
   *             named, instead.
   *
   * @return HTMLPurifier_URIDefinition
   */
  public function getURIDefinition($raw = false, $optimized = false)
  {
      $definition = $this->getDefinition('URI', $raw, $optimized);
      return $definition;
  }
  /**
   * Retrieves a definition
   *
   * With $raw == false the fully set-up definition is returned, taken (in
   * this order) from memory, from the definition cache, or freshly
   * initialised, set up and added to the cache.
   *
   * With $raw == true an un-setup definition is returned for editing. If
   * $optimized is also true, null is returned instead when a set-up
   * definition is already available in memory or in the cache.
   *
   * @param string $type Type of definition: HTML, CSS, etc
   * @param bool $raw Whether or not definition should be returned raw
   * @param bool $optimized Only has an effect when $raw is true.  Whether
   *        or not to return null if the result is already present in
   *        the cache.  This is off by default for backwards
   *        compatibility reasons, but you need to do things this
   *        way in order to ensure that caching is done properly.
   *        Check out enduser-customize.html for more details.
   *        We probably won't ever change this default, as much as the
   *        maybe semantics is the "right thing to do."
   *
   * @throws HTMLPurifier_Exception When $optimized is set without $raw,
   *         when an optimized raw definition is requested without
   *         %$type.DefinitionID, when a raw definition is requested after
   *         setup, or when optimized/unoptimized retrievals are mixed.
   * @return HTMLPurifier_Definition|null
   */
  public function getDefinition($type, $raw = false, $optimized = false)
  {
      if ($optimized && !$raw) {
          throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
      }
      if (!$this->finalized) {
          $this->autoFinalize();
      }
      // temporarily suspend locks, so we can handle recursive definition calls
      $lock = $this->lock;
      $this->lock = null;
      $factory = HTMLPurifier_DefinitionCacheFactory::instance();
      $cache = $factory->create($type, $this);
      $this->lock = $lock;
      if (!$raw) {
          // full definition
          // ---------------
          // check if definition is in memory
          if (!empty($this->definitions[$type])) {
              $def = $this->definitions[$type];
              // check if the definition is setup
              if ($def->setup) {
                  return $def;
              } else {
                  $def->setup($this);
                  if ($def->optimized) {
                      $cache->add($def, $this);
                  }
                  return $def;
              }
          }
          // check if definition is in cache
          $def = $cache->get($this);
          if ($def) {
              // definition in cache, save to memory and return it
              $this->definitions[$type] = $def;
              return $def;
          }
          // initialize it
          $def = $this->initDefinition($type);
          // set it up
          $this->lock = $type;
          $def->setup($this);
          $this->lock = null;
          // save in cache
          $cache->add($def, $this);
          // return it
          return $def;
      } else {
          // raw definition
          // --------------
          // check preconditions
          $def = null;
          if ($optimized) {
              if (is_null($this->get($type . '.DefinitionID'))) {
                  // fatally error out if definition ID not set
                  throw new HTMLPurifier_Exception(
                      "Cannot retrieve raw version without specifying %$type.DefinitionID"
                  );
              }
          }
          if (!empty($this->definitions[$type])) {
              $def = $this->definitions[$type];
              if ($def->setup && !$optimized) {
                  $extra = $this->chatty ?
                      " (try moving this code block earlier in your initialization)" :
                      "";
                  throw new HTMLPurifier_Exception(
                      "Cannot retrieve raw definition after it has already been setup" .
                      $extra
                  );
              }
              if ($def->optimized === null) {
                  $extra = $this->chatty ? " (try flushing your cache)" : "";
                  throw new HTMLPurifier_Exception(
                      "Optimization status of definition is unknown" . $extra
                  );
              }
              if ($def->optimized !== $optimized) {
                  $msg = $optimized ? "optimized" : "unoptimized";
                  $extra = $this->chatty ?
                      " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)"
                      : "";
                  throw new HTMLPurifier_Exception(
                      "Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra
                  );
              }
          }
          // check if definition was in memory
          if ($def) {
              if ($def->setup) {
                  // invariant: $optimized === true (checked above)
                  return null;
              } else {
                  return $def;
              }
          }
          // if optimized, check if definition was in cache
          // (because we do the memory check first, this formulation
          // is prone to cache slamming, but I think
          // guaranteeing that either /all/ of the raw
          // setup code or /none/ of it is run is more important.)
          if ($optimized) {
              // This code path only gets run once; once we put
              // something in $definitions (which is guaranteed by the
              // trailing code), we always short-circuit above.
              $def = $cache->get($this);
              if ($def) {
                  // save the full definition for later, but don't
                  // return it yet
                  $this->definitions[$type] = $def;
                  return null;
              }
          }
          // check invariants for creation
          if (!$optimized) {
              if (!is_null($this->get($type . '.DefinitionID'))) {
                  if ($this->chatty) {
                      // The segment naming the directives must be
                      // double-quoted so that $type is interpolated.
                      $this->triggerError(
                          'Due to a documentation error in previous version of HTML Purifier, your ' .
                          'definitions are not being cached.  If this is OK, you can remove the ' .
                          "%{$type}.DefinitionRev and %{$type}.DefinitionID declaration.  Otherwise, " .
                          'modify your code to use maybeGetRawDefinition, and test if the returned ' .
                          'value is null before making any edits (if it is null, that means that a ' .
                          'cached version is available, and no raw operations are necessary).  See ' .
                          '<a href="http://htmlpurifier.org/docs/enduser-customize.html#optimized">' .
                          'Customize</a> for more details',
                          E_USER_WARNING
                      );
                  } else {
                      $this->triggerError(
                          "Useless DefinitionID declaration",
                          E_USER_WARNING
                      );
                  }
              }
          }
          // initialize it
          $def = $this->initDefinition($type);
          $def->optimized = $optimized;
          return $def;
      }
      // Defensive: both branches above return or throw.
      throw new HTMLPurifier_Exception("The impossible happened!");
  }
  /**
   * Creates a fresh, un-setup definition object of the requested type and
   * stores it in $this->definitions under that type.
   *
   * @param string $type What type of definition to create (HTML, CSS or URI)
   *
   * @return HTMLPurifier_CSSDefinition|HTMLPurifier_HTMLDefinition|HTMLPurifier_URIDefinition
   * @throws HTMLPurifier_Exception If the type is not one of the above
   */
  private function initDefinition($type)
  {
      // quick checks failed, let's create the object
      switch ($type) {
          case 'HTML':
              $created = new HTMLPurifier_HTMLDefinition();
              break;
          case 'CSS':
              $created = new HTMLPurifier_CSSDefinition();
              break;
          case 'URI':
              $created = new HTMLPurifier_URIDefinition();
              break;
          default:
              throw new HTMLPurifier_Exception(
                  "Definition of $type type not supported"
              );
      }
      $this->definitions[$type] = $created;
      return $created;
  }
  /**
   * Requests the raw definition of the given type in optimized mode,
   * i.e. getDefinition($name, true, true).
   *
   * @param string $name Type of definition: HTML, CSS, etc
   *
   * @return HTMLPurifier_Definition|null Null when getDefinition() reports
   *         that a set-up definition is already available
   */
  public function maybeGetRawDefinition($name)
  {
      $rawOrNull = $this->getDefinition($name, true, true);
      return $rawOrNull;
  }
  /**
   * Requests the raw HTML definition in optimized mode,
   * i.e. getDefinition('HTML', true, true).
   *
   * @return HTMLPurifier_Definition|null Null when getDefinition() reports
   *         that a set-up definition is already available
   */
  public function maybeGetRawHTMLDefinition()
  {
      $rawOrNull = $this->getDefinition('HTML', true, true);
      return $rawOrNull;
  }
  /**
   * Requests the raw CSS definition in optimized mode,
   * i.e. getDefinition('CSS', true, true).
   *
   * @return HTMLPurifier_Definition|null Null when getDefinition() reports
   *         that a set-up definition is already available
   */
  public function maybeGetRawCSSDefinition()
  {
      $rawOrNull = $this->getDefinition('CSS', true, true);
      return $rawOrNull;
  }
  /**
   * Requests the raw URI definition in optimized mode,
   * i.e. getDefinition('URI', true, true).
   *
   * @return HTMLPurifier_Definition|null Null when getDefinition() reports
   *         that a set-up definition is already available
   */
  public function maybeGetRawURIDefinition()
  {
      $rawOrNull = $this->getDefinition('URI', true, true);
      return $rawOrNull;
  }
  /**
   * Loads configuration values from an array.
   *
   * Two layouts are accepted per entry, after underscores in the key have
   * been replaced by dots:
   *  - "Namespace.Directive" => value
   *  - "Namespace" => array("Directive" => value, ...)
   *
   * Does nothing (beyond the error raised by isFinalized()) when the
   * configuration is already finalized.
   *
   * @param array $config_array Configuration associative array
   */
  public function loadArray($config_array)
  {
      if ($this->isFinalized('Cannot load directives after finalization')) {
          return;
      }
      foreach ($config_array as $rawKey => $entry) {
          $dotted = str_replace('_', '.', $rawKey);
          if (strpos($dotted, '.') === false) {
              // Bare namespace: the entry is a map of its directives.
              foreach ($entry as $directive => $directiveValue) {
                  $this->set($dotted . '.' . $directive, $directiveValue);
              }
              continue;
          }
          $this->set($dotted, $entry);
      }
  }
  /**
   * Lists, as array(namespace, directive) pairs, the schema directives
   * that may be exposed in a web form.
   *
   * $allowed may be true (no restriction), a single string, or a list of
   * strings. Entries without a dot whitelist a whole namespace; entries
   * with a dot whitelist one directive, or blacklist it when prefixed
   * with "-". Directive aliases and the DefinitionID / DefinitionRev
   * directives are always left out.
   *
   * @param array|string|bool $allowed List of allowed namespaces/directives
   * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
   *
   * @return array
   */
  public static function getAllowedDirectivesForForm($allowed, $schema = null)
  {
      if (!$schema) {
          $schema = HTMLPurifier_ConfigSchema::instance();
      }
      $restrict = ($allowed !== true);
      $nsWhitelist = array();
      $directiveWhitelist = array();
      $directiveBlacklist = array();
      if ($restrict) {
          $entries = is_string($allowed) ? array($allowed) : $allowed;
          foreach ($entries as $entry) {
              if (strpos($entry, '.') === false) {
                  // namespace
                  $nsWhitelist[$entry] = true;
              } elseif ($entry[0] == '-') {
                  // blacklisted directive
                  $directiveBlacklist[substr($entry, 1)] = true;
              } else {
                  // whitelisted directive
                  $directiveWhitelist[$entry] = true;
              }
          }
      }
      $pairs = array();
      foreach ($schema->info as $name => $def) {
          list($ns, $directive) = explode('.', $name, 2);
          if ($restrict) {
              $full = "$ns.$directive";
              if (isset($directiveBlacklist[$full])) {
                  continue;
              }
              if (!isset($directiveWhitelist[$full]) && !isset($nsWhitelist[$ns])) {
                  continue;
              }
          }
          if (isset($def->isAlias)) {
              continue;
          }
          if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') {
              continue;
          }
          $pairs[] = array($ns, $directive);
      }
      return $pairs;
  }
  /**
   * Builds a new configuration object from $_GET/$_POST data that was
   * posted via ConfigForm.
   *
   * The submitted data is filtered by prepareArrayFromForm() and the
   * result is handed to HTMLPurifier_Config::create().
   *
   * @param array $array $_GET or $_POST array to import
   * @param string|bool $index Index/name that the config variables are in
   * @param array|bool $allowed List of allowed namespaces/directives
   * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
   * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
   *
   * @return mixed Whatever HTMLPurifier_Config::create() returns
   */
  public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
  {
      $prepared = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
      return HTMLPurifier_Config::create($prepared, $schema);
  }
  /**
   * Merges configuration values from $_GET/$_POST into this object.
   * NOT STATIC.
   *
   * The submitted data is filtered by prepareArrayFromForm() against this
   * object's own schema ($this->def) and then applied with loadArray().
   *
   * @param array $array $_GET or $_POST array to import
   * @param string|bool $index Index/name that the config variables are in
   * @param array|bool $allowed List of allowed namespaces/directives
   * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
   */
  public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true)
  {
      $this->loadArray(
          HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def)
      );
  }
  /**
   * Prepares an array from a form into something usable for the more
   * strict parts of HTMLPurifier_Config
   *
   * Only directives reported by getAllowedDirectivesForForm() are copied.
   * A non-empty "Null_Namespace.Directive" field sets that directive to
   * null; otherwise the "Namespace.Directive" field is copied when present.
   *
   * @param array $array $_GET or $_POST array to import
   * @param string|bool $index Index/name that the config variables are in
   * @param array|bool $allowed List of allowed namespaces/directives
   * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
   * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
   *
   * @return array Nested array: namespace => directive => value
   */
  public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
  {
      if ($index !== false) {
          $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
      }
      // get_magic_quotes_gpc() is deprecated as of PHP 7.4 and removed in
      // PHP 8.0, so it is only consulted on older versions; this avoids
      // the deprecation notice on PHP 7.4.
      $mq = $mq_fix
          && version_compare(PHP_VERSION, '7.4.0', '<')
          && function_exists('get_magic_quotes_gpc')
          && get_magic_quotes_gpc();

      $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
      $ret = array();
      foreach ($allowed as $key) {
          list($ns, $directive) = $key;
          $skey = "$ns.$directive";
          if (!empty($array["Null_$skey"])) {
              // Explicit "set to null" checkbox wins over any posted value.
              $ret[$ns][$directive] = null;
              continue;
          }
          if (!isset($array[$skey])) {
              continue;
          }
          $value = $mq ? stripslashes($array[$skey]) : $array[$skey];
          $ret[$ns][$directive] = $value;
      }
      return $ret;
  }
  /**
   * Loads configuration values from an ini file
   *
   * The file is parsed with sections enabled and the result is passed to
   * loadArray(). If the file cannot be parsed, an E_USER_WARNING is raised
   * and nothing is loaded.
   *
   * @param string $filename Name of ini file
   */
  public function loadIni($filename)
  {
      if ($this->isFinalized('Cannot load directives after finalization')) {
          return;
      }
      $array = parse_ini_file($filename, true);
      if ($array === false) {
          // parse_ini_file() signals failure with false; do not hand that
          // to loadArray(), which expects something iterable.
          $this->triggerError(
              'Cannot load directives from ini file ' . htmlspecialchars($filename),
              E_USER_WARNING
          );
          return;
      }
      $this->loadArray($array);
  }
  /**
   * Reports whether the configuration object has been finalized.
   *
   * When it has, and an error message is supplied, that message is also
   * raised as an E_USER_ERROR through triggerError().
   *
   * @param string|bool $error String error message, or false for no error
   *
   * @return bool
   */
  public function isFinalized($error = false)
  {
      $shouldComplain = $this->finalized && $error;
      if ($shouldComplain) {
          $this->triggerError($error, E_USER_ERROR);
      }
      return $this->finalized;
  }
  /**
   * Finalizes the configuration when auto-finalize is switched on;
   * otherwise forces a re-squash of the property list instead.
   */
  public function autoFinalize()
  {
      if (!$this->autoFinalize) {
          $this->plist->squash(true);
          return;
      }
      $this->finalize();
  }
  /**
   * Finalizes a configuration object, prohibiting further change.
   *
   * Marks the object as finalized and drops the reference to the
   * variable parser.
   */
  public function finalize()
  {
      $this->parser = null;
      $this->finalized = true;
  }
  /**
   * Produces a nicely formatted error message by supplying the
   * stack frame information OUTSIDE of HTMLPurifier_Config.
   *
   * When $this->chatty is set, the backtrace is walked to the first frame
   * whose caller is not a HTMLPurifier_Config method, and that frame's
   * file and line are appended to the message.
   *
   * @param string $msg An error message
   * @param int $no An error number
   */
  protected function triggerError($msg, $no)
  {
      // determine previous stack frame
      $extra = '';
      if ($this->chatty) {
          $trace = debug_backtrace();
          // zip(tail(trace), trace) -- but PHP is not Haskell har har
          for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
              // Frames for plain functions carry no 'class' key, so it
              // must be tested with isset() before being compared.
              if (isset($trace[$i + 1]['class']) && $trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
                  continue;
              }
              $frame = $trace[$i];
              // 'file' and 'line' may be absent from a frame; only
              // decorate the message when both are present.
              if (isset($frame['line'], $frame['file'])) {
                  $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
              }
              break;
          }
      }
      trigger_error($msg . $extra, $no);
  }
  /**
   * Returns a serialized form of the configuration object that can
   * be reconstituted.
   *
   * The HTML, CSS and URI definitions are requested (in that order)
   * before the object itself is serialized.
   *
   * @return string
   */
  public function serialize()
  {
      foreach (array('HTML', 'CSS', 'URI') as $definitionType) {
          $this->getDefinition($definitionType);
      }
      return serialize($this);
  }
  2359. }
  /**
   * Configuration definition, defines directives and their defaults.
   */
  class HTMLPurifier_ConfigSchema
  {
      /**
       * Default values, indexed by directive key as passed to add().
       * @type array
       */
      public $defaults = array();

      /**
       * The default property list. Do not edit this property list.
       * @type HTMLPurifier_PropertyList
       */
      public $defaultPlist;

      /**
       * Definition of the directives, indexed by directive key
       * (e.g. 'Namespace.Directive'):
       *
       *      array(
       *          'Namespace.Directive' => new stdclass(),
       *      )
       *
       * The stdclass may have the following properties:
       *
       *  - If isAlias isn't set:
       *      - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
       *      - allow_null: If set, this directive allows null values
       *      - aliases: If set, an associative array of value aliases to real values
       *      - allowed: If set, a lookup array of allowed (string) values
       *  - If isAlias is set:
       *      - key: Key of the directive this directive aliases to
       *
       * In certain degenerate cases, stdclass will actually be an integer. In
       * that case, the value is equivalent to an stdclass with the type
       * property set to the integer. If the integer is negative, type is
       * equal to the absolute value of integer, and allow_null is true.
       *
       * This class is friendly with HTMLPurifier_Config. If you need introspection
       * about the schema, you're better off using the ConfigSchema_Interchange,
       * which uses more memory but has much richer information.
       * @type array
       */
      public $info = array();

      /**
       * Application-wide singleton
       * @type HTMLPurifier_ConfigSchema
       */
      protected static $singleton;

      /**
       * Creates an empty schema with a fresh default property list.
       */
      public function __construct()
      {
          $this->defaultPlist = new HTMLPurifier_PropertyList();
      }

      /**
       * Unserializes the default ConfigSchema from
       * HTMLPURIFIER_PREFIX/HTMLPurifier/ConfigSchema/schema.ser.
       *
       * Raises an E_USER_ERROR (including the SHA-1 of the file contents)
       * when unserialization fails.
       *
       * @return HTMLPurifier_ConfigSchema
       */
      public static function makeFromSerial()
      {
          $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
          $r = unserialize($contents);
          if (!$r) {
              $hash = sha1($contents);
              trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
          }
          return $r;
      }

      /**
       * Retrieves an instance of the application-wide configuration definition.
       *
       * @param HTMLPurifier_ConfigSchema|bool|null $prototype Schema to
       *        install as the singleton; true forces the singleton to be
       *        reloaded from the serialized schema; null returns the
       *        current singleton, loading it on first use.
       * @return HTMLPurifier_ConfigSchema
       */
      public static function instance($prototype = null)
      {
          if ($prototype === true) {
              // Forced reload. This must be tested before the generic
              // "prototype supplied" case, or true itself would be stored
              // as the singleton.
              HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
          } elseif ($prototype !== null) {
              HTMLPurifier_ConfigSchema::$singleton = $prototype;
          } elseif (HTMLPurifier_ConfigSchema::$singleton === null) {
              HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
          }
          return HTMLPurifier_ConfigSchema::$singleton;
      }

      /**
       * Defines a directive for configuration
       * @warning This method's signature is slightly different from the legacy
       *          define() static method! Beware!
       * @param string $key Name of directive
       * @param mixed $default Default value of directive
       * @param string|int $type Allowed type of the directive; a string is
       *          looked up in HTMLPurifier_VarParser::$types, an integer
       *          is used as is
       * @param bool $allow_null Whether or not to allow null values
       */
      public function add($key, $default, $type, $allow_null)
      {
          $obj = new stdclass();
          $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
          if ($allow_null) {
              $obj->allow_null = true;
          }
          $this->info[$key] = $obj;
          $this->defaults[$key] = $default;
          $this->defaultPlist->set($key, $default);
      }

      /**
       * Defines a directive value alias.
       *
       * Directive value aliases are convenient for developers because it lets
       * them set a directive to several values and get the same result.
       * @param string $key Name of Directive
       * @param array $aliases Hash of aliased values to the real alias
       */
      public function addValueAliases($key, $aliases)
      {
          if (!isset($this->info[$key]->aliases)) {
              $this->info[$key]->aliases = array();
          }
          foreach ($aliases as $alias => $real) {
              $this->info[$key]->aliases[$alias] = $real;
          }
      }

      /**
       * Defines a set of allowed values for a directive.
       * @warning This is slightly different from the corresponding static
       *          method definition.
       * @param string $key Name of directive
       * @param array $allowed Lookup array of allowed values
       */
      public function addAllowedValues($key, $allowed)
      {
          $this->info[$key]->allowed = $allowed;
      }

      /**
       * Defines a directive alias for backwards compatibility
       * @param string $key Directive that will be aliased
       * @param string $new_key Directive that the alias will be to
       */
      public function addAlias($key, $new_key)
      {
          $obj = new stdclass;
          $obj->key = $new_key;
          $obj->isAlias = true;
          $this->info[$key] = $obj;
      }

      /**
       * Replaces any stdclass that only has the type property with type
       * integer, and any stdclass that only has type and allow_null with
       * the negated type integer.
       *
       * Entries that are already integers are left alone, so calling this
       * method more than once is harmless.
       */
      public function postProcess()
      {
          foreach ($this->info as $key => $v) {
              if (!is_object($v)) {
                  // already collapsed by an earlier run
                  continue;
              }
              $propertyCount = count((array) $v);
              if ($propertyCount == 1) {
                  $this->info[$key] = $v->type;
              } elseif ($propertyCount == 2 && isset($v->allow_null)) {
                  $this->info[$key] = -$v->type;
              }
          }
      }
  }
  2521. /**
  2522. * @todo Unit test
  2523. */
  2524. class HTMLPurifier_ContentSets
  2525. {
  2526. /**
  2527. * List of content set strings (pipe separators) indexed by name.
  2528. * @type array
  2529. */
  2530. public $info = array();
  2531. /**
  2532. * List of content set lookups (element => true) indexed by name.
  2533. * @type array
  2534. * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
  2535. */
  2536. public $lookup = array();
  2537. /**
  2538. * Synchronized list of defined content sets (keys of info).
  2539. * @type array
  2540. */
  2541. protected $keys = array();
  2542. /**
  2543. * Synchronized list of defined content values (values of info).
  2544. * @type array
  2545. */
  2546. protected $values = array();
  2547. /**
  2548. * Merges in module's content sets, expands identifiers in the content
  2549. * sets and populates the keys, values and lookup member variables.
  2550. * @param HTMLPurifier_HTMLModule[] $modules List of HTMLPurifier_HTMLModule
  2551. */
  2552. public function __construct($modules)
  2553. {
  2554. if (!is_array($modules)) {
  2555. $modules = array($modules);
  2556. }
  2557. // populate content_sets based on module hints
  2558. // sorry, no way of overloading
  2559. foreach ($modules as $module) {
  2560. foreach ($module->content_sets as $key => $value) {
  2561. $temp = $this->convertToLookup($value);
  2562. if (isset($this->lookup[$key])) {
  2563. // add it into the existing content set
  2564. $this->lookup[$key] = array_merge($this->lookup[$key], $temp);
  2565. } else {
  2566. $this->lookup[$key] = $temp;
  2567. }
  2568. }
  2569. }
  2570. $old_lookup = false;
  2571. while ($old_lookup !== $this->lookup) {
  2572. $old_lookup = $this->lookup;
  2573. foreach ($this->lookup as $i => $set) {
  2574. $add = array();
  2575. foreach ($set as $element => $x) {
  2576. if (isset($this->lookup[$element])) {
  2577. $add += $this->lookup[$element];
  2578. unset($this->lookup[$i][$element]);
  2579. }
  2580. }
  2581. $this->lookup[$i] += $add;
  2582. }
  2583. }
  2584. foreach ($this->lookup as $key => $lookup) {
  2585. $this->info[$key] = implode(' | ', array_keys($lookup));
  2586. }
  2587. $this->keys = array_keys($this->info);
  2588. $this->values = array_values($this->info);
  2589. }
  2590. /**
  2591. * Accepts a definition; generates and assigns a ChildDef for it
  2592. * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef reference
  2593. * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
  2594. */
  2595. public function generateChildDef(&$def, $module)
  2596. {
  2597. if (!empty($def->child)) { // already done!
  2598. return;
  2599. }
  2600. $content_model = $def->content_model;
  2601. if (is_string($content_model)) {
  2602. // Assume that $this->keys is alphanumeric
  2603. $def->content_model = preg_replace_callback(
  2604. '/\b(' . implode('|', $this->keys) . ')\b/',
  2605. array($this, 'generateChildDefCallback'),
  2606. $content_model
  2607. );
  2608. //$def->content_model = str_replace(
  2609. // $this->keys, $this->values, $content_model);
  2610. }
  2611. $def->child = $this->getChildDef($def, $module);
  2612. }
  2613. public function generateChildDefCallback($matches)
  2614. {
  2615. return $this->info[$matches[0]];
  2616. }
  /**
   * Builds the ChildDef described by an element definition's
   * content_model and content_model_type member variables.
   *
   * Resolution order:
   *  1. an object already stored in content_model is returned as-is, with
   *     an E_USER_NOTICE because it belongs in ElementDef->child;
   *  2. the built-in types 'required', 'optional', 'empty' and 'custom';
   *  3. the defining module's own getChildDef(), when the module sets
   *     defines_child_def and returns something other than false;
   *  4. otherwise an E_USER_ERROR is raised and false is returned.
   *
   * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef to have ChildDef extracted
   * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
   * @return HTMLPurifier_ChildDef|bool ChildDef for the element, or false on failure
   */
  public function getChildDef($def, $module)
  {
      $model = $def->content_model;
      if (is_object($model)) {
          trigger_error(
              'Literal object child definitions should be stored in '.
              'ElementDef->child not ElementDef->content_model',
              E_USER_NOTICE
          );
          return $model;
      }

      // Built-in content model types (loose comparison, as a switch would do).
      $type = $def->content_model_type;
      if ($type == 'required') {
          return new HTMLPurifier_ChildDef_Required($model);
      } elseif ($type == 'optional') {
          return new HTMLPurifier_ChildDef_Optional($model);
      } elseif ($type == 'empty') {
          return new HTMLPurifier_ChildDef_Empty();
      } elseif ($type == 'custom') {
          return new HTMLPurifier_ChildDef_Custom($model);
      }

      // Unknown type: let the defining module have a go. The flag check
      // saves a function call for modules that define no custom ChildDefs.
      if ($module->defines_child_def) {
          $from_module = $module->getChildDef($def);
          if ($from_module !== false) {
              return $from_module;
          }
      }

      // Nobody could handle it: error out.
      trigger_error(
          'Could not determine which ChildDef class to instantiate',
          E_USER_ERROR
      );
      return false;
  }
  /**
   * Turns a pipe-separated list of element names into a lookup array.
   * All space characters are stripped before splitting, so 'a | b' and
   * 'a|b' give the same result.
   * @param string $string List of elements
   * @return array Lookup array: element name => true
   */
  protected function convertToLookup($string)
  {
      $lookup = array();
      $names = explode('|', str_replace(' ', '', $string));
      for ($i = 0, $count = count($names); $i < $count; $i++) {
          $lookup[$names[$i]] = true;
      }
      return $lookup;
  }
  2677. }
  /**
   * Registry object that holds named references describing the current
   * context.
   * @warning get() called with $ignore_error returns null for a name that
   *          was never registered, so a stored null cannot be told apart
   *          from absence through get() alone. Use exists(), or store
   *          false instead of null.
   * @note Since the variables Context deals with may not be objects,
   *       references are very important here! Do not remove!
   */
  class HTMLPurifier_Context
  {
      /**
       * Map of registered name to variable reference.
       * @type array
       */
      private $_storage = array();

      /**
       * Registers a variable into the context. Registering a name twice
       * raises an E_USER_ERROR and leaves the first registration in place.
       * @param string $name String name
       * @param mixed $ref Reference to variable to be registered
       */
      public function register($name, &$ref)
      {
          if (!array_key_exists($name, $this->_storage)) {
              $this->_storage[$name] =& $ref;
              return;
          }
          trigger_error(
              "Name $name produces collision, cannot re-register",
              E_USER_ERROR
          );
      }

      /**
       * Retrieves a variable reference from the context.
       * @param string $name String name
       * @param bool $ignore_error When true, a missing name yields null
       *        silently instead of raising an E_USER_ERROR first
       * @return mixed The registered variable (by reference), or null
       */
      public function &get($name, $ignore_error = false)
      {
          if (array_key_exists($name, $this->_storage)) {
              return $this->_storage[$name];
          }
          if (!$ignore_error) {
              trigger_error(
                  "Attempted to retrieve non-existent variable $name",
                  E_USER_ERROR
              );
          }
          // a real variable is needed because this method returns by reference
          $missing = null;
          return $missing;
      }

      /**
       * Destroys a variable in the context. Destroying a name that is not
       * registered raises an E_USER_ERROR.
       * @param string $name String name
       */
      public function destroy($name)
      {
          if (array_key_exists($name, $this->_storage)) {
              unset($this->_storage[$name]);
              return;
          }
          trigger_error(
              "Attempted to destroy non-existent variable $name",
              E_USER_ERROR
          );
      }

      /**
       * Checks whether or not the variable exists.
       * @param string $name String name
       * @return bool
       */
      public function exists($name)
      {
          $is_registered = array_key_exists($name, $this->_storage);
          return $is_registered;
      }

      /**
       * Loads a series of variables from an associative array; each key is
       * registered under its own name.
       * @param array $context_array Assoc array of variables to load
       */
      public function loadArray($context_array)
      {
          foreach (array_keys($context_array) as $name) {
              $this->register($name, $context_array[$name]);
          }
      }
  }
  /**
   * Abstract base for Definition cache managers. Provides the key handling
   * and type checking shared by all implementations; the storage
   * operations themselves are left abstract.
   * @todo Create a separate maintenance file advanced users can use to
   *       cache their custom HTMLDefinition, which can be loaded
   *       via a configuration directive
   * @todo Implement memcached
   */
  abstract class HTMLPurifier_DefinitionCache
  {
      /**
       * Type of definition objects this cache instance handles.
       * @type string
       */
      public $type;

      /**
       * @param string $type Type of definition objects this instance of the
       *      cache will handle.
       */
      public function __construct($type)
      {
          $this->type = $type;
      }

      /**
       * Generates a unique identifier for a particular configuration, in the
       * form "version,batch serial,definition revision".
       * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
       * @return string
       */
      public function generateKey($config)
      {
          $parts = array(
              $config->version,
              $config->getBatchSerial($this->type),
              $config->get($this->type . '.DefinitionRev'),
          );
          return implode(',', $parts);
      }

      /**
       * Tests whether or not a key is old with respect to the configuration's
       * version and revision number.
       *
       * A key is old when it has fewer than three comma-separated fields,
       * when its version differs from the configuration's version, or when
       * its serial matches the configuration's and its revision is lower.
       * @param string $key Key to test
       * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config to test against
       * @return bool
       */
      public function isOld($key, $config)
      {
          // malformed keys are always old
          if (substr_count($key, ',') < 2) {
              return true;
          }
          $fields = explode(',', $key, 3);
          $key_version = $fields[0];
          $key_serial = $fields[1];
          $key_revision = $fields[2];
          // version mismatch, is always old
          if (version_compare($key_version, $config->version) != 0) {
              return true;
          }
          // versions match: old only if ids match and the revision is behind
          return $key_serial == $config->getBatchSerial($this->type) &&
              $key_revision < $config->get($this->type . '.DefinitionRev');
      }

      /**
       * Checks that a definition's type matches the cache's type.
       * @note Triggers an error (default level) on mismatch
       * @param HTMLPurifier_Definition $def Definition object to check
       * @return bool true if good, false if not
       */
      public function checkDefType($def)
      {
          if ($def->type === $this->type) {
              return true;
          }
          trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
          return false;
      }

      /**
       * Adds a definition object to the cache
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       */
      abstract public function add($def, $config);

      /**
       * Unconditionally saves a definition object to the cache
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       */
      abstract public function set($def, $config);

      /**
       * Replaces an object in the cache
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       */
      abstract public function replace($def, $config);

      /**
       * Retrieves a definition object from the cache
       * @param HTMLPurifier_Config $config
       */
      abstract public function get($config);

      /**
       * Removes a definition object from the cache
       * @param HTMLPurifier_Config $config
       */
      abstract public function remove($config);

      /**
       * Clears all objects from cache
       * @param HTMLPurifier_Config $config
       */
      abstract public function flush($config);

      /**
       * Clears all expired (older version or revision) objects from cache
       * @note Be careful when implementing this method as flush. Flush must
       *       not interfere with other Definition types, and cleanup()
       *       should not be repeatedly called by userland code.
       * @param HTMLPurifier_Config $config
       */
      abstract public function cleanup($config);
  }
  /**
   * Responsible for creating definition caches.
   *
   * Caches are built once per (implementation, definition type) pair,
   * wrapped in every registered decorator, and then reused.
   */
  class HTMLPurifier_DefinitionCacheFactory
  {
      /**
       * Already-built caches, indexed by implementation name and then by
       * definition type.
       * @type array
       */
      protected $caches = array('Serializer' => array());

      /**
       * Map of short implementation name to full class name.
       * @type array
       */
      protected $implementations = array();

      /**
       * Decorators applied to every new cache, indexed by decorator name.
       * @type HTMLPurifier_DefinitionCache_Decorator[]
       */
      protected $decorators = array();

      /**
       * Initialize default decorators
       */
      public function setup()
      {
          $this->addDecorator('Cleanup');
      }

      /**
       * Retrieves an instance of global definition cache factory.
       *
       * - Called with no argument: returns the shared instance, creating and
       *   setting it up on first use.
       * - Called with true: discards the shared instance and builds a fresh
       *   one.
       * - Called with any other non-null value: installs that value as the
       *   shared instance.
       *
       * @param HTMLPurifier_DefinitionCacheFactory|bool $prototype
       * @return HTMLPurifier_DefinitionCacheFactory
       */
      public static function instance($prototype = null)
      {
          static $instance;
          // The reset flag has to be tested before the generic "prototype
          // supplied" case; otherwise true itself would be stored as the
          // instance and handed back to every caller.
          if ($prototype === true || ($prototype === null && $instance === null)) {
              $instance = new HTMLPurifier_DefinitionCacheFactory();
              $instance->setup();
          } elseif ($prototype !== null) {
              $instance = $prototype;
          }
          return $instance;
      }

      /**
       * Registers a new definition cache object
       * @param string $short Short name of cache object, for reference
       * @param string $long Full class name of cache object, for construction
       */
      public function register($short, $long)
      {
          $this->implementations[$short] = $long;
      }

      /**
       * Factory method that creates a cache object based on configuration.
       *
       * The implementation is chosen by the Cache.DefinitionImpl directive:
       * null yields a (non-memoized) Null cache; a registered name whose
       * class is already loaded yields that class; anything else falls back
       * to the Serializer, with an E_USER_WARNING unless 'Serializer' was
       * what was asked for.
       * @param string $type Name of definitions handled by cache
       * @param HTMLPurifier_Config $config Config instance
       * @return mixed
       */
      public function create($type, $config)
      {
          $method = $config->get('Cache.DefinitionImpl');
          if ($method === null) {
              return new HTMLPurifier_DefinitionCache_Null($type);
          }
          // reuse a previously built cache for this implementation and type
          if (!empty($this->caches[$method][$type])) {
              return $this->caches[$method][$type];
          }
          if (isset($this->implementations[$method]) &&
              class_exists($class = $this->implementations[$method], false)) {
              $cache = new $class($type);
          } else {
              if ($method != 'Serializer') {
                  trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
              }
              $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
          }
          foreach ($this->decorators as $decorator) {
              $new_cache = $decorator->decorate($cache);
              // prevent infinite recursion in PHP 4
              unset($cache);
              $cache = $new_cache;
          }
          $this->caches[$method][$type] = $cache;
          return $this->caches[$method][$type];
      }

      /**
       * Registers a decorator to add to all new cache objects. A string is
       * taken as the suffix of a HTMLPurifier_DefinitionCache_Decorator_*
       * class, which is instantiated. A decorator registered under an
       * existing name replaces the earlier one.
       * @param HTMLPurifier_DefinitionCache_Decorator|string $decorator An instance or the name of a decorator
       */
      public function addDecorator($decorator)
      {
          if (is_string($decorator)) {
              $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
              $decorator = new $class;
          }
          $this->decorators[$decorator->name] = $decorator;
      }
  }
  /**
   * Plain value object describing a document type, including which
   * modules need to be loaded for it.
   * @note This class is inspected by Printer_HTMLDefinition->renderDoctype.
   *       If structure changes, please update that function.
   */
  class HTMLPurifier_Doctype
  {
      /**
       * Full name of doctype
       * @type string
       */
      public $name;

      /**
       * Standard modules this doctype uses, given either as string
       * identifiers or as literal module objects
       * @type array
       */
      public $modules = array();

      /**
       * Modules to use for tidying up code
       * @type array
       */
      public $tidyModules = array();

      /**
       * Whether the language is derived from XML (i.e. XHTML)
       * @type bool
       */
      public $xml = true;

      /**
       * Alias names for this doctype
       * @type array
       */
      public $aliases = array();

      /**
       * Public DTD identifier
       * @type string
       */
      public $dtdPublic;

      /**
       * System DTD identifier
       * @type string
       */
      public $dtdSystem;

      /**
       * Stores each argument on the property of the corresponding name.
       * @param string $name Full name of doctype
       * @param bool $xml Whether the doctype is XML-derived
       * @param array $modules Standard modules
       * @param array $tidyModules Tidy modules
       * @param array $aliases Alias names
       * @param string $dtd_public Public DTD identifier
       * @param string $dtd_system System DTD identifier
       */
      public function __construct(
          $name = null,
          $xml = true,
          $modules = array(),
          $tidyModules = array(),
          $aliases = array(),
          $dtd_public = null,
          $dtd_system = null
      ) {
          // identity
          $this->name = $name;
          $this->aliases = $aliases;
          // language flavour and module lists
          $this->xml = $xml;
          $this->modules = $modules;
          $this->tidyModules = $tidyModules;
          // DTD identifiers
          $this->dtdPublic = $dtd_public;
          $this->dtdSystem = $dtd_system;
      }
  }
  /**
   * Keeps track of the known doctypes and of the alias names that resolve
   * to them.
   */
  class HTMLPurifier_DoctypeRegistry
  {
      /**
       * Hash of doctype names to doctype objects.
       * @type array
       */
      protected $doctypes;

      /**
       * Lookup table of aliases to real doctype names.
       * @type array
       */
      protected $aliases;

      /**
       * Registers a doctype to the registry
       * @note Accepts a fully-formed doctype object, or the
       *       parameters for constructing a doctype object. When an object
       *       is passed, the remaining parameters are not used.
       * @param string $doctype Name of doctype or literal doctype object
       * @param bool $xml
       * @param array $modules Modules doctype will load
       * @param array $tidy_modules Modules doctype will load for certain modes
       * @param array $aliases Alias names for doctype
       * @param string $dtd_public
       * @param string $dtd_system
       * @return HTMLPurifier_Doctype Editable registered doctype
       */
      public function register(
          $doctype,
          $xml = true,
          $modules = array(),
          $tidy_modules = array(),
          $aliases = array(),
          $dtd_public = null,
          $dtd_system = null
      ) {
          // scalars are accepted as shorthand for one-element lists
          $modules = is_array($modules) ? $modules : array($modules);
          $tidy_modules = is_array($tidy_modules) ? $tidy_modules : array($tidy_modules);
          $aliases = is_array($aliases) ? $aliases : array($aliases);

          if (!is_object($doctype)) {
              $doctype = new HTMLPurifier_Doctype(
                  $doctype,
                  $xml,
                  $modules,
                  $tidy_modules,
                  $aliases,
                  $dtd_public,
                  $dtd_system
              );
          }

          $name = $doctype->name;
          $this->doctypes[$name] = $doctype;

          // hook up aliases, but never let one shadow a real doctype name
          foreach ($doctype->aliases as $alias) {
              if (!isset($this->doctypes[$alias])) {
                  $this->aliases[$alias] = $name;
              }
          }

          // the name is a real doctype now, so drop any alias of that name
          if (isset($this->aliases[$name])) {
              unset($this->aliases[$name]);
          }

          return $doctype;
      }

      /**
       * Retrieves reference to a doctype of a certain name
       * @note This function resolves aliases
       * @note When possible, use the more fully-featured make()
       * @note An unknown name raises an E_USER_ERROR; if execution
       *       continues, an anonymous doctype of that name is returned.
       * @param string $doctype Name of doctype
       * @return HTMLPurifier_Doctype Editable doctype object
       */
      public function get($doctype)
      {
          $resolved = isset($this->aliases[$doctype]) ? $this->aliases[$doctype] : $doctype;
          if (isset($this->doctypes[$resolved])) {
              return $this->doctypes[$resolved];
          }
          trigger_error('Doctype ' . htmlspecialchars($resolved) . ' does not exist', E_USER_ERROR);
          return new HTMLPurifier_Doctype($resolved);
      }

      /**
       * Creates a doctype based on a configuration object,
       * will perform initialization on the doctype
       * @note Use this function to get a copy of doctype that config
       *       can hold on to (this is necessary in order to tell
       *       Generator whether or not the current document is XML
       *       based or not).
       * @param HTMLPurifier_Config $config
       * @return HTMLPurifier_Doctype
       */
      public function make($config)
      {
          $name = $this->getDoctypeFromConfig($config);
          $registered = $this->get($name);
          return clone $registered;
      }

      /**
       * Retrieves the doctype name from the configuration object.
       *
       * HTML.Doctype wins when non-empty, then HTML.CustomDoctype;
       * otherwise the name is assembled from the HTML.XHTML and
       * HTML.Strict flags.
       * @param HTMLPurifier_Config $config
       * @return string
       */
      public function getDoctypeFromConfig($config)
      {
          // recommended directives, in order of precedence
          foreach (array('HTML.Doctype', 'HTML.CustomDoctype') as $directive) {
              $explicit = $config->get($directive);
              if (!empty($explicit)) {
                  return $explicit;
              }
          }
          // backwards-compatibility
          $family = $config->get('HTML.XHTML') ? 'XHTML 1.0' : 'HTML 4.01';
          $flavour = $config->get('HTML.Strict') ? ' Strict' : ' Transitional';
          return $family . $flavour;
      }
  }
  /**
   * Structure that stores an HTML element definition. Used by
   * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
   * @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition.
   *       Please update that class too.
   * @warning If you add new properties to this class, you MUST update
   *          the mergeIn() method.
   */
  class HTMLPurifier_ElementDef
  {
      /**
       * Does the definition work by itself, or is it created solely
       * for the purpose of merging into another definition?
       * @type bool
       */
      public $standalone = true;

      /**
       * Associative array of attribute name to HTMLPurifier_AttrDef.
       * @type array
       * @note Before being processed by HTMLPurifier_AttrCollections
       *       when modules are finalized during
       *       HTMLPurifier_HTMLDefinition->setup(), this array may also
       *       contain an array at index 0 that indicates which attribute
       *       collections to load into the full array. It may also
       *       contain string identifiers in lieu of HTMLPurifier_AttrDef,
       *       see HTMLPurifier_AttrTypes on how they are expanded during
       *       HTMLPurifier_HTMLDefinition->setup() processing.
       */
      public $attr = array();

      // XXX: Design note: currently, it's not possible to override
      // previously defined AttrTransforms without messing around with
      // the final generated config. This is by design; a previous version
      // used an associated list of attr_transform, but it was extremely
      // easy to accidentally override other attribute transforms by
      // forgetting to specify an index (and just using 0.) While we
      // could check this by checking the index number and complaining,
      // there is a second problem which is that it is not at all easy to
      // tell when something is getting overridden. Combine this with a
      // codebase where this isn't really being used, and it's perfect for
      // nuking.

      /**
       * List of tags HTMLPurifier_AttrTransform to be done before validation.
       * @type array
       */
      public $attr_transform_pre = array();

      /**
       * List of tags HTMLPurifier_AttrTransform to be done after validation.
       * @type array
       */
      public $attr_transform_post = array();

      /**
       * HTMLPurifier_ChildDef of this tag.
       * @type HTMLPurifier_ChildDef
       */
      public $child;

      /**
       * Abstract string representation of internal ChildDef rules.
       * @see HTMLPurifier_ContentSets for how this is parsed and then transformed
       * into an HTMLPurifier_ChildDef.
       * @warning This is a temporary variable that is not available after
       *      being processed by HTMLDefinition
       * @type string
       */
      public $content_model;

      /**
       * Value of $child->type, used to determine which ChildDef to use,
       * used in combination with $content_model.
       * @warning This must be lowercase
       * @warning This is a temporary variable that is not available after
       *      being processed by HTMLDefinition
       * @type string
       */
      public $content_model_type;

      /**
       * Does the element have a content model (#PCDATA | Inline)*? This
       * is important for chameleon ins and del processing in
       * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
       * have to worry about this one.
       * @type bool
       */
      public $descendants_are_inline = false;

      /**
       * List of the names of required attributes this element has.
       * Dynamically populated by HTMLPurifier_HTMLDefinition::getElement()
       * @type array
       */
      public $required_attr = array();

      /**
       * Lookup table of tags excluded from all descendants of this tag.
       * @type array
       * @note SGML permits exclusions for all descendants, but this is
       *       not possible with DTDs or XML Schemas. W3C has elected to
       *       use complicated compositions of content_models to simulate
       *       exclusion for children, but we go the simpler, SGML-style
       *       route of flat-out exclusions, which correctly apply to
       *       all descendants and not just children. Note that the XHTML
       *       Modularization Abstract Modules are blithely unaware of such
       *       distinctions.
       */
      public $excludes = array();

      /**
       * This tag is explicitly auto-closed by the following tags.
       * @type array
       */
      public $autoclose = array();

      /**
       * If a foreign element is found in this element, test if it is
       * allowed by this sub-element; if it is, instead of closing the
       * current element, place it inside this element.
       * @type string
       */
      public $wrap;

      /**
       * Whether or not this is a formatting element affected by the
       * "Active Formatting Elements" algorithm.
       * @type bool
       */
      public $formatting;

      /**
       * Low-level factory constructor for creating new standalone element defs
       * @param string $content_model Value for the content_model property
       * @param string $content_model_type Value for the content_model_type property
       * @param array $attr Value for the attr property
       * @return HTMLPurifier_ElementDef
       */
      public static function create($content_model, $content_model_type, $attr)
      {
          $def = new HTMLPurifier_ElementDef();
          $def->content_model = $content_model;
          $def->content_model_type = $content_model_type;
          $def->attr = $attr;
          return $def;
      }

      /**
       * Merges the values of another element definition into this one.
       * Values from the new element def take precedence if a value is
       * not mergeable.
       *
       * Merged: attr, excludes, attr_transform_pre, attr_transform_post,
       * content_model, content_model_type, child, formatting and
       * descendants_are_inline. The properties standalone, required_attr,
       * autoclose and wrap are not touched by this method.
       *
       * @param HTMLPurifier_ElementDef $def
       */
      public function mergeIn($def)
      {
          // later keys takes precedence
          foreach ($def->attr as $k => $v) {
              if ($k === 0) {
                  // merge in the includes
                  // sorry, no way to override an include
                  foreach ($v as $v2) {
                      $this->attr[0][] = $v2;
                  }
                  continue;
              }
              if ($v === false) {
                  // false is the marker for "remove this attribute"
                  if (isset($this->attr[$k])) {
                      unset($this->attr[$k]);
                  }
                  continue;
              }
              $this->attr[$k] = $v;
          }
          $this->_mergeAssocArray($this->excludes, $def->excludes);
          $this->attr_transform_pre = array_merge($this->attr_transform_pre, $def->attr_transform_pre);
          $this->attr_transform_post = array_merge($this->attr_transform_post, $def->attr_transform_post);

          if (!empty($def->content_model)) {
              // #SUPER in the incoming model stands for the current model.
              // An unset model is substituted as the empty string explicitly,
              // because handing null to str_replace() is deprecated as of
              // PHP 8.1.
              $super = ($this->content_model === null) ? '' : $this->content_model;
              $this->content_model =
                  str_replace("#SUPER", $super, $def->content_model);
              // the cached ChildDef no longer matches the new model
              $this->child = false;
          }
          if (!empty($def->content_model_type)) {
              $this->content_model_type = $def->content_model_type;
              $this->child = false;
          }
          if (!is_null($def->child)) {
              $this->child = $def->child;
          }
          if (!is_null($def->formatting)) {
              $this->formatting = $def->formatting;
          }
          if ($def->descendants_are_inline) {
              $this->descendants_are_inline = $def->descendants_are_inline;
          }
      }

      /**
       * Merges one array into another; an entry of $a2 whose value is
       * exactly false removes the key of the same name from $a1 instead
       * of being copied.
       * @param array $a1 Array by reference that is merged into
       * @param array $a2 Array that merges into $a1
       */
      private function _mergeAssocArray(&$a1, $a2)
      {
          foreach ($a2 as $k => $v) {
              if ($v === false) {
                  if (isset($a1[$k])) {
                      unset($a1[$k]);
                  }
                  continue;
              }
              $a1[$k] = $v;
          }
      }
  }
  3361. /**
  3362. * A UTF-8 specific character encoder that handles cleaning and transforming.
  3363. * @note All functions in this class should be static.
  3364. */
  3365. class HTMLPurifier_Encoder
  3366. {
  /**
   * Private constructor: the class is meant to be used through its static
   * methods only, so any attempt to run it raises an E_USER_ERROR.
   */
  private function __construct()
  {
      $message = 'Cannot instantiate encoder, call methods statically';
      trigger_error($message, E_USER_ERROR);
  }
  /**
   * Error handler that deliberately does nothing; installed temporarily
   * via set_error_handler() as an alternative to the shut-up operator.
   */
  public static function muteErrorHandler()
  {
      // intentionally empty: swallow the error
      return;
  }
  /**
   * iconv wrapper which mutes errors, but doesn't work around bugs.
   * The muting handler is installed only for the duration of the call
   * and removed again before returning.
   * @param string $in Input encoding
   * @param string $out Output encoding
   * @param string $text The text to convert
   * @return string Whatever iconv() returned for the given arguments
   */
  public static function unsafeIconv($in, $out, $text)
  {
      $silencer = array('HTMLPurifier_Encoder', 'muteErrorHandler');
      set_error_handler($silencer);
      $converted = iconv($in, $out, $text);
      restore_error_handler();
      return $converted;
  }
  /**
   * iconv wrapper which mutes errors and works around bugs.
   *
   * What happens depends on the code returned by
   * self::testIconvTruncateBug():
   *  - self::ICONV_OK: the text is converted in one unsafeIconv() call;
   *  - self::ICONV_TRUNCATES: only input declared as 'utf-8' is handled.
   *    Text longer than $max_chunk_size bytes is converted in chunks of
   *    at most that size, each one cut just before a byte that is not a
   *    UTF-8 continuation byte, and the converted chunks are concatenated;
   *  - any other code: false is returned.
   *
   * @param string $in Input encoding
   * @param string $out Output encoding
   * @param string $text The text to convert
   * @param int $max_chunk_size Upper bound in bytes for one chunk; values
   *        below 4 are rejected with an E_USER_WARNING
   * @return string|bool Converted text, or false when conversion is not possible
   */
  public static function iconv($in, $out, $text, $max_chunk_size = 8000)
  {
      $code = self::testIconvTruncateBug();
      if ($code == self::ICONV_OK) {
          return self::unsafeIconv($in, $out, $text);
      }
      // we can only work around the truncation bug, and only if the input
      // character set is utf-8
      if ($code != self::ICONV_TRUNCATES || $in != 'utf-8') {
          return false;
      }
      if ($max_chunk_size < 4) {
          trigger_error('max_chunk_size is too small', E_USER_WARNING);
          return false;
      }
      // short input needs no chunking at all
      $length = strlen($text);
      if ($length <= $max_chunk_size) {
          return self::unsafeIconv($in, $out, $text);
      }

      $result = '';
      $offset = 0;
      // as long as more than one chunk's worth of bytes remains...
      while ($offset + $max_chunk_size < $length) {
          // ...wibble the boundary: back off up to three bytes until the
          // byte right after the chunk is not a continuation byte (10xxxxxx)
          $chunk_size = 0;
          for ($back = 0; $back < 4; $back++) {
              $byte = ord($text[$offset + $max_chunk_size - $back]);
              if (0x80 != (0xC0 & $byte)) {
                  $chunk_size = $max_chunk_size - $back;
                  break;
              }
          }
          if ($chunk_size === 0) {
              return false; // rather confusing UTF-8...
          }
          $result .= self::unsafeIconv($in, $out, substr($text, $offset, $chunk_size));
          $offset += $chunk_size;
      }
      // whatever is left fits into a single final chunk
      $result .= self::unsafeIconv($in, $out, substr($text, $offset));
      return $result;
  }
  3451. /**
  3452. * Cleans a UTF-8 string for well-formedness and SGML validity
  3453. *
  3454. * It will parse according to UTF-8 and return a valid UTF8 string, with
  3455. * non-SGML codepoints excluded.
  3456. *
  3457. * @param string $str The string to clean
  3458. * @param bool $force_php
  3459. * @return string
  3460. *
  3461. * @note Just for reference, the non-SGML code points are 0 to 31 and
  3462. * 127 to 159, inclusive. However, we allow code points 9, 10
  3463. * and 13, which are the tab, line feed and carriage return
  3464. * respectively. 128 and above the code points map to multibyte
  3465. * UTF-8 representations.
  3466. *
  3467. * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
  3468. * hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
  3469. * LGPL license. Notes on what changed are inside, but in general,
  3470. * the original code transformed UTF-8 text into an array of integer
  3471. * Unicode codepoints. Understandably, transforming that back to
  3472. * a string would be somewhat expensive, so the function was modded to
  3473. * directly operate on the string. However, this discourages code
  3474. * reuse, and the logic enumerated here would be useful for any
  3475. * function that needs to be able to understand UTF-8 characters.
  3476. * As of right now, only smart lossless character encoding converters
  3477. * would need that, and I'm probably not going to implement them.
  3478. * Once again, PHP 6 should solve all our problems.
  3479. */
  3480. public static function cleanUTF8($str, $force_php = false)
  3481. {
  3482. // UTF-8 validity is checked since PHP 4.3.5
  3483. // This is an optimization: if the string is already valid UTF-8, no
  3484. // need to do PHP stuff. 99% of the time, this will be the case.
  3485. // The regexp matches the XML char production, as well as excluding
  3486. // non-SGML codepoints U+007F to U+009F
  3487. if (preg_match(
  3488. '/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
  3489. $str
  3490. )) {
  3491. return $str;
  3492. }
  3493. $mState = 0; // cached expected number of octets after the current octet
  3494. // until the beginning of the next UTF8 character sequence
  3495. $mUcs4 = 0; // cached Unicode character
  3496. $mBytes = 1; // cached expected number of octets in the current sequence
  3497. // original code involved an $out that was an array of Unicode
  3498. // codepoints. Instead of having to convert back into UTF-8, we've
  3499. // decided to directly append valid UTF-8 characters onto a string
  3500. // $out once they're done. $char accumulates raw bytes, while $mUcs4
  3501. // turns into the Unicode code point, so there's some redundancy.
  3502. $out = '';
  3503. $char = '';
  3504. $len = strlen($str);
  3505. for ($i = 0; $i < $len; $i++) {
  3506. $in = ord($str{$i});
  3507. $char .= $str[$i]; // append byte to char
  3508. if (0 == $mState) {
  3509. // When mState is zero we expect either a US-ASCII character
  3510. // or a multi-octet sequence.
  3511. if (0 == (0x80 & ($in))) {
  3512. // US-ASCII, pass straight through.
  3513. if (($in <= 31 || $in == 127) &&
  3514. !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
  3515. ) {
  3516. // control characters, remove
  3517. } else {
  3518. $out .= $char;
  3519. }
  3520. // reset
  3521. $char = '';
  3522. $mBytes = 1;
  3523. } elseif (0xC0 == (0xE0 & ($in))) {
  3524. // First octet of 2 octet sequence
  3525. $mUcs4 = ($in);
  3526. $mUcs4 = ($mUcs4 & 0x1F) << 6;
  3527. $mState = 1;
  3528. $mBytes = 2;
  3529. } elseif (0xE0 == (0xF0 & ($in))) {
  3530. // First octet of 3 octet sequence
  3531. $mUcs4 = ($in);
  3532. $mUcs4 = ($mUcs4 & 0x0F) << 12;
  3533. $mState = 2;
  3534. $mBytes = 3;
  3535. } elseif (0xF0 == (0xF8 & ($in))) {
  3536. // First octet of 4 octet sequence
  3537. $mUcs4 = ($in);
  3538. $mUcs4 = ($mUcs4 & 0x07) << 18;
  3539. $mState = 3;
  3540. $mBytes = 4;
  3541. } elseif (0xF8 == (0xFC & ($in))) {
  3542. // First octet of 5 octet sequence.
  3543. //
  3544. // This is illegal because the encoded codepoint must be
  3545. // either:
  3546. // (a) not the shortest form or
  3547. // (b) outside the Unicode range of 0-0x10FFFF.
  3548. // Rather than trying to resynchronize, we will carry on
  3549. // until the end of the sequence and let the later error
  3550. // handling code catch it.
  3551. $mUcs4 = ($in);
  3552. $mUcs4 = ($mUcs4 & 0x03) << 24;
  3553. $mState = 4;
  3554. $mBytes = 5;
  3555. } elseif (0xFC == (0xFE & ($in))) {
  3556. // First octet of 6 octet sequence, see comments for 5
  3557. // octet sequence.
  3558. $mUcs4 = ($in);
  3559. $mUcs4 = ($mUcs4 & 1) << 30;
  3560. $mState = 5;
  3561. $mBytes = 6;
  3562. } else {
  3563. // Current octet is neither in the US-ASCII range nor a
  3564. // legal first octet of a multi-octet sequence.
  3565. $mState = 0;
  3566. $mUcs4 = 0;
  3567. $mBytes = 1;
  3568. $char = '';
  3569. }
  3570. } else {
  3571. // When mState is non-zero, we expect a continuation of the
  3572. // multi-octet sequence
  3573. if (0x80 == (0xC0 & ($in))) {
  3574. // Legal continuation.
  3575. $shift = ($mState - 1) * 6;
  3576. $tmp = $in;
  3577. $tmp = ($tmp & 0x0000003F) << $shift;
  3578. $mUcs4 |= $tmp;
  3579. if (0 == --$mState) {
  3580. // End of the multi-octet sequence. mUcs4 now contains
  3581. // the final Unicode codepoint to be output
  3582. // Check for illegal sequences and codepoints.
  3583. // From Unicode 3.1, non-shortest form is illegal
  3584. if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
  3585. ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
  3586. ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
  3587. (4 < $mBytes) ||
  3588. // From Unicode 3.2, surrogate characters = illegal
  3589. (($mUcs4 & 0xFFFFF800) == 0xD800) ||
  3590. // Codepoints outside the Unicode range are illegal
  3591. ($mUcs4 > 0x10FFFF)
  3592. ) {
  3593. } elseif (0xFEFF != $mUcs4 && // omit BOM
  3594. // check for valid Char unicode codepoints
  3595. (
  3596. 0x9 == $mUcs4 ||
  3597. 0xA == $mUcs4 ||
  3598. 0xD == $mUcs4 ||
  3599. (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
  3600. // 7F-9F is not strictly prohibited by XML,
  3601. // but it is non-SGML, and thus we don't allow it
  3602. (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
  3603. (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
  3604. )
  3605. ) {
  3606. $out .= $char;
  3607. }
  3608. // initialize UTF8 cache (reset)
  3609. $mState = 0;
  3610. $mUcs4 = 0;
  3611. $mBytes = 1;
  3612. $char = '';
  3613. }
  3614. } else {
  3615. // ((0xC0 & (*in) != 0x80) && (mState != 0))
  3616. // Incomplete multi-octet sequence.
  3617. // used to result in complete fail, but we'll reset
  3618. $mState = 0;
  3619. $mUcs4 = 0;
  3620. $mBytes = 1;
  3621. $char ='';
  3622. }
  3623. }
  3624. }
  3625. return $out;
  3626. }
  3627. /**
  3628. * Translates a Unicode codepoint into its corresponding UTF-8 character.
  3629. * @note Based on Feyd's function at
  3630. * <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
  3631. * which is in public domain.
  3632. * @note While we're going to do code point parsing anyway, a good
  3633. * optimization would be to refuse to translate code points that
  3634. * are non-SGML characters. However, this could lead to duplication.
  3635. * @note This is very similar to the unichr function in
  3636. * maintenance/generate-entity-file.php (although this is superior,
  3637. * due to its sanity checks).
  3638. */
    // +----------+----------+----------+----------+
    // | 33222222 | 22221111 | 111111   |          |
    // | 10987654 | 32109876 | 54321098 | 76543210 | bit
    // +----------+----------+----------+----------+
    // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
    // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
    // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
    // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
    // +----------+----------+----------+----------+
    // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
    // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
    // +----------+----------+----------+----------+
  3651. public static function unichr($code)
  3652. {
  3653. if ($code > 1114111 or $code < 0 or
  3654. ($code >= 55296 and $code <= 57343) ) {
  3655. // bits are set outside the "valid" range as defined
  3656. // by UNICODE 4.1.0
  3657. return '';
  3658. }
  3659. $x = $y = $z = $w = 0;
  3660. if ($code < 128) {
  3661. // regular ASCII character
  3662. $x = $code;
  3663. } else {
  3664. // set up bits for UTF-8
  3665. $x = ($code & 63) | 128;
  3666. if ($code < 2048) {
  3667. $y = (($code & 2047) >> 6) | 192;
  3668. } else {
  3669. $y = (($code & 4032) >> 6) | 128;
  3670. if ($code < 65536) {
  3671. $z = (($code >> 12) & 15) | 224;
  3672. } else {
  3673. $z = (($code >> 12) & 63) | 128;
  3674. $w = (($code >> 18) & 7) | 240;
  3675. }
  3676. }
  3677. }
  3678. // set up the actual character
  3679. $ret = '';
  3680. if ($w) {
  3681. $ret .= chr($w);
  3682. }
  3683. if ($z) {
  3684. $ret .= chr($z);
  3685. }
  3686. if ($y) {
  3687. $ret .= chr($y);
  3688. }
  3689. $ret .= chr($x);
  3690. return $ret;
  3691. }
  3692. /**
  3693. * @return bool
  3694. */
  3695. public static function iconvAvailable()
  3696. {
  3697. static $iconv = null;
  3698. if ($iconv === null) {
  3699. $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
  3700. }
  3701. return $iconv;
  3702. }
  3703. /**
  3704. * Convert a string to UTF-8 based on configuration.
  3705. * @param string $str The string to convert
  3706. * @param HTMLPurifier_Config $config
  3707. * @param HTMLPurifier_Context $context
  3708. * @return string
  3709. */
  3710. public static function convertToUTF8($str, $config, $context)
  3711. {
  3712. $encoding = $config->get('Core.Encoding');
  3713. if ($encoding === 'utf-8') {
  3714. return $str;
  3715. }
  3716. static $iconv = null;
  3717. if ($iconv === null) {
  3718. $iconv = self::iconvAvailable();
  3719. }
  3720. if ($iconv && !$config->get('Test.ForceNoIconv')) {
  3721. // unaffected by bugs, since UTF-8 support all characters
  3722. $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
  3723. if ($str === false) {
  3724. // $encoding is not a valid encoding
  3725. trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
  3726. return '';
  3727. }
  3728. // If the string is bjorked by Shift_JIS or a similar encoding
  3729. // that doesn't support all of ASCII, convert the naughty
  3730. // characters to their true byte-wise ASCII/UTF-8 equivalents.
  3731. $str = strtr($str, self::testEncodingSupportsASCII($encoding));
  3732. return $str;
  3733. } elseif ($encoding === 'iso-8859-1') {
  3734. $str = utf8_encode($str);
  3735. return $str;
  3736. }
  3737. $bug = HTMLPurifier_Encoder::testIconvTruncateBug();
  3738. if ($bug == self::ICONV_OK) {
  3739. trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
  3740. } else {
  3741. trigger_error(
  3742. 'You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 ' .
  3743. 'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541',
  3744. E_USER_ERROR
  3745. );
  3746. }
  3747. }
  3748. /**
  3749. * Converts a string from UTF-8 based on configuration.
  3750. * @param string $str The string to convert
  3751. * @param HTMLPurifier_Config $config
  3752. * @param HTMLPurifier_Context $context
  3753. * @return string
  3754. * @note Currently, this is a lossy conversion, with unexpressable
  3755. * characters being omitted.
  3756. */
  3757. public static function convertFromUTF8($str, $config, $context)
  3758. {
  3759. $encoding = $config->get('Core.Encoding');
  3760. if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
  3761. $str = self::convertToASCIIDumbLossless($str);
  3762. }
  3763. if ($encoding === 'utf-8') {
  3764. return $str;
  3765. }
  3766. static $iconv = null;
  3767. if ($iconv === null) {
  3768. $iconv = self::iconvAvailable();
  3769. }
  3770. if ($iconv && !$config->get('Test.ForceNoIconv')) {
  3771. // Undo our previous fix in convertToUTF8, otherwise iconv will barf
  3772. $ascii_fix = self::testEncodingSupportsASCII($encoding);
  3773. if (!$escape && !empty($ascii_fix)) {
  3774. $clear_fix = array();
  3775. foreach ($ascii_fix as $utf8 => $native) {
  3776. $clear_fix[$utf8] = '';
  3777. }
  3778. $str = strtr($str, $clear_fix);
  3779. }
  3780. $str = strtr($str, array_flip($ascii_fix));
  3781. // Normal stuff
  3782. $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
  3783. return $str;
  3784. } elseif ($encoding === 'iso-8859-1') {
  3785. $str = utf8_decode($str);
  3786. return $str;
  3787. }
  3788. trigger_error('Encoding not supported', E_USER_ERROR);
  3789. // You might be tempted to assume that the ASCII representation
  3790. // might be OK, however, this is *not* universally true over all
  3791. // encodings. So we take the conservative route here, rather
  3792. // than forcibly turn on %Core.EscapeNonASCIICharacters
  3793. }
  3794. /**
  3795. * Lossless (character-wise) conversion of HTML to ASCII
  3796. * @param string $str UTF-8 string to be converted to ASCII
  3797. * @return string ASCII encoded string with non-ASCII character entity-ized
  3798. * @warning Adapted from MediaWiki, claiming fair use: this is a common
  3799. * algorithm. If you disagree with this license fudgery,
  3800. * implement it yourself.
  3801. * @note Uses decimal numeric entities since they are best supported.
  3802. * @note This is a DUMB function: it has no concept of keeping
  3803. * character entities that the projected character encoding
  3804. * can allow. We could possibly implement a smart version
  3805. * but that would require it to also know which Unicode
  3806. * codepoints the charset supported (not an easy task).
  3807. * @note Sort of with cleanUTF8() but it assumes that $str is
  3808. * well-formed UTF-8
  3809. */
  3810. public static function convertToASCIIDumbLossless($str)
  3811. {
  3812. $bytesleft = 0;
  3813. $result = '';
  3814. $working = 0;
  3815. $len = strlen($str);
  3816. for ($i = 0; $i < $len; $i++) {
  3817. $bytevalue = ord($str[$i]);
  3818. if ($bytevalue <= 0x7F) { //0xxx xxxx
  3819. $result .= chr($bytevalue);
  3820. $bytesleft = 0;
  3821. } elseif ($bytevalue <= 0xBF) { //10xx xxxx
  3822. $working = $working << 6;
  3823. $working += ($bytevalue & 0x3F);
  3824. $bytesleft--;
  3825. if ($bytesleft <= 0) {
  3826. $result .= "&#" . $working . ";";
  3827. }
  3828. } elseif ($bytevalue <= 0xDF) { //110x xxxx
  3829. $working = $bytevalue & 0x1F;
  3830. $bytesleft = 1;
  3831. } elseif ($bytevalue <= 0xEF) { //1110 xxxx
  3832. $working = $bytevalue & 0x0F;
  3833. $bytesleft = 2;
  3834. } else { //1111 0xxx
  3835. $working = $bytevalue & 0x07;
  3836. $bytesleft = 3;
  3837. }
  3838. }
  3839. return $result;
  3840. }
  3841. /** No bugs detected in iconv. */
  3842. const ICONV_OK = 0;
  3843. /** Iconv truncates output if converting from UTF-8 to another
  3844. * character set with //IGNORE, and a non-encodable character is found */
  3845. const ICONV_TRUNCATES = 1;
  3846. /** Iconv does not support //IGNORE, making it unusable for
  3847. * transcoding purposes */
  3848. const ICONV_UNUSABLE = 2;
  3849. /**
  3850. * glibc iconv has a known bug where it doesn't handle the magic
  3851. * //IGNORE stanza correctly. In particular, rather than ignore
  3852. * characters, it will return an EILSEQ after consuming some number
  3853. * of characters, and expect you to restart iconv as if it were
  3854. * an E2BIG. Old versions of PHP did not respect the errno, and
  3855. * returned the fragment, so as a result you would see iconv
  3856. * mysteriously truncating output. We can work around this by
  3857. * manually chopping our input into segments of about 8000
  3858. * characters, as long as PHP ignores the error code. If PHP starts
  3859. * paying attention to the error code, iconv becomes unusable.
  3860. *
  3861. * @return int Error code indicating severity of bug.
  3862. */
  3863. public static function testIconvTruncateBug()
  3864. {
  3865. static $code = null;
  3866. if ($code === null) {
  3867. // better not use iconv, otherwise infinite loop!
  3868. $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
  3869. if ($r === false) {
  3870. $code = self::ICONV_UNUSABLE;
  3871. } elseif (($c = strlen($r)) < 9000) {
  3872. $code = self::ICONV_TRUNCATES;
  3873. } elseif ($c > 9000) {
  3874. trigger_error(
  3875. 'Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: ' .
  3876. 'include your iconv version as per phpversion()',
  3877. E_USER_ERROR
  3878. );
  3879. } else {
  3880. $code = self::ICONV_OK;
  3881. }
  3882. }
  3883. return $code;
  3884. }
  3885. /**
  3886. * This expensive function tests whether or not a given character
  3887. * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
  3888. * fail this test, and require special processing. Variable width
  3889. * encodings shouldn't ever fail.
  3890. *
  3891. * @param string $encoding Encoding name to test, as per iconv format
  3892. * @param bool $bypass Whether or not to bypass the precompiled arrays.
  3893. * @return Array of UTF-8 characters to their corresponding ASCII,
  3894. * which can be used to "undo" any overzealous iconv action.
  3895. */
  3896. public static function testEncodingSupportsASCII($encoding, $bypass = false)
  3897. {
  3898. // All calls to iconv here are unsafe, proof by case analysis:
  3899. // If ICONV_OK, no difference.
  3900. // If ICONV_TRUNCATE, all calls involve one character inputs,
  3901. // so bug is not triggered.
  3902. // If ICONV_UNUSABLE, this call is irrelevant
  3903. static $encodings = array();
  3904. if (!$bypass) {
  3905. if (isset($encodings[$encoding])) {
  3906. return $encodings[$encoding];
  3907. }
  3908. $lenc = strtolower($encoding);
  3909. switch ($lenc) {
  3910. case 'shift_jis':
  3911. return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
  3912. case 'johab':
  3913. return array("\xE2\x82\xA9" => '\\');
  3914. }
  3915. if (strpos($lenc, 'iso-8859-') === 0) {
  3916. return array();
  3917. }
  3918. }
  3919. $ret = array();
  3920. if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) {
  3921. return false;
  3922. }
  3923. for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
  3924. $c = chr($i); // UTF-8 char
  3925. $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
  3926. if ($r === '' ||
  3927. // This line is needed for iconv implementations that do not
  3928. // omit characters that do not exist in the target character set
  3929. ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
  3930. ) {
  3931. // Reverse engineer: what's the UTF-8 equiv of this byte
  3932. // sequence? This assumes that there's no variable width
  3933. // encoding that doesn't support ASCII.
  3934. $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
  3935. }
  3936. }
  3937. $encodings[$encoding] = $ret;
  3938. return $ret;
  3939. }
  3940. }
  3941. /**
  3942. * Object that provides entity lookup table from entity name to character
  3943. */
  3944. class HTMLPurifier_EntityLookup
  3945. {
  3946. /**
  3947. * Assoc array of entity name to character represented.
  3948. * @type array
  3949. */
  3950. public $table;
  3951. /**
  3952. * Sets up the entity lookup table from the serialized file contents.
  3953. * @param bool $file
  3954. * @note The serialized contents are versioned, but were generated
  3955. * using the maintenance script generate_entity_file.php
  3956. * @warning This is not in constructor to help enforce the Singleton
  3957. */
  3958. public function setup($file = false)
  3959. {
  3960. if (!$file) {
  3961. $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
  3962. }
  3963. $this->table = unserialize(file_get_contents($file));
  3964. }
  3965. /**
  3966. * Retrieves sole instance of the object.
  3967. * @param bool|HTMLPurifier_EntityLookup $prototype Optional prototype of custom lookup table to overload with.
  3968. * @return HTMLPurifier_EntityLookup
  3969. */
  3970. public static function instance($prototype = false)
  3971. {
  3972. // no references, since PHP doesn't copy unless modified
  3973. static $instance = null;
  3974. if ($prototype) {
  3975. $instance = $prototype;
  3976. } elseif (!$instance) {
  3977. $instance = new HTMLPurifier_EntityLookup();
  3978. $instance->setup();
  3979. }
  3980. return $instance;
  3981. }
  3982. }
  3983. // if want to implement error collecting here, we'll need to use some sort
  3984. // of global data (probably trigger_error) because it's impossible to pass
  3985. // $config or $context to the callback functions.
  3986. /**
  3987. * Handles referencing and derefencing character entities
  3988. */
  3989. class HTMLPurifier_EntityParser
  3990. {
  3991. /**
  3992. * Reference to entity lookup table.
  3993. * @type HTMLPurifier_EntityLookup
  3994. */
  3995. protected $_entity_lookup;
  3996. /**
  3997. * Callback regex string for parsing entities.
  3998. * @type string
  3999. */
  4000. protected $_substituteEntitiesRegex =
  4001. '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
  4002. // 1. hex 2. dec 3. string (XML style)
  4003. /**
  4004. * Decimal to parsed string conversion table for special entities.
  4005. * @type array
  4006. */
  4007. protected $_special_dec2str =
  4008. array(
  4009. 34 => '"',
  4010. 38 => '&',
  4011. 39 => "'",
  4012. 60 => '<',
  4013. 62 => '>'
  4014. );
  4015. /**
  4016. * Stripped entity names to decimal conversion table for special entities.
  4017. * @type array
  4018. */
  4019. protected $_special_ent2dec =
  4020. array(
  4021. 'quot' => 34,
  4022. 'amp' => 38,
  4023. 'lt' => 60,
  4024. 'gt' => 62
  4025. );
  4026. /**
  4027. * Substitutes non-special entities with their parsed equivalents. Since
  4028. * running this whenever you have parsed character is t3h 5uck, we run
  4029. * it before everything else.
  4030. *
  4031. * @param string $string String to have non-special entities parsed.
  4032. * @return string Parsed string.
  4033. */
  4034. public function substituteNonSpecialEntities($string)
  4035. {
  4036. // it will try to detect missing semicolons, but don't rely on it
  4037. return preg_replace_callback(
  4038. $this->_substituteEntitiesRegex,
  4039. array($this, 'nonSpecialEntityCallback'),
  4040. $string
  4041. );
  4042. }
  4043. /**
  4044. * Callback function for substituteNonSpecialEntities() that does the work.
  4045. *
  4046. * @param array $matches PCRE matches array, with 0 the entire match, and
  4047. * either index 1, 2 or 3 set with a hex value, dec value,
  4048. * or string (respectively).
  4049. * @return string Replacement string.
  4050. */
  4051. protected function nonSpecialEntityCallback($matches)
  4052. {
  4053. // replaces all but big five
  4054. $entity = $matches[0];
  4055. $is_num = (@$matches[0][1] === '#');
  4056. if ($is_num) {
  4057. $is_hex = (@$entity[2] === 'x');
  4058. $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
  4059. // abort for special characters
  4060. if (isset($this->_special_dec2str[$code])) {
  4061. return $entity;
  4062. }
  4063. return HTMLPurifier_Encoder::unichr($code);
  4064. } else {
  4065. if (isset($this->_special_ent2dec[$matches[3]])) {
  4066. return $entity;
  4067. }
  4068. if (!$this->_entity_lookup) {
  4069. $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
  4070. }
  4071. if (isset($this->_entity_lookup->table[$matches[3]])) {
  4072. return $this->_entity_lookup->table[$matches[3]];
  4073. } else {
  4074. return $entity;
  4075. }
  4076. }
  4077. }
  4078. /**
  4079. * Substitutes only special entities with their parsed equivalents.
  4080. *
  4081. * @notice We try to avoid calling this function because otherwise, it
  4082. * would have to be called a lot (for every parsed section).
  4083. *
  4084. * @param string $string String to have non-special entities parsed.
  4085. * @return string Parsed string.
  4086. */
  4087. public function substituteSpecialEntities($string)
  4088. {
  4089. return preg_replace_callback(
  4090. $this->_substituteEntitiesRegex,
  4091. array($this, 'specialEntityCallback'),
  4092. $string
  4093. );
  4094. }
  4095. /**
  4096. * Callback function for substituteSpecialEntities() that does the work.
  4097. *
  4098. * This callback has same syntax as nonSpecialEntityCallback().
  4099. *
  4100. * @param array $matches PCRE-style matches array, with 0 the entire match, and
  4101. * either index 1, 2 or 3 set with a hex value, dec value,
  4102. * or string (respectively).
  4103. * @return string Replacement string.
  4104. */
  4105. protected function specialEntityCallback($matches)
  4106. {
  4107. $entity = $matches[0];
  4108. $is_num = (@$matches[0][1] === '#');
  4109. if ($is_num) {
  4110. $is_hex = (@$entity[2] === 'x');
  4111. $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
  4112. return isset($this->_special_dec2str[$int]) ?
  4113. $this->_special_dec2str[$int] :
  4114. $entity;
  4115. } else {
  4116. return isset($this->_special_ent2dec[$matches[3]]) ?
  4117. $this->_special_ent2dec[$matches[3]] :
  4118. $entity;
  4119. }
  4120. }
  4121. }
  4122. /**
  4123. * Error collection class that enables HTML Purifier to report HTML
  4124. * problems back to the user
  4125. */
  4126. class HTMLPurifier_ErrorCollector
  4127. {
  4128. /**
  4129. * Identifiers for the returned error array. These are purposely numeric
  4130. * so list() can be used.
  4131. */
  4132. const LINENO = 0;
  4133. const SEVERITY = 1;
  4134. const MESSAGE = 2;
  4135. const CHILDREN = 3;
  4136. /**
  4137. * @type array
  4138. */
  4139. protected $errors;
  4140. /**
  4141. * @type array
  4142. */
  4143. protected $_current;
  4144. /**
  4145. * @type array
  4146. */
  4147. protected $_stacks = array(array());
  4148. /**
  4149. * @type HTMLPurifier_Language
  4150. */
  4151. protected $locale;
  4152. /**
  4153. * @type HTMLPurifier_Generator
  4154. */
  4155. protected $generator;
  4156. /**
  4157. * @type HTMLPurifier_Context
  4158. */
  4159. protected $context;
  4160. /**
  4161. * @type array
  4162. */
  4163. protected $lines = array();
  4164. /**
  4165. * @param HTMLPurifier_Context $context
  4166. */
  4167. public function __construct($context)
  4168. {
  4169. $this->locale =& $context->get('Locale');
  4170. $this->context = $context;
  4171. $this->_current =& $this->_stacks[0];
  4172. $this->errors =& $this->_stacks[0];
  4173. }
  4174. /**
  4175. * Sends an error message to the collector for later use
  4176. * @param int $severity Error severity, PHP error style (don't use E_USER_)
  4177. * @param string $msg Error message text
  4178. */
  4179. public function send($severity, $msg)
  4180. {
  4181. $args = array();
  4182. if (func_num_args() > 2) {
  4183. $args = func_get_args();
  4184. array_shift($args);
  4185. unset($args[0]);
  4186. }
  4187. $token = $this->context->get('CurrentToken', true);
  4188. $line = $token ? $token->line : $this->context->get('CurrentLine', true);
  4189. $col = $token ? $token->col : $this->context->get('CurrentCol', true);
  4190. $attr = $this->context->get('CurrentAttr', true);
  4191. // perform special substitutions, also add custom parameters
  4192. $subst = array();
  4193. if (!is_null($token)) {
  4194. $args['CurrentToken'] = $token;
  4195. }
  4196. if (!is_null($attr)) {
  4197. $subst['$CurrentAttr.Name'] = $attr;
  4198. if (isset($token->attr[$attr])) {
  4199. $subst['$CurrentAttr.Value'] = $token->attr[$attr];
  4200. }
  4201. }
  4202. if (empty($args)) {
  4203. $msg = $this->locale->getMessage($msg);
  4204. } else {
  4205. $msg = $this->locale->formatMessage($msg, $args);
  4206. }
  4207. if (!empty($subst)) {
  4208. $msg = strtr($msg, $subst);
  4209. }
  4210. // (numerically indexed)
  4211. $error = array(
  4212. self::LINENO => $line,
  4213. self::SEVERITY => $severity,
  4214. self::MESSAGE => $msg,
  4215. self::CHILDREN => array()
  4216. );
  4217. $this->_current[] = $error;
  4218. // NEW CODE BELOW ...
  4219. // Top-level errors are either:
  4220. // TOKEN type, if $value is set appropriately, or
  4221. // "syntax" type, if $value is null
  4222. $new_struct = new HTMLPurifier_ErrorStruct();
  4223. $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
  4224. if ($token) {
  4225. $new_struct->value = clone $token;
  4226. }
  4227. if (is_int($line) && is_int($col)) {
  4228. if (isset($this->lines[$line][$col])) {
  4229. $struct = $this->lines[$line][$col];
  4230. } else {
  4231. $struct = $this->lines[$line][$col] = $new_struct;
  4232. }
  4233. // These ksorts may present a performance problem
  4234. ksort($this->lines[$line], SORT_NUMERIC);
  4235. } else {
  4236. if (isset($this->lines[-1])) {
  4237. $struct = $this->lines[-1];
  4238. } else {
  4239. $struct = $this->lines[-1] = $new_struct;
  4240. }
  4241. }
  4242. ksort($this->lines, SORT_NUMERIC);
  4243. // Now, check if we need to operate on a lower structure
  4244. if (!empty($attr)) {
  4245. $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
  4246. if (!$struct->value) {
  4247. $struct->value = array($attr, 'PUT VALUE HERE');
  4248. }
  4249. }
  4250. if (!empty($cssprop)) {
  4251. $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
  4252. if (!$struct->value) {
  4253. // if we tokenize CSS this might be a little more difficult to do
  4254. $struct->value = array($cssprop, 'PUT VALUE HERE');
  4255. }
  4256. }
  4257. // Ok, structs are all setup, now time to register the error
  4258. $struct->addError($severity, $msg);
  4259. }
  4260. /**
  4261. * Retrieves raw error data for custom formatter to use
  4262. */
  4263. public function getRaw()
  4264. {
  4265. return $this->errors;
  4266. }
  4267. /**
  4268. * Default HTML formatting implementation for error messages
  4269. * @param HTMLPurifier_Config $config Configuration, vital for HTML output nature
  4270. * @param array $errors Errors array to display; used for recursion.
  4271. * @return string
  4272. */
  4273. public function getHTMLFormatted($config, $errors = null)
  4274. {
  4275. $ret = array();
  4276. $this->generator = new HTMLPurifier_Generator($config, $this->context);
  4277. if ($errors === null) {
  4278. $errors = $this->errors;
  4279. }
  4280. // 'At line' message needs to be removed
  4281. // generation code for new structure goes here. It needs to be recursive.
  4282. foreach ($this->lines as $line => $col_array) {
  4283. if ($line == -1) {
  4284. continue;
  4285. }
  4286. foreach ($col_array as $col => $struct) {
  4287. $this->_renderStruct($ret, $struct, $line, $col);
  4288. }
  4289. }
  4290. if (isset($this->lines[-1])) {
  4291. $this->_renderStruct($ret, $this->lines[-1]);
  4292. }
  4293. if (empty($errors)) {
  4294. return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
  4295. } else {
  4296. return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
  4297. }
  4298. }
  4299. private function _renderStruct(&$ret, $struct, $line = null, $col = null)
  4300. {
  4301. $stack = array($struct);
  4302. $context_stack = array(array());
  4303. while ($current = array_pop($stack)) {
  4304. $context = array_pop($context_stack);
  4305. foreach ($current->errors as $error) {
  4306. list($severity, $msg) = $error;
  4307. $string = '';
  4308. $string .= '<div>';
  4309. // W3C uses an icon to indicate the severity of the error.
  4310. $error = $this->locale->getErrorName($severity);
  4311. $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
  4312. if (!is_null($line) && !is_null($col)) {
  4313. $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
  4314. } else {
  4315. $string .= '<em class="location">End of Document: </em> ';
  4316. }
  4317. $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
  4318. $string .= '</div>';
  4319. // Here, have a marker for the character on the column appropriate.
  4320. // Be sure to clip extremely long lines.
  4321. //$string .= '<pre>';
  4322. //$string .= '';
  4323. //$string .= '</pre>';
  4324. $ret[] = $string;
  4325. }
  4326. foreach ($current->children as $array) {
  4327. $context[] = $current;
  4328. $stack = array_merge($stack, array_reverse($array, true));
  4329. for ($i = count($array); $i > 0; $i--) {
  4330. $context_stack[] = $context;
  4331. }
  4332. }
  4333. }
  4334. }
  4335. }
  4336. /**
  4337. * Records errors for particular segments of an HTML document such as tokens,
  4338. * attributes or CSS properties. They can contain error structs (which apply
  4339. * to components of what they represent), but their main purpose is to hold
  4340. * errors applying to whatever struct is being used.
  4341. */
  4342. class HTMLPurifier_ErrorStruct
  4343. {
  4344. /**
  4345. * Possible values for $children first-key. Note that top-level structures
  4346. * are automatically token-level.
  4347. */
  4348. const TOKEN = 0;
  4349. const ATTR = 1;
  4350. const CSSPROP = 2;
  4351. /**
  4352. * Type of this struct.
  4353. * @type string
  4354. */
  4355. public $type;
  4356. /**
  4357. * Value of the struct we are recording errors for. There are various
  4358. * values for this:
  4359. * - TOKEN: Instance of HTMLPurifier_Token
  4360. * - ATTR: array('attr-name', 'value')
  4361. * - CSSPROP: array('prop-name', 'value')
  4362. * @type mixed
  4363. */
  4364. public $value;
  4365. /**
  4366. * Errors registered for this structure.
  4367. * @type array
  4368. */
  4369. public $errors = array();
  4370. /**
  4371. * Child ErrorStructs that are from this structure. For example, a TOKEN
  4372. * ErrorStruct would contain ATTR ErrorStructs. This is a multi-dimensional
  4373. * array in structure: [TYPE]['identifier']
  4374. * @type array
  4375. */
  4376. public $children = array();
  4377. /**
  4378. * @param string $type
  4379. * @param string $id
  4380. * @return mixed
  4381. */
  4382. public function getChild($type, $id)
  4383. {
  4384. if (!isset($this->children[$type][$id])) {
  4385. $this->children[$type][$id] = new HTMLPurifier_ErrorStruct();
  4386. $this->children[$type][$id]->type = $type;
  4387. }
  4388. return $this->children[$type][$id];
  4389. }
  4390. /**
  4391. * @param int $severity
  4392. * @param string $message
  4393. */
  4394. public function addError($severity, $message)
  4395. {
  4396. $this->errors[] = array($severity, $message);
  4397. }
  4398. }
  4399. /**
  4400. * Global exception class for HTML Purifier; any exceptions we throw
  4401. * are from here.
  4402. */
  4403. class HTMLPurifier_Exception extends Exception
  4404. {
  4405. }
  4406. /**
  4407. * Represents a pre or post processing filter on HTML Purifier's output
  4408. *
  4409. * Sometimes, a little ad-hoc fixing of HTML has to be done before
  4410. * it gets sent through HTML Purifier: you can use filters to acheive
  4411. * this effect. For instance, YouTube videos can be preserved using
  4412. * this manner. You could have used a decorator for this task, but
  4413. * PHP's support for them is not terribly robust, so we're going
  4414. * to just loop through the filters.
  4415. *
  4416. * Filters should be exited first in, last out. If there are three filters,
  4417. * named 1, 2 and 3, the order of execution should go 1->preFilter,
  4418. * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter,
  4419. * 1->postFilter.
  4420. *
  4421. * @note Methods are not declared abstract as it is perfectly legitimate
  4422. * for an implementation not to want anything to happen on a step
  4423. */
  4424. class HTMLPurifier_Filter
  4425. {
  4426. /**
  4427. * Name of the filter for identification purposes.
  4428. * @type string
  4429. */
  4430. public $name;
  4431. /**
  4432. * Pre-processor function, handles HTML before HTML Purifier
  4433. * @param string $html
  4434. * @param HTMLPurifier_Config $config
  4435. * @param HTMLPurifier_Context $context
  4436. * @return string
  4437. */
  4438. public function preFilter($html, $config, $context)
  4439. {
  4440. return $html;
  4441. }
  4442. /**
  4443. * Post-processor function, handles HTML after HTML Purifier
  4444. * @param string $html
  4445. * @param HTMLPurifier_Config $config
  4446. * @param HTMLPurifier_Context $context
  4447. * @return string
  4448. */
  4449. public function postFilter($html, $config, $context)
  4450. {
  4451. return $html;
  4452. }
  4453. }
  4454. /**
  4455. * Generates HTML from tokens.
  4456. * @todo Refactor interface so that configuration/context is determined
  4457. * upon instantiation, no need for messy generateFromTokens() calls
  4458. * @todo Make some of the more internal functions protected, and have
  4459. * unit tests work around that
  4460. */
  4461. class HTMLPurifier_Generator
  4462. {
  4463. /**
  4464. * Whether or not generator should produce XML output.
  4465. * @type bool
  4466. */
  4467. private $_xhtml = true;
  4468. /**
  4469. * :HACK: Whether or not generator should comment the insides of <script> tags.
  4470. * @type bool
  4471. */
  4472. private $_scriptFix = false;
  4473. /**
  4474. * Cache of HTMLDefinition during HTML output to determine whether or
  4475. * not attributes should be minimized.
  4476. * @type HTMLPurifier_HTMLDefinition
  4477. */
  4478. private $_def;
  4479. /**
  4480. * Cache of %Output.SortAttr.
  4481. * @type bool
  4482. */
  4483. private $_sortAttr;
  4484. /**
  4485. * Cache of %Output.FlashCompat.
  4486. * @type bool
  4487. */
  4488. private $_flashCompat;
  4489. /**
  4490. * Cache of %Output.FixInnerHTML.
  4491. * @type bool
  4492. */
  4493. private $_innerHTMLFix;
  4494. /**
  4495. * Stack for keeping track of object information when outputting IE
  4496. * compatibility code.
  4497. * @type array
  4498. */
  4499. private $_flashStack = array();
  4500. /**
  4501. * Configuration for the generator
  4502. * @type HTMLPurifier_Config
  4503. */
  4504. protected $config;
  4505. /**
  4506. * @param HTMLPurifier_Config $config
  4507. * @param HTMLPurifier_Context $context
  4508. */
  4509. public function __construct($config, $context)
  4510. {
  4511. $this->config = $config;
  4512. $this->_scriptFix = $config->get('Output.CommentScriptContents');
  4513. $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
  4514. $this->_sortAttr = $config->get('Output.SortAttr');
  4515. $this->_flashCompat = $config->get('Output.FlashCompat');
  4516. $this->_def = $config->getHTMLDefinition();
  4517. $this->_xhtml = $this->_def->doctype->xml;
  4518. }
  4519. /**
  4520. * Generates HTML from an array of tokens.
  4521. * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token
  4522. * @return string Generated HTML
  4523. */
  4524. public function generateFromTokens($tokens)
  4525. {
  4526. if (!$tokens) {
  4527. return '';
  4528. }
  4529. // Basic algorithm
  4530. $html = '';
  4531. for ($i = 0, $size = count($tokens); $i < $size; $i++) {
  4532. if ($this->_scriptFix && $tokens[$i]->name === 'script'
  4533. && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
  4534. // script special case
  4535. // the contents of the script block must be ONE token
  4536. // for this to work.
  4537. $html .= $this->generateFromToken($tokens[$i++]);
  4538. $html .= $this->generateScriptFromToken($tokens[$i++]);
  4539. }
  4540. $html .= $this->generateFromToken($tokens[$i]);
  4541. }
  4542. // Tidy cleanup
  4543. if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
  4544. $tidy = new Tidy;
  4545. $tidy->parseString(
  4546. $html,
  4547. array(
  4548. 'indent'=> true,
  4549. 'output-xhtml' => $this->_xhtml,
  4550. 'show-body-only' => true,
  4551. 'indent-spaces' => 2,
  4552. 'wrap' => 68,
  4553. ),
  4554. 'utf8'
  4555. );
  4556. $tidy->cleanRepair();
  4557. $html = (string) $tidy; // explicit cast necessary
  4558. }
  4559. // Normalize newlines to system defined value
  4560. if ($this->config->get('Core.NormalizeNewlines')) {
  4561. $nl = $this->config->get('Output.Newline');
  4562. if ($nl === null) {
  4563. $nl = PHP_EOL;
  4564. }
  4565. if ($nl !== "\n") {
  4566. $html = str_replace("\n", $nl, $html);
  4567. }
  4568. }
  4569. return $html;
  4570. }
  4571. /**
  4572. * Generates HTML from a single token.
  4573. * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
  4574. * @return string Generated HTML
  4575. */
  4576. public function generateFromToken($token)
  4577. {
  4578. if (!$token instanceof HTMLPurifier_Token) {
  4579. trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
  4580. return '';
  4581. } elseif ($token instanceof HTMLPurifier_Token_Start) {
  4582. $attr = $this->generateAttributes($token->attr, $token->name);
  4583. if ($this->_flashCompat) {
  4584. if ($token->name == "object") {
  4585. $flash = new stdclass();
  4586. $flash->attr = $token->attr;
  4587. $flash->param = array();
  4588. $this->_flashStack[] = $flash;
  4589. }
  4590. }
  4591. return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
  4592. } elseif ($token instanceof HTMLPurifier_Token_End) {
  4593. $_extra = '';
  4594. if ($this->_flashCompat) {
  4595. if ($token->name == "object" && !empty($this->_flashStack)) {
  4596. // doesn't do anything for now
  4597. }
  4598. }
  4599. return $_extra . '</' . $token->name . '>';
  4600. } elseif ($token instanceof HTMLPurifier_Token_Empty) {
  4601. if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
  4602. $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
  4603. }
  4604. $attr = $this->generateAttributes($token->attr, $token->name);
  4605. return '<' . $token->name . ($attr ? ' ' : '') . $attr .
  4606. ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
  4607. . '>';
  4608. } elseif ($token instanceof HTMLPurifier_Token_Text) {
  4609. return $this->escape($token->data, ENT_NOQUOTES);
  4610. } elseif ($token instanceof HTMLPurifier_Token_Comment) {
  4611. return '<!--' . $token->data . '-->';
  4612. } else {
  4613. return '';
  4614. }
  4615. }
  4616. /**
  4617. * Special case processor for the contents of script tags
  4618. * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
  4619. * @return string
  4620. * @warning This runs into problems if there's already a literal
  4621. * --> somewhere inside the script contents.
  4622. */
  4623. public function generateScriptFromToken($token)
  4624. {
  4625. if (!$token instanceof HTMLPurifier_Token_Text) {
  4626. return $this->generateFromToken($token);
  4627. }
  4628. // Thanks <http://lachy.id.au/log/2005/05/script-comments>
  4629. $data = preg_replace('#//\s*$#', '', $token->data);
  4630. return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
  4631. }
  4632. /**
  4633. * Generates attribute declarations from attribute array.
  4634. * @note This does not include the leading or trailing space.
  4635. * @param array $assoc_array_of_attributes Attribute array
  4636. * @param string $element Name of element attributes are for, used to check
  4637. * attribute minimization.
  4638. * @return string Generated HTML fragment for insertion.
  4639. */
  4640. public function generateAttributes($assoc_array_of_attributes, $element = '')
  4641. {
  4642. $html = '';
  4643. if ($this->_sortAttr) {
  4644. ksort($assoc_array_of_attributes);
  4645. }
  4646. foreach ($assoc_array_of_attributes as $key => $value) {
  4647. if (!$this->_xhtml) {
  4648. // Remove namespaced attributes
  4649. if (strpos($key, ':') !== false) {
  4650. continue;
  4651. }
  4652. // Check if we should minimize the attribute: val="val" -> val
  4653. if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
  4654. $html .= $key . ' ';
  4655. continue;
  4656. }
  4657. }
  4658. // Workaround for Internet Explorer innerHTML bug.
  4659. // Essentially, Internet Explorer, when calculating
  4660. // innerHTML, omits quotes if there are no instances of
  4661. // angled brackets, quotes or spaces. However, when parsing
  4662. // HTML (for example, when you assign to innerHTML), it
  4663. // treats backticks as quotes. Thus,
  4664. // <img alt="``" />
  4665. // becomes
  4666. // <img alt=`` />
  4667. // becomes
  4668. // <img alt='' />
  4669. // Fortunately, all we need to do is trigger an appropriate
  4670. // quoting style, which we do by adding an extra space.
  4671. // This also is consistent with the W3C spec, which states
  4672. // that user agents may ignore leading or trailing
  4673. // whitespace (in fact, most don't, at least for attributes
  4674. // like alt, but an extra space at the end is barely
  4675. // noticeable). Still, we have a configuration knob for
  4676. // this, since this transformation is not necesary if you
  4677. // don't process user input with innerHTML or you don't plan
  4678. // on supporting Internet Explorer.
  4679. if ($this->_innerHTMLFix) {
  4680. if (strpos($value, '`') !== false) {
  4681. // check if correct quoting style would not already be
  4682. // triggered
  4683. if (strcspn($value, '"\' <>') === strlen($value)) {
  4684. // protect!
  4685. $value .= ' ';
  4686. }
  4687. }
  4688. }
  4689. $html .= $key.'="'.$this->escape($value).'" ';
  4690. }
  4691. return rtrim($html);
  4692. }
  4693. /**
  4694. * Escapes raw text data.
  4695. * @todo This really ought to be protected, but until we have a facility
  4696. * for properly generating HTML here w/o using tokens, it stays
  4697. * public.
  4698. * @param string $string String data to escape for HTML.
  4699. * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
  4700. * permissible for non-attribute output.
  4701. * @return string escaped data.
  4702. */
  4703. public function escape($string, $quote = null)
  4704. {
  4705. // Workaround for APC bug on Mac Leopard reported by sidepodcast
  4706. // http://htmlpurifier.org/phorum/read.php?3,4823,4846
  4707. if ($quote === null) {
  4708. $quote = ENT_COMPAT;
  4709. }
  4710. return htmlspecialchars($string, $quote, 'UTF-8');
  4711. }
  4712. }
  4713. /**
  4714. * Definition of the purified HTML that describes allowed children,
  4715. * attributes, and many other things.
  4716. *
  4717. * Conventions:
  4718. *
  4719. * All member variables that are prefixed with info
  4720. * (including the main $info array) are used by HTML Purifier internals
  4721. * and should not be directly edited when customizing the HTMLDefinition.
  4722. * They can usually be set via configuration directives or custom
  4723. * modules.
  4724. *
  4725. * On the other hand, member variables without the info prefix are used
  4726. * internally by the HTMLDefinition and MUST NOT be used by other HTML
  4727. * Purifier internals. Many of them, however, are public, and may be
  4728. * edited by userspace code to tweak the behavior of HTMLDefinition.
  4729. *
  4730. * @note This class is inspected by Printer_HTMLDefinition; please
  4731. * update that class if things here change.
  4732. *
  4733. * @warning Directives that change this object's structure must be in
  4734. * the HTML or Attr namespace!
  4735. */
  4736. class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
  4737. {
  4738. // FULLY-PUBLIC VARIABLES ---------------------------------------------
  4739. /**
  4740. * Associative array of element names to HTMLPurifier_ElementDef.
  4741. * @type HTMLPurifier_ElementDef[]
  4742. */
  4743. public $info = array();
  4744. /**
  4745. * Associative array of global attribute name to attribute definition.
  4746. * @type array
  4747. */
  4748. public $info_global_attr = array();
  4749. /**
  4750. * String name of parent element HTML will be going into.
  4751. * @type string
  4752. */
  4753. public $info_parent = 'div';
  4754. /**
  4755. * Definition for parent element, allows parent element to be a
  4756. * tag that's not allowed inside the HTML fragment.
  4757. * @type HTMLPurifier_ElementDef
  4758. */
  4759. public $info_parent_def;
  4760. /**
  4761. * String name of element used to wrap inline elements in block context.
  4762. * @type string
  4763. * @note This is rarely used except for BLOCKQUOTEs in strict mode
  4764. */
  4765. public $info_block_wrapper = 'p';
  4766. /**
  4767. * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
  4768. * @type array
  4769. */
  4770. public $info_tag_transform = array();
  4771. /**
  4772. * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
  4773. * @type HTMLPurifier_AttrTransform[]
  4774. */
  4775. public $info_attr_transform_pre = array();
  4776. /**
  4777. * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
  4778. * @type HTMLPurifier_AttrTransform[]
  4779. */
  4780. public $info_attr_transform_post = array();
  4781. /**
  4782. * Nested lookup array of content set name (Block, Inline) to
  4783. * element name to whether or not it belongs in that content set.
  4784. * @type array
  4785. */
  4786. public $info_content_sets = array();
  4787. /**
  4788. * Indexed list of HTMLPurifier_Injector to be used.
  4789. * @type HTMLPurifier_Injector[]
  4790. */
  4791. public $info_injector = array();
  4792. /**
  4793. * Doctype object
  4794. * @type HTMLPurifier_Doctype
  4795. */
  4796. public $doctype;
  4797. // RAW CUSTOMIZATION STUFF --------------------------------------------
  4798. /**
  4799. * Adds a custom attribute to a pre-existing element
  4800. * @note This is strictly convenience, and does not have a corresponding
  4801. * method in HTMLPurifier_HTMLModule
  4802. * @param string $element_name Element name to add attribute to
  4803. * @param string $attr_name Name of attribute
  4804. * @param mixed $def Attribute definition, can be string or object, see
  4805. * HTMLPurifier_AttrTypes for details
  4806. */
  4807. public function addAttribute($element_name, $attr_name, $def)
  4808. {
  4809. $module = $this->getAnonymousModule();
  4810. if (!isset($module->info[$element_name])) {
  4811. $element = $module->addBlankElement($element_name);
  4812. } else {
  4813. $element = $module->info[$element_name];
  4814. }
  4815. $element->attr[$attr_name] = $def;
  4816. }
  4817. /**
  4818. * Adds a custom element to your HTML definition
  4819. * @see HTMLPurifier_HTMLModule::addElement() for detailed
  4820. * parameter and return value descriptions.
  4821. */
  4822. public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
  4823. {
  4824. $module = $this->getAnonymousModule();
  4825. // assume that if the user is calling this, the element
  4826. // is safe. This may not be a good idea
  4827. $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
  4828. return $element;
  4829. }
  4830. /**
  4831. * Adds a blank element to your HTML definition, for overriding
  4832. * existing behavior
  4833. * @param string $element_name
  4834. * @return HTMLPurifier_ElementDef
  4835. * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
  4836. * parameter and return value descriptions.
  4837. */
  4838. public function addBlankElement($element_name)
  4839. {
  4840. $module = $this->getAnonymousModule();
  4841. $element = $module->addBlankElement($element_name);
  4842. return $element;
  4843. }
  4844. /**
  4845. * Retrieves a reference to the anonymous module, so you can
  4846. * bust out advanced features without having to make your own
  4847. * module.
  4848. * @return HTMLPurifier_HTMLModule
  4849. */
  4850. public function getAnonymousModule()
  4851. {
  4852. if (!$this->_anonModule) {
  4853. $this->_anonModule = new HTMLPurifier_HTMLModule();
  4854. $this->_anonModule->name = 'Anonymous';
  4855. }
  4856. return $this->_anonModule;
  4857. }
  4858. private $_anonModule = null;
  4859. // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
  4860. /**
  4861. * @type string
  4862. */
  4863. public $type = 'HTML';
  4864. /**
  4865. * @type HTMLPurifier_HTMLModuleManager
  4866. */
  4867. public $manager;
  4868. /**
  4869. * Performs low-cost, preliminary initialization.
  4870. */
  4871. public function __construct()
  4872. {
  4873. $this->manager = new HTMLPurifier_HTMLModuleManager();
  4874. }
  4875. /**
  4876. * @param HTMLPurifier_Config $config
  4877. */
  4878. protected function doSetup($config)
  4879. {
  4880. $this->processModules($config);
  4881. $this->setupConfigStuff($config);
  4882. unset($this->manager);
  4883. // cleanup some of the element definitions
  4884. foreach ($this->info as $k => $v) {
  4885. unset($this->info[$k]->content_model);
  4886. unset($this->info[$k]->content_model_type);
  4887. }
  4888. }
  4889. /**
  4890. * Extract out the information from the manager
  4891. * @param HTMLPurifier_Config $config
  4892. */
  4893. protected function processModules($config)
  4894. {
  4895. if ($this->_anonModule) {
  4896. // for user specific changes
  4897. // this is late-loaded so we don't have to deal with PHP4
  4898. // reference wonky-ness
  4899. $this->manager->addModule($this->_anonModule);
  4900. unset($this->_anonModule);
  4901. }
  4902. $this->manager->setup($config);
  4903. $this->doctype = $this->manager->doctype;
  4904. foreach ($this->manager->modules as $module) {
  4905. foreach ($module->info_tag_transform as $k => $v) {
  4906. if ($v === false) {
  4907. unset($this->info_tag_transform[$k]);
  4908. } else {
  4909. $this->info_tag_transform[$k] = $v;
  4910. }
  4911. }
  4912. foreach ($module->info_attr_transform_pre as $k => $v) {
  4913. if ($v === false) {
  4914. unset($this->info_attr_transform_pre[$k]);
  4915. } else {
  4916. $this->info_attr_transform_pre[$k] = $v;
  4917. }
  4918. }
  4919. foreach ($module->info_attr_transform_post as $k => $v) {
  4920. if ($v === false) {
  4921. unset($this->info_attr_transform_post[$k]);
  4922. } else {
  4923. $this->info_attr_transform_post[$k] = $v;
  4924. }
  4925. }
  4926. foreach ($module->info_injector as $k => $v) {
  4927. if ($v === false) {
  4928. unset($this->info_injector[$k]);
  4929. } else {
  4930. $this->info_injector[$k] = $v;
  4931. }
  4932. }
  4933. }
  4934. $this->info = $this->manager->getElements();
  4935. $this->info_content_sets = $this->manager->contentSets->lookup;
  4936. }
  4937. /**
  4938. * Sets up stuff based on config. We need a better way of doing this.
  4939. * @param HTMLPurifier_Config $config
  4940. */
  4941. protected function setupConfigStuff($config)
  4942. {
  4943. $block_wrapper = $config->get('HTML.BlockWrapper');
  4944. if (isset($this->info_content_sets['Block'][$block_wrapper])) {
  4945. $this->info_block_wrapper = $block_wrapper;
  4946. } else {
  4947. trigger_error(
  4948. 'Cannot use non-block element as block wrapper',
  4949. E_USER_ERROR
  4950. );
  4951. }
  4952. $parent = $config->get('HTML.Parent');
  4953. $def = $this->manager->getElement($parent, true);
  4954. if ($def) {
  4955. $this->info_parent = $parent;
  4956. $this->info_parent_def = $def;
  4957. } else {
  4958. trigger_error(
  4959. 'Cannot use unrecognized element as parent',
  4960. E_USER_ERROR
  4961. );
  4962. $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
  4963. }
  4964. // support template text
  4965. $support = "(for information on implementing this, see the support forums) ";
  4966. // setup allowed elements -----------------------------------------
  4967. $allowed_elements = $config->get('HTML.AllowedElements');
  4968. $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
  4969. if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
  4970. $allowed = $config->get('HTML.Allowed');
  4971. if (is_string($allowed)) {
  4972. list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
  4973. }
  4974. }
  4975. if (is_array($allowed_elements)) {
  4976. foreach ($this->info as $name => $d) {
  4977. if (!isset($allowed_elements[$name])) {
  4978. unset($this->info[$name]);
  4979. }
  4980. unset($allowed_elements[$name]);
  4981. }
  4982. // emit errors
  4983. foreach ($allowed_elements as $element => $d) {
  4984. $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
  4985. trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
  4986. }
  4987. }
  4988. // setup allowed attributes ---------------------------------------
  4989. $allowed_attributes_mutable = $allowed_attributes; // by copy!
  4990. if (is_array($allowed_attributes)) {
  4991. // This actually doesn't do anything, since we went away from
  4992. // global attributes. It's possible that userland code uses
  4993. // it, but HTMLModuleManager doesn't!
  4994. foreach ($this->info_global_attr as $attr => $x) {
  4995. $keys = array($attr, "*@$attr", "*.$attr");
  4996. $delete = true;
  4997. foreach ($keys as $key) {
  4998. if ($delete && isset($allowed_attributes[$key])) {
  4999. $delete = false;
  5000. }
  5001. if (isset($allowed_attributes_mutable[$key])) {
  5002. unset($allowed_attributes_mutable[$key]);
  5003. }
  5004. }
  5005. if ($delete) {
  5006. unset($this->info_global_attr[$attr]);
  5007. }
  5008. }
  5009. foreach ($this->info as $tag => $info) {
  5010. foreach ($info->attr as $attr => $x) {
  5011. $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
  5012. $delete = true;
  5013. foreach ($keys as $key) {
  5014. if ($delete && isset($allowed_attributes[$key])) {
  5015. $delete = false;
  5016. }
  5017. if (isset($allowed_attributes_mutable[$key])) {
  5018. unset($allowed_attributes_mutable[$key]);
  5019. }
  5020. }
  5021. if ($delete) {
  5022. if ($this->info[$tag]->attr[$attr]->required) {
  5023. trigger_error(
  5024. "Required attribute '$attr' in element '$tag' " .
  5025. "was not allowed, which means '$tag' will not be allowed either",
  5026. E_USER_WARNING
  5027. );
  5028. }
  5029. unset($this->info[$tag]->attr[$attr]);
  5030. }
  5031. }
  5032. }
  5033. // emit errors
  5034. foreach ($allowed_attributes_mutable as $elattr => $d) {
  5035. $bits = preg_split('/[.@]/', $elattr, 2);
  5036. $c = count($bits);
  5037. switch ($c) {
  5038. case 2:
  5039. if ($bits[0] !== '*') {
  5040. $element = htmlspecialchars($bits[0]);
  5041. $attribute = htmlspecialchars($bits[1]);
  5042. if (!isset($this->info[$element])) {
  5043. trigger_error(
  5044. "Cannot allow attribute '$attribute' if element " .
  5045. "'$element' is not allowed/supported $support"
  5046. );
  5047. } else {
  5048. trigger_error(
  5049. "Attribute '$attribute' in element '$element' not supported $support",
  5050. E_USER_WARNING
  5051. );
  5052. }
  5053. break;
  5054. }
  5055. // otherwise fall through
  5056. case 1:
  5057. $attribute = htmlspecialchars($bits[0]);
  5058. trigger_error(
  5059. "Global attribute '$attribute' is not ".
  5060. "supported in any elements $support",
  5061. E_USER_WARNING
  5062. );
  5063. break;
  5064. }
  5065. }
  5066. }
  5067. // setup forbidden elements ---------------------------------------
  5068. $forbidden_elements = $config->get('HTML.ForbiddenElements');
  5069. $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
  5070. foreach ($this->info as $tag => $info) {
  5071. if (isset($forbidden_elements[$tag])) {
  5072. unset($this->info[$tag]);
  5073. continue;
  5074. }
  5075. foreach ($info->attr as $attr => $x) {
  5076. if (isset($forbidden_attributes["$tag@$attr"]) ||
  5077. isset($forbidden_attributes["*@$attr"]) ||
  5078. isset($forbidden_attributes[$attr])
  5079. ) {
  5080. unset($this->info[$tag]->attr[$attr]);
  5081. continue;
  5082. } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
  5083. // $tag.$attr are not user supplied, so no worries!
  5084. trigger_error(
  5085. "Error with $tag.$attr: tag.attr syntax not supported for " .
  5086. "HTML.ForbiddenAttributes; use tag@attr instead",
  5087. E_USER_WARNING
  5088. );
  5089. }
  5090. }
  5091. }
  5092. foreach ($forbidden_attributes as $key => $v) {
  5093. if (strlen($key) < 2) {
  5094. continue;
  5095. }
  5096. if ($key[0] != '*') {
  5097. continue;
  5098. }
  5099. if ($key[1] == '.') {
  5100. trigger_error(
  5101. "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
  5102. E_USER_WARNING
  5103. );
  5104. }
  5105. }
  5106. // setup injectors -----------------------------------------------------
  5107. foreach ($this->info_injector as $i => $injector) {
  5108. if ($injector->checkNeeded($config) !== false) {
  5109. // remove injector that does not have it's required
  5110. // elements/attributes present, and is thus not needed.
  5111. unset($this->info_injector[$i]);
  5112. }
  5113. }
  5114. }
  5115. /**
  5116. * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
  5117. * separate lists for processing. Format is element[attr1|attr2],element2...
  5118. * @warning Although it's largely drawn from TinyMCE's implementation,
  5119. * it is different, and you'll probably have to modify your lists
  5120. * @param array $list String list to parse
  5121. * @return array
  5122. * @todo Give this its own class, probably static interface
  5123. */
  5124. public function parseTinyMCEAllowedList($list)
  5125. {
  5126. $list = str_replace(array(' ', "\t"), '', $list);
  5127. $elements = array();
  5128. $attributes = array();
  5129. $chunks = preg_split('/(,|[\n\r]+)/', $list);
  5130. foreach ($chunks as $chunk) {
  5131. if (empty($chunk)) {
  5132. continue;
  5133. }
  5134. // remove TinyMCE element control characters
  5135. if (!strpos($chunk, '[')) {
  5136. $element = $chunk;
  5137. $attr = false;
  5138. } else {
  5139. list($element, $attr) = explode('[', $chunk);
  5140. }
  5141. if ($element !== '*') {
  5142. $elements[$element] = true;
  5143. }
  5144. if (!$attr) {
  5145. continue;
  5146. }
  5147. $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
  5148. $attr = explode('|', $attr);
  5149. foreach ($attr as $key) {
  5150. $attributes["$element.$key"] = true;
  5151. }
  5152. }
  5153. return array($elements, $attributes);
  5154. }
  5155. }
  5156. /**
  5157. * Represents an XHTML 1.1 module, with information on elements, tags
  5158. * and attributes.
  5159. * @note Even though this is technically XHTML 1.1, it is also used for
  5160. * regular HTML parsing. We are using modulization as a convenient
  5161. * way to represent the internals of HTMLDefinition, and our
  5162. * implementation is by no means conforming and does not directly
  5163. * use the normative DTDs or XML schemas.
  5164. * @note The public variables in a module should almost directly
  5165. * correspond to the variables in HTMLPurifier_HTMLDefinition.
  5166. * However, the prefix info carries no special meaning in these
  5167. * objects (include it anyway if that's the correspondence though).
  5168. * @todo Consider making some member functions protected
  5169. */
  5170. class HTMLPurifier_HTMLModule
  5171. {
  5172. // -- Overloadable ----------------------------------------------------
  5173. /**
  5174. * Short unique string identifier of the module.
  5175. * @type string
  5176. */
  5177. public $name;
  5178. /**
  5179. * Informally, a list of elements this module changes.
  5180. * Not used in any significant way.
  5181. * @type array
  5182. */
  5183. public $elements = array();
  5184. /**
  5185. * Associative array of element names to element definitions.
  5186. * Some definitions may be incomplete, to be merged in later
  5187. * with the full definition.
  5188. * @type array
  5189. */
  5190. public $info = array();
  5191. /**
  5192. * Associative array of content set names to content set additions.
  5193. * This is commonly used to, say, add an A element to the Inline
  5194. * content set. This corresponds to an internal variable $content_sets
  5195. * and NOT info_content_sets member variable of HTMLDefinition.
  5196. * @type array
  5197. */
  5198. public $content_sets = array();
  5199. /**
  5200. * Associative array of attribute collection names to attribute
  5201. * collection additions. More rarely used for adding attributes to
  5202. * the global collections. Example is the StyleAttribute module adding
  5203. * the style attribute to the Core. Corresponds to HTMLDefinition's
  5204. * attr_collections->info, since the object's data is only info,
  5205. * with extra behavior associated with it.
  5206. * @type array
  5207. */
  5208. public $attr_collections = array();
  5209. /**
  5210. * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
  5211. * @type array
  5212. */
  5213. public $info_tag_transform = array();
  5214. /**
  5215. * List of HTMLPurifier_AttrTransform to be performed before validation.
  5216. * @type array
  5217. */
  5218. public $info_attr_transform_pre = array();
  5219. /**
  5220. * List of HTMLPurifier_AttrTransform to be performed after validation.
  5221. * @type array
  5222. */
  5223. public $info_attr_transform_post = array();
  5224. /**
  5225. * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
  5226. * An injector will only be invoked if all of its pre-requisites are met;
  5227. * if an injector fails setup, there will be no error; it will simply be
  5228. * silently disabled.
  5229. * @type array
  5230. */
  5231. public $info_injector = array();
  5232. /**
  5233. * Boolean flag that indicates whether or not getChildDef is implemented.
  5234. * For optimization reasons: may save a call to a function. Be sure
  5235. * to set it if you do implement getChildDef(), otherwise it will have
  5236. * no effect!
  5237. * @type bool
  5238. */
  5239. public $defines_child_def = false;
  5240. /**
  5241. * Boolean flag whether or not this module is safe. If it is not safe, all
  5242. * of its members are unsafe. Modules are safe by default (this might be
  5243. * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
  5244. * which is based off of safe HTML, to explicitly say, "This is safe," even
  5245. * though there are modules which are "unsafe")
  5246. *
  5247. * @type bool
  5248. * @note Previously, safety could be applied at an element level granularity.
  5249. * We've removed this ability, so in order to add "unsafe" elements
  5250. * or attributes, a dedicated module with this property set to false
  5251. * must be used.
  5252. */
  5253. public $safe = true;
  5254. /**
  5255. * Retrieves a proper HTMLPurifier_ChildDef subclass based on
  5256. * content_model and content_model_type member variables of
  5257. * the HTMLPurifier_ElementDef class. There is a similar function
  5258. * in HTMLPurifier_HTMLDefinition.
  5259. * @param HTMLPurifier_ElementDef $def
  5260. * @return HTMLPurifier_ChildDef subclass
  5261. */
  public function getChildDef($def)
  {
      // The base module supplies no custom child definition and therefore
      // always answers false. A module that does implement this method is
      // expected to also set $defines_child_def to true (see the note on
      // that property), otherwise the implementation has no effect.
      return false;
  }
  5266. // -- Convenience -----------------------------------------------------
  5267. /**
  5268. * Convenience function that sets up a new element
  5269. * @param string $element Name of element to add
  5270. * @param string|bool $type What content set should element be registered to?
  5271. * Set as false to skip this step.
  5272. * @param string $contents Allowed children in form of:
  5273. * "$content_model_type: $content_model"
  5274. * @param array $attr_includes What attribute collections to register to
  5275. * element?
  5276. * @param array $attr What unique attributes does the element define?
  5277. * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
  5278. * @return HTMLPurifier_ElementDef Created element definition object, so you
  5279. * can set advanced parameters
  5280. */
  public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
  {
      // Remember the element name in this module's element list.
      $this->elements[] = $element;

      // Break "$content_model_type: $content_model" into its two parts;
      // both parts are null when $contents is not a string.
      $parsed = $this->parseContents($contents);
      $modelType = $parsed[0];
      $model = $parsed[1];

      // The include list ends up under $attr[0].
      $this->mergeInAttrIncludes($attr, $attr_includes);

      // A falsy $type means the element joins no content set.
      if ($type) {
          $this->addElementToContentSet($element, $type);
      }

      $definition = HTMLPurifier_ElementDef::create($model, $modelType, $attr);

      // Anything that is not a string is taken to be a ready-made child
      // definition and is attached to the new element directly.
      if (!is_string($contents)) {
          $definition->child = $contents;
      }

      $this->info[$element] = $definition;
      return $definition;
  }
  5304. /**
  5305. * Convenience function that creates a totally blank, non-standalone
  5306. * element.
  5307. * @param string $element Name of element to create
  5308. * @return HTMLPurifier_ElementDef Created element
  5309. */
  public function addBlankElement($element)
  {
      // An existing definition is never replaced: complain and hand back
      // whatever is already registered under that name.
      if (isset($this->info[$element])) {
          trigger_error("Definition for $element already exists in module, cannot redefine");
          return $this->info[$element];
      }

      $this->elements[] = $element;

      // A blank definition is non-standalone: it can only be merged into a
      // definition contributed by some other module.
      $blank = new HTMLPurifier_ElementDef();
      $blank->standalone = false;
      $this->info[$element] = $blank;

      return $blank;
  }
  5321. /**
  5322. * Convenience function that registers an element to a content set
  5323. * @param string $element Element to register
  5324. * @param string $type Name content set (warning: case sensitive, usually upper-case
  5325. * first letter)
  5326. */
  public function addElementToContentSet($element, $type)
  {
      // Members of a content set are kept as one ' | '-separated string;
      // the separator is only needed once the set already has an entry.
      $prefix = isset($this->content_sets[$type])
          ? $this->content_sets[$type] . ' | '
          : '';
      $this->content_sets[$type] = $prefix . $element;
  }
  5336. /**
  5337. * Convenience function that transforms single-string contents
  5338. * into separate content model and content model type
  5339. * @param string $contents Allowed children in form of:
  5340. * "$content_model_type: $content_model"
  5341. * @return array
  5342. * @note If contents is an object, an array of two nulls will be
  5343. * returned, and the callee needs to take the original $contents
  5344. * and use it directly.
  5345. */
  public function parseContents($contents)
  {
      // Non-string contents (e.g. a prepared child definition object) are
      // not parsed: two nulls tell the caller to use $contents directly.
      if (!is_string($contents)) {
          return array(null, null);
      }

      // Shorthand content model forms.
      switch ($contents) {
          case 'Empty':
              return array('empty', '');
          case 'Inline':
              return array('optional', 'Inline | #PCDATA');
          case 'Flow':
              return array('optional', 'Flow | #PCDATA');
      }

      // Split on the FIRST colon only, so a colon inside the content model
      // is preserved rather than truncating the model. Pad the result so a
      // string without any colon yields an empty content model instead of
      // an undefined-offset notice from reading a missing second part.
      $parts = array_pad(explode(':', $contents, 2), 2, '');

      $content_model_type = strtolower(trim($parts[0]));
      $content_model = trim($parts[1]);

      return array($content_model_type, $content_model);
  }
  5365. /**
  5366. * Convenience function that merges a list of attribute includes into
  5367. * an attribute array.
  5368. * @param array $attr Reference to attr array to modify
  5369. * @param array $attr_includes Array of includes / string include to merge in
  5370. */
  public function mergeInAttrIncludes(&$attr, $attr_includes)
  {
      // Normalise the includes to an array: arrays pass through untouched,
      // an empty scalar becomes an empty list, and any other value is
      // wrapped into a one-element list.
      if (is_array($attr_includes)) {
          $includes = $attr_includes;
      } elseif (empty($attr_includes)) {
          $includes = array();
      } else {
          $includes = array($attr_includes);
      }

      // Slot 0 of the attribute array carries the include list.
      $attr[0] = $includes;
  }
  5382. /**
  5383. * Convenience function that generates a lookup table with boolean
  5384. * true as value.
  5385. * @param string $list List of values to turn into a lookup
  5386. * @note You can also pass an arbitrary number of arguments in
  5387. * place of the regular argument
  5388. * @return array array equivalent of list
  5389. */
  public function makeLookup($list)
  {
      // When called as makeLookup('a', 'b', ...) the first argument is a
      // string; in that case every argument is one entry of the list.
      if (is_string($list)) {
          $entries = func_get_args();
      } else {
          $entries = $list;
      }

      $lookup = array();
      foreach ($entries as $entry) {
          // null entries are ignored rather than becoming a '' key
          if (!is_null($entry)) {
              $lookup[$entry] = true;
          }
      }

      return $lookup;
  }
  5404. /**
  5405. * Lazy load construction of the module after determining whether
  5406. * or not it's needed, and also when a finalized configuration object
  5407. * is available.
  5408. * @param HTMLPurifier_Config $config
  5409. */
  public function setup($config)
  {
      // Deliberately a no-op in this class. HTMLPurifier_HTMLModuleManager::setup()
      // calls setup($config) on every active module; presumably concrete
      // modules override this to build their definitions -- confirm against
      // the subclasses.
  }
  5413. }
  /**
   * Keeps track of the available HTML modules and doctypes, activates the
   * modules required by the configured doctype, and merges the element
   * definitions those modules contribute.
   */
  class HTMLPurifier_HTMLModuleManager
  {
      /**
       * Registry of known doctypes; the constructor registers the built-in
       * HTML 4.01 and XHTML doctypes in it.
       * @type HTMLPurifier_DoctypeRegistry
       */
      public $doctypes;

      /**
       * Doctype selected for the current configuration. Assigned in setup()
       * from $this->doctypes->make($config); setup() reads its ->modules
       * and ->tidyModules lists.
       * @type object
       */
      public $doctype;

      /**
       * @type HTMLPurifier_AttrTypes
       */
      public $attrTypes;

      /**
       * Active instances of modules for the specified doctype are
       * indexed, by name, in this array.
       * @type HTMLPurifier_HTMLModule[]
       */
      public $modules = array();

      /**
       * Array of recognized HTMLPurifier_HTMLModule instances, indexed by
       * the module's name. This array is usually lazy loaded, but a
       * user can overload a module by pre-emptively registering it.
       * @type HTMLPurifier_HTMLModule[]
       */
      public $registeredModules = array();

      /**
       * Names of extra modules that were added by the user using
       * addModule(). These get unconditionally merged into the current
       * doctype, whatever it may be.
       * @type string[]
       */
      public $userModules = array();

      /**
       * Associative array of element name to list of names of modules that
       * have definitions for the element; filled in by setup().
       * @type array
       */
      public $elementLookup = array();

      /**
       * List of class-name prefixes tried when a module is registered by
       * its short name.
       * @type array
       */
      public $prefixes = array('HTMLPurifier_HTMLModule_');

      /**
       * @type HTMLPurifier_ContentSets
       */
      public $contentSets;

      /**
       * @type HTMLPurifier_AttrCollections
       */
      public $attrCollections;

      /**
       * If set to true, unsafe elements and attributes will be allowed.
       * @type bool
       */
      public $trusted = false;

      /**
       * Creates the editable helper objects and registers the built-in
       * doctypes together with their module lists.
       */
      public function __construct()
      {
          // editable internal objects
          $this->attrTypes = new HTMLPurifier_AttrTypes();
          $this->doctypes = new HTMLPurifier_DoctypeRegistry();

          // setup basic modules
          $common = array(
              'CommonAttributes', 'Text', 'Hypertext', 'List',
              'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
              'StyleAttribute',
              // Unsafe:
              'Scripting', 'Object', 'Forms',
              // Sorta legacy, but present in strict:
              'Name',
          );
          $transitional = array('Legacy', 'Target', 'Iframe');
          $xml = array('XMLCommonAttributes');
          $non_xml = array('NonXMLCommonAttributes');

          // setup basic doctypes
          $this->doctypes->register(
              'HTML 4.01 Transitional',
              false,
              array_merge($common, $transitional, $non_xml),
              array('Tidy_Transitional', 'Tidy_Proprietary'),
              array(),
              '-//W3C//DTD HTML 4.01 Transitional//EN',
              'http://www.w3.org/TR/html4/loose.dtd'
          );

          $this->doctypes->register(
              'HTML 4.01 Strict',
              false,
              array_merge($common, $non_xml),
              array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
              array(),
              '-//W3C//DTD HTML 4.01//EN',
              'http://www.w3.org/TR/html4/strict.dtd'
          );

          $this->doctypes->register(
              'XHTML 1.0 Transitional',
              true,
              array_merge($common, $transitional, $xml, $non_xml),
              array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
              array(),
              '-//W3C//DTD XHTML 1.0 Transitional//EN',
              'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
          );

          // NOTE(review): 'Tidy_Strict' is listed twice here and in the
          // XHTML 1.1 entry below; kept as-is to preserve behaviour.
          $this->doctypes->register(
              'XHTML 1.0 Strict',
              true,
              array_merge($common, $xml, $non_xml),
              array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
              array(),
              '-//W3C//DTD XHTML 1.0 Strict//EN',
              'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
          );

          $this->doctypes->register(
              'XHTML 1.1',
              true,
              // Iframe is a real XHTML 1.1 module, despite being
              // "transitional"!
              array_merge($common, $xml, array('Ruby', 'Iframe')),
              array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
              array(),
              '-//W3C//DTD XHTML 1.1//EN',
              'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
          );
      }

      /**
       * Registers a module to the recognized module list, useful for
       * overloading pre-existing modules.
       * @param $module Mixed: string module name, with or without
       *                HTMLPurifier_HTMLModule prefix, or instance of
       *                subclass of HTMLPurifier_HTMLModule.
       * @param $overload Boolean whether or not to overload previous modules.
       *                  If this is not set, and you do overload a module,
       *                  HTML Purifier will complain with a warning.
       * @note This function will not call autoload, you must instantiate
       *       (and thus invoke) autoload outside the method.
       * @note If a string is passed as a module name, different variants
       *       will be tested in this order:
       *          - Check all prefixes with $name in the order they were
       *            added (HTMLPurifier_HTMLModule_ is the first prefix)
       *          - Check for literal object name
       *          - Throw fatal error
       *       If your object name collides with an internal class, specify
       *       your module manually. All modules must have been included
       *       externally: registerModule will not perform inclusions for you!
       */
      public function registerModule($module, $overload = false)
      {
          if (is_string($module)) {
              // attempt to resolve the name to a class, prefixes first
              $original_module = $module;
              $ok = false;
              foreach ($this->prefixes as $prefix) {
                  $module = $prefix . $original_module;
                  if (class_exists($module)) {
                      $ok = true;
                      break;
                  }
              }
              if (!$ok) {
                  // fall back to the literal class name
                  $module = $original_module;
                  if (!class_exists($module)) {
                      trigger_error(
                          $original_module . ' module does not exist',
                          E_USER_ERROR
                      );
                      return;
                  }
              }
              $module = new $module();
          }
          if (empty($module->name)) {
              trigger_error('Module instance of ' . get_class($module) . ' must have name');
              return;
          }
          // an unannounced overload is reported, but still carried out
          if (!$overload && isset($this->registeredModules[$module->name])) {
              trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
          }
          $this->registeredModules[$module->name] = $module;
      }

      /**
       * Adds a module to the current doctype by first registering it,
       * and then tacking it on to the active doctype
       * @param $module Mixed: module name or module instance, see
       *                registerModule()
       */
      public function addModule($module)
      {
          $this->registerModule($module);
          // only the name is remembered; setup() resolves it again later
          if (is_object($module)) {
              $module = $module->name;
          }
          $this->userModules[] = $module;
      }

      /**
       * Adds a class prefix that registerModule() will use to resolve a
       * string name to a concrete class
       * @param string $prefix
       */
      public function addPrefix($prefix)
      {
          $this->prefixes[] = $prefix;
      }

      /**
       * Performs processing on modules, after being called you may
       * use getElement() and getElements()
       * @param HTMLPurifier_Config $config
       */
      public function setup($config)
      {
          $this->trusted = $config->get('HTML.Trusted');

          // generate
          $this->doctype = $this->doctypes->make($config);
          $modules = $this->doctype->modules;

          // take out the default modules that aren't allowed
          $lookup = $config->get('HTML.AllowedModules');
          $special_cases = $config->get('HTML.CoreModules');

          if (is_array($lookup)) {
              foreach ($modules as $k => $m) {
                  // core modules survive regardless of the allow-list
                  if (isset($special_cases[$m])) {
                      continue;
                  }
                  if (!isset($lookup[$m])) {
                      unset($modules[$k]);
                  }
              }
          }

          // custom modules
          if ($config->get('HTML.Proprietary')) {
              $modules[] = 'Proprietary';
          }
          if ($config->get('HTML.SafeObject')) {
              $modules[] = 'SafeObject';
          }
          if ($config->get('HTML.SafeEmbed')) {
              $modules[] = 'SafeEmbed';
          }
          if ($config->get('HTML.SafeScripting') !== array()) {
              $modules[] = 'SafeScripting';
          }
          if ($config->get('HTML.Nofollow')) {
              $modules[] = 'Nofollow';
          }
          if ($config->get('HTML.TargetBlank')) {
              $modules[] = 'TargetBlank';
          }

          // merge in custom modules
          $modules = array_merge($modules, $this->userModules);

          foreach ($modules as $module) {
              $this->processModule($module);
              $this->modules[$module]->setup($config);
          }

          foreach ($this->doctype->tidyModules as $module) {
              $this->processModule($module);
              $this->modules[$module]->setup($config);
          }

          // prepare any injectors: names become instances, and the list is
          // re-indexed by injector name
          foreach ($this->modules as $module) {
              $n = array();
              foreach ($module->info_injector as $injector) {
                  if (!is_object($injector)) {
                      $class = "HTMLPurifier_Injector_$injector";
                      $injector = new $class;
                  }
                  $n[$injector->name] = $injector;
              }
              $module->info_injector = $n;
          }

          // setup lookup table based on all valid modules
          foreach ($this->modules as $module) {
              foreach ($module->info as $name => $def) {
                  if (!isset($this->elementLookup[$name])) {
                      $this->elementLookup[$name] = array();
                  }
                  $this->elementLookup[$name][] = $module->name;
              }
          }

          // note the different choice
          $this->contentSets = new HTMLPurifier_ContentSets(
              // content set assembly deals with all possible modules,
              // not just ones deemed to be "safe"
              $this->modules
          );
          $this->attrCollections = new HTMLPurifier_AttrCollections(
              $this->attrTypes,
              // there is no way to directly disable a global attribute,
              // but using AllowedAttributes or simply not including
              // the module in your custom doctype should be sufficient
              $this->modules
          );
      }

      /**
       * Takes a module and adds it to the active module collection,
       * registering it if necessary.
       * @param $module Mixed: module name, or module instance. An instance
       *                is indexed by its name; it is (re-)registered unless
       *                that very instance is already the registered one.
       */
      public function processModule($module)
      {
          if (is_object($module)) {
              if (empty($module->name)) {
                  // registerModule() reports the missing name; without a
                  // name there is no key to activate the module under
                  $this->registerModule($module);
                  return;
              }
              // An object cannot serve as an array key, so the instance is
              // tracked under its name from here on.
              $name = $module->name;
              if (!isset($this->registeredModules[$name]) ||
                  $this->registeredModules[$name] !== $module) {
                  $this->registerModule($module);
              }
              $module = $name;
          } elseif (!isset($this->registeredModules[$module])) {
              $this->registerModule($module);
          }
          $this->modules[$module] = $this->registeredModules[$module];
      }

      /**
       * Retrieves merged element definitions.
       * @return Array of HTMLPurifier_ElementDef
       */
      public function getElements()
      {
          $elements = array();
          foreach ($this->modules as $module) {
              // unsafe modules contribute nothing unless trusted mode is on
              if (!$this->trusted && !$module->safe) {
                  continue;
              }
              foreach ($module->info as $name => $v) {
                  if (isset($elements[$name])) {
                      continue;
                  }
                  $elements[$name] = $this->getElement($name);
              }
          }

          // remove dud elements, this happens when an element that
          // appeared to be safe actually wasn't
          foreach ($elements as $n => $v) {
              if ($v === false) {
                  unset($elements[$n]);
              }
          }

          return $elements;
      }

      /**
       * Retrieves a single merged element definition
       * @param string $name Name of element
       * @param bool $trusted Boolean trusted overriding parameter: set to true
       *                      if you want the full version of an element
       * @return HTMLPurifier_ElementDef|bool Merged HTMLPurifier_ElementDef,
       *         or false when no module yields a usable definition
       * @note You may notice that modules are getting iterated over twice (once
       *       in getElements() and once here). This is because getElements()
       *       only collects the names of elements offered by the permitted
       *       modules, whereas the definitions contributed by every module
       *       registered for one element are merged here.
       */
      public function getElement($name, $trusted = null)
      {
          if (!isset($this->elementLookup[$name])) {
              return false;
          }

          // setup global state variables
          $def = false;
          if ($trusted === null) {
              $trusted = $this->trusted;
          }

          // iterate through each module that has registered itself to this
          // element
          foreach ($this->elementLookup[$name] as $module_name) {
              $module = $this->modules[$module_name];

              // refuse to create/merge from a module that is deemed unsafe--
              // pretend the module doesn't exist--when trusted mode is not on.
              if (!$trusted && !$module->safe) {
                  continue;
              }

              // clone is used because, ideally speaking, the original
              // definition should not be modified. Usually, this will
              // make no difference, but for consistency's sake
              $new_def = clone $module->info[$name];

              if (!$def && $new_def->standalone) {
                  $def = $new_def;
              } elseif ($def) {
                  // This will occur even if $new_def is standalone. In practice,
                  // this will usually result in a full replacement.
                  $def->mergeIn($new_def);
              } else {
                  // :TODO:
                  // non-standalone definitions that don't have a standalone
                  // to merge into could be deferred to the end
                  // HOWEVER, it is perfectly valid for a non-standalone
                  // definition to lack a standalone definition, even
                  // after all processing: this allows us to safely
                  // specify extra attributes for elements that may not be
                  // enabled all in one place. In particular, this might
                  // be the case for trusted elements. WARNING: care must
                  // be taken that the /extra/ definitions are all safe.
                  continue;
              }

              // attribute value expansions
              $this->attrCollections->performInclusions($def->attr);
              $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

              // descendants_are_inline, for ChildDef_Chameleon
              if (is_string($def->content_model) &&
                  strpos($def->content_model, 'Inline') !== false) {
                  if ($name != 'del' && $name != 'ins') {
                      // this is for you, ins/del
                      $def->descendants_are_inline = true;
                  }
              }

              $this->contentSets->generateChildDef($def, $module);
          }

          // This can occur if there is a blank definition, but no base to
          // mix it in with
          if (!$def) {
              return false;
          }

          // add information on required attributes
          foreach ($def->attr as $attr_name => $attr_def) {
              if ($attr_def->required) {
                  $def->required_attr[] = $attr_name;
              }
          }

          return $def;
      }
  }
  5820. /**
  5821. * Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
  5822. * @note In Slashdot-speak, dupe means duplicate.
  5823. * @note The default constructor does not accept $config or $context objects:
  5824. * users must use the static build() factory method to perform initialization.
  5825. */
  class HTMLPurifier_IDAccumulator
  {
      /**
       * Lookup table of the IDs seen so far: each ID is a key mapped to true.
       * @type array
       */
      public $ids = array();

      /**
       * Factory: creates an accumulator that is pre-loaded with the IDs
       * listed in the Attr.IDBlacklist directive.
       * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
       * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
       *        (not consulted by this method)
       * @return HTMLPurifier_IDAccumulator Fully initialized HTMLPurifier_IDAccumulator
       */
      public static function build($config, $context)
      {
          $accumulator = new self();
          $blacklist = $config->get('Attr.IDBlacklist');
          $accumulator->load($blacklist);
          return $accumulator;
      }

      /**
       * Records an ID unless it has been recorded before.
       * @param string $id ID to be added.
       * @return bool true if the ID was new, false if it is a duplicate
       */
      public function add($id)
      {
          if (!isset($this->ids[$id])) {
              $this->ids[$id] = true;
              return true;
          }
          return false;
      }

      /**
       * Marks every ID of the given list as seen.
       * @param $array_of_ids Array of IDs to load
       * @note Duplicates are not reported; an ID that is already present
       *       simply stays marked
       */
      public function load($array_of_ids)
      {
          foreach ($array_of_ids as $seen) {
              $this->ids[$seen] = true;
          }
      }
  }
  5869. /**
  5870. * Injects tokens into the document while parsing for well-formedness.
  5871. * This enables "formatter-like" functionality such as auto-paragraphing,
  5872. * smiley-ification and linkification to take place.
  5873. *
  5874. * A note on how handlers create changes; this is done by assigning a new
  5875. * value to the $token reference. These values can take a variety of forms and
  5876. * are best described in the HTMLPurifier_Strategy_MakeWellFormed->processToken()
  5877. * documentation.
  5878. *
  5879. * @todo Allow injectors to request a re-run on their output. This
  5880. * would help if an operation is recursive.
  5881. */
  5882. abstract class HTMLPurifier_Injector
  5883. {
  5884. /**
  5885. * Advisory name of injector, this is for friendly error messages.
  5886. * @type string
  5887. */
  5888. public $name;
  5889. /**
  5890. * @type HTMLPurifier_HTMLDefinition
  5891. */
  5892. protected $htmlDefinition;
  5893. /**
  5894. * Reference to CurrentNesting variable in Context. This is an array
  5895. * list of tokens that we are currently "inside"
  5896. * @type array
  5897. */
  5898. protected $currentNesting;
  5899. /**
  5900. * Reference to current token.
  5901. * @type HTMLPurifier_Token
  5902. */
  5903. protected $currentToken;
  5904. /**
  5905. * Reference to InputZipper variable in Context.
  5906. * @type HTMLPurifier_Zipper
  5907. */
  5908. protected $inputZipper;
  5909. /**
  5910. * Array of elements and attributes this injector creates and therefore
  5911. * need to be allowed by the definition. Takes form of
  5912. * array('element' => array('attr', 'attr2'), 'element2')
  5913. * @type array
  5914. */
  5915. public $needed = array();
  5916. /**
  5917. * Number of elements to rewind backwards (relative).
  5918. * @type bool|int
  5919. */
  5920. protected $rewindOffset = false;
  5921. /**
  5922. * Rewind to a spot to re-perform processing. This is useful if you
  5923. * deleted a node, and now need to see if this change affected any
  5924. * earlier nodes. Rewinding does not affect other injectors, and can
  5925. * result in infinite loops if not used carefully.
  5926. * @param bool|int $offset
  5927. * @warning HTML Purifier will prevent you from fast-forwarding with this
  5928. * function.
  5929. */
  5930. public function rewindOffset($offset)
  5931. {
  5932. $this->rewindOffset = $offset;
  5933. }
  5934. /**
  5935. * Retrieves rewind offset, and then unsets it.
  5936. * @return bool|int
  5937. */
  5938. public function getRewindOffset()
  5939. {
  5940. $r = $this->rewindOffset;
  5941. $this->rewindOffset = false;
  5942. return $r;
  5943. }
  5944. /**
  5945. * Prepares the injector by giving it the config and context objects:
  5946. * this allows references to important variables to be made within
  5947. * the injector. This function also checks if the HTML environment
  5948. * will work with the Injector (see checkNeeded()).
  5949. * @param HTMLPurifier_Config $config
  5950. * @param HTMLPurifier_Context $context
  5951. * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
  5952. */
  5953. public function prepare($config, $context)
  5954. {
  5955. $this->htmlDefinition = $config->getHTMLDefinition();
  5956. // Even though this might fail, some unit tests ignore this and
  5957. // still test checkNeeded, so be careful. Maybe get rid of that
  5958. // dependency.
  5959. $result = $this->checkNeeded($config);
  5960. if ($result !== false) {
  5961. return $result;
  5962. }
  5963. $this->currentNesting =& $context->get('CurrentNesting');
  5964. $this->currentToken =& $context->get('CurrentToken');
  5965. $this->inputZipper =& $context->get('InputZipper');
  5966. return false;
  5967. }
  5968. /**
  5969. * This function checks if the HTML environment
  5970. * will work with the Injector: if p tags are not allowed, the
  5971. * Auto-Paragraphing injector should not be enabled.
  5972. * @param HTMLPurifier_Config $config
  5973. * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
  5974. */
  5975. public function checkNeeded($config)
  5976. {
  5977. $def = $config->getHTMLDefinition();
  5978. foreach ($this->needed as $element => $attributes) {
  5979. if (is_int($element)) {
  5980. $element = $attributes;
  5981. }
  5982. if (!isset($def->info[$element])) {
  5983. return $element;
  5984. }
  5985. if (!is_array($attributes)) {
  5986. continue;
  5987. }
  5988. foreach ($attributes as $name) {
  5989. if (!isset($def->info[$element]->attr[$name])) {
  5990. return "$element.$name";
  5991. }
  5992. }
  5993. }
  5994. return false;
  5995. }
  5996. /**
  5997. * Tests if the context node allows a certain element
  5998. * @param string $name Name of element to test for
  5999. * @return bool True if element is allowed, false if it is not
  6000. */
  6001. public function allowsElement($name)
  6002. {
  6003. if (!empty($this->currentNesting)) {
  6004. $parent_token = array_pop($this->currentNesting);
  6005. $this->currentNesting[] = $parent_token;
  6006. $parent = $this->htmlDefinition->info[$parent_token->name];
  6007. } else {
  6008. $parent = $this->htmlDefinition->info_parent_def;
  6009. }
  6010. if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
  6011. return false;
  6012. }
  6013. // check for exclusion
  6014. for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
  6015. $node = $this->currentNesting[$i];
  6016. $def = $this->htmlDefinition->info[$node->name];
  6017. if (isset($def->excludes[$name])) {
  6018. return false;
  6019. }
  6020. }
  6021. return true;
  6022. }
  6023. /**
  6024. * Iterator function, which starts with the next token and continues until
  6025. * you reach the end of the input tokens.
  6026. * @warning Please prevent previous references from interfering with this
  6027. * functions by setting $i = null beforehand!
  6028. * @param int $i Current integer index variable for inputTokens
  6029. * @param HTMLPurifier_Token $current Current token variable.
  6030. * Do NOT use $token, as that variable is also a reference
  6031. * @return bool
  6032. */
  6033. protected function forward(&$i, &$current)
  6034. {
  6035. if ($i === null) {
  6036. $i = count($this->inputZipper->back) - 1;
  6037. } else {
  6038. $i--;
  6039. }
  6040. if ($i < 0) {
  6041. return false;
  6042. }
  6043. $current = $this->inputZipper->back[$i];
  6044. return true;
  6045. }
  6046. /**
  6047. * Similar to _forward, but accepts a third parameter $nesting (which
  6048. * should be initialized at 0) and stops when we hit the end tag
  6049. * for the node $this->inputIndex starts in.
  6050. * @param int $i Current integer index variable for inputTokens
  6051. * @param HTMLPurifier_Token $current Current token variable.
  6052. * Do NOT use $token, as that variable is also a reference
  6053. * @param int $nesting
  6054. * @return bool
  6055. */
  6056. protected function forwardUntilEndToken(&$i, &$current, &$nesting)
  6057. {
  6058. $result = $this->forward($i, $current);
  6059. if (!$result) {
  6060. return false;
  6061. }
  6062. if ($nesting === null) {
  6063. $nesting = 0;
  6064. }
  6065. if ($current instanceof HTMLPurifier_Token_Start) {
  6066. $nesting++;
  6067. } elseif ($current instanceof HTMLPurifier_Token_End) {
  6068. if ($nesting <= 0) {
  6069. return false;
  6070. }
  6071. $nesting--;
  6072. }
  6073. return true;
  6074. }
  6075. /**
  6076. * Iterator function, starts with the previous token and continues until
  6077. * you reach the beginning of input tokens.
  6078. * @warning Please prevent previous references from interfering with this
  6079. * functions by setting $i = null beforehand!
  6080. * @param int $i Current integer index variable for inputTokens
  6081. * @param HTMLPurifier_Token $current Current token variable.
  6082. * Do NOT use $token, as that variable is also a reference
  6083. * @return bool
  6084. */
  6085. protected function backward(&$i, &$current)
  6086. {
  6087. if ($i === null) {
  6088. $i = count($this->inputZipper->front) - 1;
  6089. } else {
  6090. $i--;
  6091. }
  6092. if ($i < 0) {
  6093. return false;
  6094. }
  6095. $current = $this->inputZipper->front[$i];
  6096. return true;
  6097. }
  6098. /**
  6099. * Handler that is called when a text token is processed
  6100. */
  6101. public function handleText(&$token)
  6102. {
  6103. }
  6104. /**
  6105. * Handler that is called when a start or empty token is processed
  6106. */
  6107. public function handleElement(&$token)
  6108. {
  6109. }
  6110. /**
  6111. * Handler that is called when an end token is processed
  6112. */
  public function handleEnd(&$token)
  {
      // The default end handler only forwards the token to notifyEnd().
      $this->notifyEnd($token);
  }
  6117. /**
  6118. * Notifier that is called when an end token is processed
  6119. * @param HTMLPurifier_Token $token Current token variable.
  6120. * @note This differs from handlers in that the token is read-only
  6121. * @deprecated
  6122. */
  public function notifyEnd($token)
  {
      // Intentionally empty: the default notifier does nothing with the token.
  }
  6126. }
  6127. /**
  6128. * Represents a language and defines localizable string formatting and
  6129. * other functions, as well as the localized messages for HTML Purifier.
  6130. */
  class HTMLPurifier_Language
  {
      /**
       * ISO 639 code of this language, in its shortest available form.
       * @type string
       */
      public $code = 'en';

      /**
       * Code of the language to fall back on, or false for none.
       * @type bool|string
       */
      public $fallback = false;

      /**
       * Localizable messages, indexed by message identifier.
       * @type array
       */
      public $messages = array();

      /**
       * Localizable error names, indexed by error number.
       * @type array
       */
      public $errorNames = array();

      /**
       * True when no message file was found for this language and English
       * is used in its place. Inspect it to tell the user that an
       * unsupported language was requested.
       * @type bool
       */
      public $error = false;

      /**
       * Whether load() has already populated this object.
       * @type bool
       * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
       */
      public $_loaded = false;

      /**
       * @type HTMLPurifier_Config
       */
      protected $config;

      /**
       * @type HTMLPurifier_Context
       */
      protected $context;

      /**
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       */
      public function __construct($config, $context)
      {
          $this->context = $context;
          $this->config = $config;
      }

      /**
       * Lazily populates this object from the language factory's cache.
       * Every property named in the factory's $keys is copied from the
       * cache entry for this language's code.
       * @note Calling this more than once is harmless.
       */
      public function load()
      {
          if (!$this->_loaded) {
              $factory = HTMLPurifier_LanguageFactory::instance();
              $factory->loadLanguage($this->code);
              foreach ($factory->keys as $property) {
                  $this->$property = $factory->cache[$this->code][$property];
              }
              $this->_loaded = true;
          }
      }

      /**
       * Looks up a localised message.
       * @param string $key string identifier of message
       * @return string localised message, or "[$key]" when it is unknown
       */
      public function getMessage($key)
      {
          $this->ensureLoaded();
          return isset($this->messages[$key]) ? $this->messages[$key] : "[$key]";
      }

      /**
       * Looks up a localised error name.
       * @param int $int error number, corresponding to PHP's error reporting
       * @return string localised name, or "[Error: $int]" when it is unknown
       */
      public function getErrorName($int)
      {
          $this->ensureLoaded();
          return isset($this->errorNames[$int]) ? $this->errorNames[$int] : "[Error: $int]";
      }

      /**
       * Joins a list into a human readable enumeration, using the
       * 'Item separator' message between items and 'Item separator last'
       * before the final one.
       * @param array $array zero-indexed list of items
       * @return string
       */
      public function listify($array)
      {
          $separator = $this->getMessage('Item separator');
          $final_separator = $this->getMessage('Item separator last');
          $total = count($array);
          $last_index = $total - 1;
          $joined = '';
          for ($pos = 0; $pos < $total; $pos++) {
              if ($pos > 0) {
                  // only the last item is introduced by the "last" separator
                  $joined .= ($pos < $last_index) ? $separator : $final_separator;
              }
              $joined .= $array[$pos];
          }
          return $joined;
      }

      /**
       * Formats a localised message, substituting the passed parameters.
       *
       * Placeholders are '$' followed by the argument's key. Lists are
       * substituted through listify(); associative arrays provide
       * '.Keys' and '.Values'; HTMLPurifier_Token objects provide '.Name',
       * '.Data', '.Serialized', '.Compact' and '.Line'. Any other object
       * is skipped.
       *
       * @param string $key string identifier of message
       * @param array $args Parameters to substitute in
       * @return string localised message, or "[$key]" when it is unknown
       * @todo Implement conditionals? Right now, some messages make
       *     reference to line numbers, but those aren't always available
       */
      public function formatMessage($key, $args = array())
      {
          $this->ensureLoaded();
          if (!isset($this->messages[$key])) {
              return "[$key]";
          }
          $template = $this->messages[$key];
          $replacements = array();
          $generator = false;
          foreach ($args as $index => $arg) {
              $placeholder = '$' . $index;

              if (is_array($arg)) {
                  $arg_keys = array_keys($arg);
                  if (array_keys($arg_keys) === $arg_keys) {
                      // sequential keys: a plain list
                      $replacements[$placeholder] = $this->listify($arg);
                  } else {
                      // associative array; there is no bare placeholder form yet
                      $replacements[$placeholder . '.Keys'] = $this->listify($arg_keys);
                      $replacements[$placeholder . '.Values'] = $this->listify(array_values($arg));
                  }
                  continue;
              }

              if (!is_object($arg)) {
                  $replacements[$placeholder] = $arg;
                  continue;
              }

              if (!($arg instanceof HTMLPurifier_Token)) {
                  // objects other than tokens contribute nothing
                  continue;
              }

              // the generator is fetched from the context once, on first use
              if (!$generator) {
                  $generator = $this->context->get('Generator');
              }
              if (isset($arg->name)) {
                  $replacements[$placeholder . '.Name'] = $arg->name;
              }
              if (isset($arg->data)) {
                  $replacements[$placeholder . '.Data'] = $arg->data;
              }
              $serialized = $generator->generateFromToken($arg);
              $replacements[$placeholder . '.Serialized'] = $serialized;
              if (empty($arg->attr)) {
                  $replacements[$placeholder . '.Compact'] = $serialized;
              } else {
                  // the compact form is the same token with its attributes dropped
                  $bare_token = clone $arg;
                  $bare_token->attr = array();
                  $replacements[$placeholder . '.Compact'] = $generator->generateFromToken($bare_token);
              }
              $replacements[$placeholder . '.Line'] = $arg->line ? $arg->line : 'unknown';
          }
          return strtr($template, $replacements);
      }

      /**
       * Runs load() unless the object is already marked as loaded.
       */
      private function ensureLoaded()
      {
          if (!$this->_loaded) {
              $this->load();
          }
      }
  }
  6313. /**
  6314. * Class responsible for generating HTMLPurifier_Language objects, managing
  6315. * caching and fallbacks.
  6316. * @note Thanks to MediaWiki for the general logic, although this version
  6317. * has been entirely rewritten
  6318. * @todo Serialized cache for languages
  6319. */
  class HTMLPurifier_LanguageFactory
  {
      /**
       * Cache of language code information used to load HTMLPurifier_Language objects.
       * Structure is: $factory->cache[$language_code][$key] = $value
       * @type array
       */
      public $cache;

      /**
       * Valid keys in the HTMLPurifier_Language object. Designates which
       * variables to slurp out of a message file.
       * @type array
       */
      public $keys = array('fallback', 'messages', 'errorNames');

      /**
       * Instance to validate language codes.
       * @type HTMLPurifier_AttrDef_Lang
       */
      protected $validator;

      /**
       * Directory that contains the Language/ folder, without trailing
       * slash. Assigned by setup() from HTMLPURIFIER_PREFIX.
       * @type string
       */
      protected $dir;

      /**
       * Keys whose contents are a hash map and can be merged.
       * @type array
       */
      protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true);

      /**
       * Keys whose contents are a list and can be merged.
       * @type array lookup
       */
      protected $mergeable_keys_list = array();

      /**
       * Retrieve sole instance of the factory.
       * @param HTMLPurifier_LanguageFactory $prototype Optional prototype to overload sole instance with,
       *                                                or bool true to reset to default factory.
       * @return HTMLPurifier_LanguageFactory
       */
      public static function instance($prototype = null)
      {
          static $instance = null;
          if ($prototype === true) {
              // Explicit reset request. This must be tested before the
              // generic "prototype given" branch, otherwise the boolean
              // itself would be stored as the instance.
              $instance = new HTMLPurifier_LanguageFactory();
              $instance->setup();
          } elseif ($prototype !== null) {
              $instance = $prototype;
          } elseif ($instance === null) {
              $instance = new HTMLPurifier_LanguageFactory();
              $instance->setup();
          }
          return $instance;
      }

      /**
       * Sets up the singleton, much like a constructor
       * @note Prevents people from getting this outside of the singleton
       */
      public function setup()
      {
          $this->validator = new HTMLPurifier_AttrDef_Lang();
          $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier';
      }

      /**
       * Creates a language object, handles class fallbacks
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @param bool|string $code Code to override configuration with. Private parameter.
       * @return HTMLPurifier_Language
       */
      public function create($config, $context, $code = false)
      {
          // validate language code
          if ($code === false) {
              $code = $this->validator->validate(
                  $config->get('Core.Language'),
                  $config,
                  $context
              );
          } else {
              $code = $this->validator->validate($code, $config, $context);
          }
          if ($code === false) {
              $code = 'en'; // malformed code becomes English
          }

          $pcode = str_replace('-', '_', $code); // make valid PHP classname
          // NOTE(review): $depth is maintained around the recursive call
          // below but is never compared against a limit.
          static $depth = 0; // recursion protection

          if ($code == 'en') {
              $lang = new HTMLPurifier_Language($config, $context);
          } else {
              $class = 'HTMLPurifier_Language_' . $pcode;
              $file = $this->dir . '/Language/classes/' . $code . '.php';
              if (file_exists($file) || class_exists($class, false)) {
                  $lang = new $class($config, $context);
              } else {
                  // Go fallback
                  $raw_fallback = $this->getFallbackFor($code);
                  $fallback = $raw_fallback ? $raw_fallback : 'en';
                  $depth++;
                  $lang = $this->create($config, $context, $fallback);
                  if (!$raw_fallback) {
                      // no fallback was declared: flag that English stands in
                      $lang->error = true;
                  }
                  $depth--;
              }
          }
          // the object always reports the requested code, even when a
          // fallback class was instantiated
          $lang->code = $code;
          return $lang;
      }

      /**
       * Returns the fallback language for language
       * @note Loads the original language into cache
       * @param string $code language code
       * @return string|bool
       */
      public function getFallbackFor($code)
      {
          $this->loadLanguage($code);
          return $this->cache[$code]['fallback'];
      }

      /**
       * Loads language into the cache, handles message file and fallbacks
       *
       * The message file is included in this method's scope; the variables
       * named in $keys are then collected from it. The fallback language is
       * loaded recursively and merged underneath the language's own values.
       * A fallback chain that leads back to a language that is still being
       * loaded triggers an E_USER_ERROR and English is used instead.
       *
       * @param string $code language code
       */
      public function loadLanguage($code)
      {
          // Recursion guard: codes whose fallback is being resolved right
          // now. Entries are removed once the fallback has been loaded, so
          // the static does not leak state into later, unrelated loads
          // (e.g. after the factory has been reset).
          static $languages_seen = array();

          // abort if we've already loaded it
          if (isset($this->cache[$code])) {
              return;
          }

          // generate filename
          $filename = $this->dir . '/Language/messages/' . $code . '.php';

          // default fallback : may be overwritten by the ensuing include
          $fallback = ($code != 'en') ? 'en' : false;

          // load primary localisation
          if (!file_exists($filename)) {
              // skip the include: will rely solely on fallback
              $cache = array();
          } else {
              include $filename;
              $cache = compact($this->keys);
          }

          // load fallback localisation
          if (!empty($fallback)) {
              // infinite recursion guard
              if (isset($languages_seen[$code])) {
                  trigger_error(
                      'Circular fallback reference in language ' .
                      $code,
                      E_USER_ERROR
                  );
                  $fallback = 'en';
              }
              $languages_seen[$code] = true;

              // load the fallback recursively
              $this->loadLanguage($fallback);
              unset($languages_seen[$code]);
              $fallback_cache = $this->cache[$fallback];

              // merge fallback with current language
              foreach ($this->keys as $key) {
                  if (isset($cache[$key]) && isset($fallback_cache[$key])) {
                      if (isset($this->mergeable_keys_map[$key])) {
                          // the language's own entries win over the fallback's
                          $cache[$key] = $cache[$key] + $fallback_cache[$key];
                      } elseif (isset($this->mergeable_keys_list[$key])) {
                          $cache[$key] = array_merge($fallback_cache[$key], $cache[$key]);
                      }
                  } else {
                      $cache[$key] = $fallback_cache[$key];
                  }
              }
          }

          // save to cache for later retrieval
          $this->cache[$code] = $cache;
          return;
      }
  }
  6496. /**
  6497. * Represents a measurable length, with a string numeric magnitude
  6498. * and a unit. This object is immutable.
  6499. */
  class HTMLPurifier_Length
  {
      /**
       * Numeric magnitude, kept as a string.
       * @type string
       */
      protected $n;

      /**
       * Unit as a string; false is allowed when the magnitude is 0.
       * @type string|bool
       */
      protected $unit;

      /**
       * Cached validation result; null until isValid() has been called.
       * @type bool
       */
      protected $isValid;

      /**
       * Lookup of the units recognized by CSS 2.1.
       * @type array
       */
      protected static $allowedUnits = array(
          'em' => true, 'ex' => true, 'px' => true, 'in' => true,
          'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true
      );

      /**
       * @param string $n Magnitude
       * @param bool|string $u Unit
       */
      public function __construct($n = '0', $u = false)
      {
          $this->n = (string) $n;
          if ($u === false) {
              $this->unit = false;
          } else {
              $this->unit = (string) $u;
          }
      }

      /**
       * Splits a length string into magnitude and unit.
       * @param string $s Unit string, like '2em' or '3.4in'
       * @return HTMLPurifier_Length The argument itself when it already is a length
       * @warning Does not perform validation.
       */
      public static function make($s)
      {
          if ($s instanceof HTMLPurifier_Length) {
              return $s;
          }
          // the magnitude is the leading run of digits, dots and signs
          $split_at = strspn($s, '1234567890.+-');
          $magnitude = substr($s, 0, $split_at);
          $suffix = substr($s, $split_at);
          return new HTMLPurifier_Length($magnitude, $suffix === '' ? false : $suffix);
      }

      /**
       * Validates the number and unit, normalizing both in place.
       * @return bool
       */
      protected function validate()
      {
          // Special case: a signed zero is plain zero
          if (in_array($this->n, array('+0', '-0'), true)) {
              $this->n = '0';
          }
          // zero is the only magnitude that may go without a unit
          if ($this->unit === false && $this->n === '0') {
              return true;
          }
          if (!ctype_lower($this->unit)) {
              $this->unit = strtolower($this->unit);
          }
          if (!isset(self::$allowedUnits[$this->unit])) {
              return false;
          }
          // Hack: borrow the CSS number definition to check the magnitude
          $number_def = new HTMLPurifier_AttrDef_CSS_Number();
          $checked = $number_def->validate($this->n, false, false);
          if ($checked === false) {
              return false;
          }
          $this->n = $checked;
          return true;
      }

      /**
       * Returns the string representation of this length.
       * @return string|bool Magnitude followed by unit, or false when invalid
       */
      public function toString()
      {
          return $this->isValid() ? $this->n . $this->unit : false;
      }

      /**
       * Retrieves string numeric magnitude.
       * @return string
       */
      public function getN()
      {
          return $this->n;
      }

      /**
       * Retrieves string unit.
       * @return string
       */
      public function getUnit()
      {
          return $this->unit;
      }

      /**
       * Returns true if this length unit is valid.
       * The validation runs once; its result is cached afterwards.
       * @return bool
       */
      public function isValid()
      {
          if ($this->isValid === null) {
              $this->isValid = $this->validate();
          }
          return $this->isValid;
      }

      /**
       * Compares this length with another one.
       *
       * The result is this magnitude minus the other magnitude, so it is
       * positive if this length is greater, negative if it is less and
       * zero if both are equal. When the units differ, the other length
       * is converted to this length's unit first.
       *
       * @param HTMLPurifier_Length $l
       * @return int|float|bool Difference of the magnitudes, or false when
       *     $l is false or cannot be converted
       * @warning If both values are too large or small, this calculation will
       *          not work properly
       */
      public function compareTo($l)
      {
          $other = $l;
          if ($other !== false && $other->unit !== $this->unit) {
              $converter = new HTMLPurifier_UnitConverter();
              $other = $converter->convert($other, $this->unit);
          }
          if ($other === false) {
              return false;
          }
          return $this->n - $other->n;
      }
  }
  6640. /**
  6641. * Forgivingly lexes HTML (SGML-style) markup into tokens.
  6642. *
  6643. * A lexer parses a string of SGML-style markup and converts them into
  6644. * corresponding tokens. It doesn't check for well-formedness, although its
  6645. * internal mechanism may make this automatic (such as the case of
  6646. * HTMLPurifier_Lexer_DOMLex). There are several implementations to choose
  6647. * from.
  6648. *
  6649. * A lexer is HTML-oriented: it might work with XML, but it's not
  6650. * recommended, as we adhere to a subset of the specification for optimization
  6651. * reasons. This might change in the future. Also, most tokenizers are not
  6652. * expected to handle DTDs or PIs.
  6653. *
  6654. * This class should not be directly instantiated, but you may use create() to
  6655. * retrieve a default copy of the lexer. Being a supertype, this class
  6656. * does not actually define any implementation, but offers commonly used
  6657. * convenience functions for subclasses.
  6658. *
  6659. * @note The unit tests will instantiate this class for testing purposes, as
  6660. * many of the utility functions require a class to be instantiated.
  6661. * This means that, even though this class is not runnable, it will
  6662. * not be declared abstract.
  6663. *
  6664. * @par
  6665. *
  6666. * @note
  6667. * We use tokens rather than create a DOM representation because DOM would:
  6668. *
  6669. * @par
  6670. * -# Require more processing and memory to create,
  6671. * -# Is not streamable, and
  6672. * -# Has the entire document structure (html and body not needed).
  6673. *
  6674. * @par
  6675. * However, DOM is helpful in that it makes it easy to move around nodes
  6676. * without a lot of lookaheads to see when a tag is closed. This is a
  6677. * limitation of the token system and some workarounds would be nice.
  6678. */
  class HTMLPurifier_Lexer
  {
      /**
       * Whether or not this lexer implements line-number/column-number tracking.
       * If it does, set to true.
       */
      public $tracksLineNumbers = false;

      /**
       * Entity parser used by parseData() and normalize(); assigned in the
       * constructor. It is declared explicitly because creating undeclared
       * (dynamic) properties is deprecated as of PHP 8.2.
       * @type HTMLPurifier_EntityParser
       */
      protected $_entity_parser;

      // -- STATIC ----------------------------------------------------------

      /**
       * Retrieves or sets the default Lexer as a Prototype Factory.
       *
       * By default HTMLPurifier_Lexer_DOMLex will be returned. There are
       * a few exceptions involving special features that only DirectLex
       * implements.
       *
       * @note The behavior of this class has changed, rather than accepting
       *       a prototype object, it now accepts a configuration object.
       *       To specify your own prototype, set %Core.LexerImpl to it.
       *       This change in behavior de-singletonizes the lexer object.
       *
       * @param HTMLPurifier_Config $config
       * @return HTMLPurifier_Lexer
       * @throws HTMLPurifier_Exception
       */
      public static function create($config)
      {
          if (!($config instanceof HTMLPurifier_Config)) {
              // Deprecated calling convention: the argument is the lexer
              // prototype itself.
              $lexer = $config;
              trigger_error(
                  "Passing a prototype to\n" .
                  "HTMLPurifier_Lexer::create() is deprecated, please instead\n" .
                  "use %Core.LexerImpl",
                  E_USER_WARNING
              );
              // A prototype is not a configuration object, so there are no
              // directives to read from it; calling get() on it would be a
              // fatal error.
              $needs_tracking = false;
          } else {
              $lexer = $config->get('Core.LexerImpl');
              $needs_tracking =
                  $config->get('Core.MaintainLineNumbers') ||
                  $config->get('Core.CollectErrors');
          }

          $inst = null;
          if (is_object($lexer)) {
              $inst = $lexer;
          } else {
              if (is_null($lexer)) {
                  do {
                      // auto-detection algorithm
                      if ($needs_tracking) {
                          $lexer = 'DirectLex';
                          break;
                      }

                      if (class_exists('DOMDocument') &&
                          method_exists('DOMDocument', 'loadHTML') &&
                          !extension_loaded('domxml')
                      ) {
                          // check for DOM support, because while it's part of the
                          // core, it can be disabled compile time. Also, the PECL
                          // domxml extension overrides the default DOM, and is evil
                          // and nasty and we shan't bother to support it
                          $lexer = 'DOMLex';
                      } else {
                          $lexer = 'DirectLex';
                      }
                  } while (0);
              } // do..while so we can break

              // instantiate recognized string names
              switch ($lexer) {
                  case 'DOMLex':
                      $inst = new HTMLPurifier_Lexer_DOMLex();
                      break;
                  case 'DirectLex':
                      $inst = new HTMLPurifier_Lexer_DirectLex();
                      break;
                  case 'PH5P':
                      $inst = new HTMLPurifier_Lexer_PH5P();
                      break;
                  default:
                      throw new HTMLPurifier_Exception(
                          "Cannot instantiate unrecognized Lexer type " .
                          htmlspecialchars($lexer)
                      );
              }
          }

          if (!$inst) {
              throw new HTMLPurifier_Exception('No lexer was instantiated');
          }

          // once PHP DOM implements native line numbers, or we
          // hack out something using XSLT, remove this stipulation
          if ($needs_tracking && !$inst->tracksLineNumbers) {
              throw new HTMLPurifier_Exception(
                  'Cannot use lexer that does not support line numbers with ' .
                  'Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'
              );
          }

          return $inst;
      }

      // -- CONVENIENCE MEMBERS ---------------------------------------------

      public function __construct()
      {
          $this->_entity_parser = new HTMLPurifier_EntityParser();
      }

      /**
       * Most common entity to raw value conversion table for special entities.
       * @type array
       */
      protected $_special_entity2str =
          array(
              '&quot;' => '"',
              '&amp;' => '&',
              '&lt;' => '<',
              '&gt;' => '>',
              '&#39;' => "'",
              '&#039;' => "'",
              '&#x27;' => "'"
          );

      /**
       * Parses special entities into the proper characters.
       *
       * This string will translate escaped versions of the special characters
       * into the correct ones.
       *
       * @warning
       * You should be able to treat the output of this function as
       * completely parsed, but that's only because all other entities should
       * have been handled previously in substituteNonSpecialEntities()
       *
       * @param string $string String character data to be parsed.
       * @return string Parsed character data.
       */
      public function parseData($string)
      {
          // following functions require at least one character
          if ($string === '') {
              return '';
          }

          // subtracts amps that cannot possibly be escaped
          $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
              ($string[strlen($string) - 1] === '&' ? 1 : 0);

          if (!$num_amp) {
              return $string;
          } // abort if no entities
          $num_esc_amp = substr_count($string, '&amp;');
          $string = strtr($string, $this->_special_entity2str);

          // code duplication for sake of optimization, see above
          $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
              ($string[strlen($string) - 1] === '&' ? 1 : 0);

          // every ampersand still present is accounted for by a decoded
          // '&amp;', so nothing is left for the entity parser
          if ($num_amp_2 <= $num_esc_amp) {
              return $string;
          }

          // hmm... now we have some uncommon entities. Use the callback.
          $string = $this->_entity_parser->substituteSpecialEntities($string);
          return $string;
      }

      /**
       * Lexes an HTML string into tokens.
       * This base implementation only triggers an E_USER_ERROR.
       * @param $string String HTML.
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return HTMLPurifier_Token[] array representation of HTML.
       */
      public function tokenizeHTML($string, $config, $context)
      {
          trigger_error('Call to abstract class', E_USER_ERROR);
      }

      /**
       * Translates CDATA sections into regular sections (through escaping).
       * @param string $string HTML string to process.
       * @return string HTML with CDATA sections escaped.
       */
      protected static function escapeCDATA($string)
      {
          return preg_replace_callback(
              '/<!\[CDATA\[(.+?)\]\]>/s',
              array('HTMLPurifier_Lexer', 'CDATACallback'),
              $string
          );
      }

      /**
       * Special CDATA case that is especially convoluted for <script>
       * @param string $string HTML string to process.
       * @return string HTML with CDATA sections escaped.
       */
      protected static function escapeCommentedCDATA($string)
      {
          return preg_replace_callback(
              '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
              array('HTMLPurifier_Lexer', 'CDATACallback'),
              $string
          );
      }

      /**
       * Special Internet Explorer conditional comments should be removed.
       * @param string $string HTML string to process.
       * @return string HTML with conditional comments removed.
       */
      protected static function removeIEConditional($string)
      {
          return preg_replace(
              '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
              '',
              $string
          );
      }

      /**
       * Callback function for escapeCDATA() that does the work.
       *
       * @warning This method is protected; it is only meant to be invoked
       *          as the callback of the escaping functions of this class.
       * @param array $matches PCRE matches array, with index 0 the entire match
       *                  and 1 the inside of the CDATA section.
       * @return string Escaped internals of the CDATA section.
       */
      protected static function CDATACallback($matches)
      {
          // not exactly sure why the character set is needed, but whatever
          return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
      }

      /**
       * Takes a piece of HTML and normalizes it by converting entities, fixing
       * encoding, extracting bits, and other good stuff.
       * @param string $html HTML.
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return string
       * @todo Consider making protected
       */
      public function normalize($html, $config, $context)
      {
          // normalize newlines to \n
          if ($config->get('Core.NormalizeNewlines')) {
              $html = str_replace("\r\n", "\n", $html);
              $html = str_replace("\r", "\n", $html);
          }

          if ($config->get('HTML.Trusted')) {
              // escape convoluted CDATA
              $html = $this->escapeCommentedCDATA($html);
          }

          // escape CDATA
          $html = $this->escapeCDATA($html);

          $html = $this->removeIEConditional($html);

          // extract body from document if applicable
          if ($config->get('Core.ConvertDocumentToFragment')) {
              $e = false;
              if ($config->get('Core.CollectErrors')) {
                  $e =& $context->get('ErrorCollector');
              }
              $new_html = $this->extractBody($html);
              if ($e && $new_html != $html) {
                  $e->send(E_WARNING, 'Lexer: Extracted body');
              }
              $html = $new_html;
          }

          // expand entities that aren't the big five
          $html = $this->_entity_parser->substituteNonSpecialEntities($html);

          // clean into wellformed UTF-8 string for an SGML context: this has
          // to be done after entity expansion because the entities sometimes
          // represent non-SGML characters (horror, horror!)
          $html = HTMLPurifier_Encoder::cleanUTF8($html);

          // if processing instructions are to be removed, remove them now
          if ($config->get('Core.RemoveProcessingInstructions')) {
              $html = preg_replace('#<\?.+?\?>#s', '', $html);
          }

          return $html;
      }

      /**
       * Takes a string of HTML (fragment or document) and returns the content
       * of its body element, or the string unchanged when no body element
       * with both tags is found.
       * @param string $html
       * @return string
       * @todo Consider making protected
       */
      public function extractBody($html)
      {
          $matches = array();
          $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches);
          if ($result) {
              return $matches[1];
          } else {
              return $html;
          }
      }
  }
  6958. /**
  6959. * Abstract base node class that all others inherit from.
  6960. *
  6961. * Why do we not use the DOM extension? (1) It is not always available,
  6962. * (2) it has funny constraints on the data it can represent,
  6963. * whereas we want a maximally flexible representation, and (3) its
  6964. * interface is a bit cumbersome.
  6965. */
  abstract class HTMLPurifier_Node
  {
      /**
       * Line of the source document on which the start token appears.
       * @type int
       */
      public $line;

      /**
       * Column of the source document at which the start token appears;
       * null when it is not known.
       * @type int
       */
      public $col;

      /**
       * Lookup of the processing steps this node is exempt from.
       * "ValidateAttributes" is currently the only valid entry.
       * @type array
       */
      public $armor = array();

      /**
       * Marks the node as dead, i.e. to be treated as if it did not exist.
       *
       * Removing dead nodes is the job of FixNesting, which does so
       * before handing the nodes on to child validators.
       * @type bool
       */
      public $dead = false;

      /**
       * Converts the node into a pair of start and end tokens, the end
       * token being null when none is necessary. Children are not included.
       * @return array
       */
      abstract public function toTokenPair();
  }
  6999. /**
  7000. * Class that handles operations involving percent-encoding in URIs.
  7001. *
  7002. * @warning
  7003. * Be careful when reusing instances of PercentEncoder. The object
  7004. * you use for normalize() SHOULD NOT be used for encode(), or
  7005. * vice-versa.
  7006. */
  class HTMLPurifier_PercentEncoder
  {
      /**
       * Lookup, indexed by byte value, of the characters that encode()
       * leaves alone and that normalize() decodes.
       * @type array
       */
      protected $preserve = array();

      /**
       * Builds the lookup of preserved characters: the unreserved set
       * (digits, letters, dash, period, underscore and tilde) plus any
       * extra characters passed in.
       * @param bool|string $preserve Extra characters to preserve, or false for none
       */
      public function __construct($preserve = false)
      {
          // unreserved letters, ought to const-ify
          $unreserved = array_merge(
              range(48, 57),  // digits
              range(65, 90),  // upper-case
              range(97, 122), // lower-case
              array(
                  45,  // Dash -
                  46,  // Period .
                  95,  // Underscore _
                  126, // Tilde ~
              )
          );
          foreach ($unreserved as $byte) {
              $this->preserve[$byte] = true;
          }

          // extra letters not to escape
          if ($preserve !== false) {
              $extra_count = strlen($preserve);
              for ($pos = 0; $pos < $extra_count; $pos++) {
                  $this->preserve[ord($preserve[$pos])] = true;
              }
          }
      }

      /**
       * Our replacement for urlencode: percent-encodes every character
       * that is neither in the preserve lookup nor a percent sign.
       * @note
       *      Assumes that the string has already been normalized, making any
       *      and all percent escape sequences valid. Percents will not be
       *      re-escaped, regardless of their status in $preserve
       * @param string $string String to be encoded
       * @return string Encoded string.
       */
      public function encode($string)
      {
          $encoded = '';
          $length = strlen($string);
          for ($pos = 0; $pos < $length; $pos++) {
              $char = $string[$pos];
              if ($char === '%' || isset($this->preserve[ord($char)])) {
                  $encoded .= $char;
              } else {
                  // '%' followed by the byte as two upper-case hex digits
                  $encoded .= sprintf('%%%02X', ord($char));
              }
          }
          return $encoded;
      }

      /**
       * Fix up percent-encoding by decoding unreserved characters and normalizing.
       *
       * A percent sign that is not followed by two hex digits is itself
       * encoded as %25; escapes of preserved characters are decoded; all
       * remaining escapes are upper-cased.
       *
       * @warning This function is affected by $preserve, even though the
       *          usual desired behavior is for this not to preserve those
       *          characters. Be careful when reusing instances of PercentEncoder!
       * @param string $string String to normalize
       * @return string
       */
      public function normalize($string)
      {
          if ($string == '') {
              return '';
          }
          $segments = explode('%', $string);
          // whatever precedes the first percent sign is copied verbatim
          $normalized = array_shift($segments);
          foreach ($segments as $segment) {
              $hex = substr($segment, 0, 2);
              if (strlen($segment) < 2 || !ctype_xdigit($hex)) {
                  // not a valid escape: the percent sign was a literal one
                  $normalized .= '%25' . $segment;
                  continue;
              }
              $tail = substr($segment, 2);
              $byte = hexdec($hex);
              $normalized .= isset($this->preserve[$byte])
                  ? chr($byte) . $tail
                  : '%' . strtoupper($hex) . $tail;
          }
          return $normalized;
      }
  }
  /**
   * Generic property list implementation.
   *
   * A property list is a key/value map with an optional parent list; lookups
   * that miss locally are delegated to the parent.
   */
  class HTMLPurifier_PropertyList
  {
      /**
       * Internal data-structure for properties set directly on this plist.
       * @type array
       */
      protected $data = array();

      /**
       * Parent plist, consulted when a key is not set locally.
       * @type HTMLPurifier_PropertyList
       */
      protected $parent;

      /**
       * Memoised result of squash(); null when nothing is cached.
       * @type array
       */
      protected $cache;

      /**
       * @param HTMLPurifier_PropertyList $parent Parent plist
       */
      public function __construct($parent = null)
      {
          $this->parent = $parent;
      }

      /**
       * Recursively retrieves the value for a key
       * @param string $name
       * @return mixed Value stored on this plist or on the nearest ancestor
       *      that has the key.
       * @throws HTMLPurifier_Exception When neither this plist nor any
       *      ancestor has the key.
       */
      public function get($name)
      {
          if ($this->has($name)) {
              return $this->data[$name];
          }
          // possible performance bottleneck, convert to iterative if necessary
          if ($this->parent) {
              return $this->parent->get($name);
          }
          throw new HTMLPurifier_Exception("Key '$name' not found");
      }

      /**
       * Sets the value of a key, for this plist
       * @param string $name
       * @param mixed $value
       */
      public function set($name, $value)
      {
          $this->data[$name] = $value;
          // The squashed view is now out of date.
          $this->cache = null;
      }

      /**
       * Returns true if a given key exists on this plist (parents are not
       * consulted). A key whose value is null still counts as existing.
       * @param string $name
       * @return bool
       */
      public function has($name)
      {
          return array_key_exists($name, $this->data);
      }

      /**
       * Resets a value to the value of its parent, usually the default. If
       * no value is specified, the entire plist is reset.
       * @param string $name Key to drop from this plist. The check is a loose
       *      comparison with null, so any value that compares equal to null
       *      clears the whole plist.
       */
      public function reset($name = null)
      {
          if ($name == null) {
              $this->data = array();
          } else {
              unset($this->data[$name]);
          }
          // The squashed view is now out of date.
          $this->cache = null;
      }

      /**
       * Squashes this property list and all of its parent property lists into
       * a single array, and returns the array. Values set on this plist
       * override the parent's. The result is cached; the cache is dropped
       * whenever this plist is changed through set(), reset() or setParent().
       * Changes made directly to an ancestor are not detected here, so pass
       * $force = true after modifying a parent.
       * @param bool $force If true, ignores the cache and regenerates the array.
       * @return array
       */
      public function squash($force = false)
      {
          if ($this->cache !== null && !$force) {
              return $this->cache;
          }
          if ($this->parent) {
              return $this->cache = array_merge($this->parent->squash($force), $this->data);
          } else {
              return $this->cache = $this->data;
          }
      }

      /**
       * Returns the parent plist.
       * @return HTMLPurifier_PropertyList
       */
      public function getParent()
      {
          return $this->parent;
      }

      /**
       * Sets the parent plist.
       * @param HTMLPurifier_PropertyList $plist Parent plist
       */
      public function setParent($plist)
      {
          $this->parent = $plist;
          // Inherited values may differ under the new parent.
          $this->cache = null;
      }
  }
  /**
   * Property list iterator. Do not instantiate this class directly.
   *
   * Wraps another iterator and only lets through the entries whose key
   * starts with a given prefix.
   */
  class HTMLPurifier_PropertyListIterator extends FilterIterator
  {
      /**
       * Length of the prefix in bytes; 0 means "no filtering".
       * @type int
       */
      protected $l;

      /**
       * Key prefix to match, or null when no prefix was given.
       * @type string
       */
      protected $filter;

      /**
       * @param Iterator $iterator Array of data to iterate over
       * @param string $filter Optional prefix to only allow values of
       */
      public function __construct(Iterator $iterator, $filter = null)
      {
          parent::__construct($iterator);
          // strlen(null) is deprecated as of PHP 8.1; a missing filter has length 0.
          $this->l = $filter === null ? 0 : strlen($filter);
          $this->filter = $filter;
      }

      /**
       * Decides whether the current element of the inner iterator is kept.
       * @return bool True when the current key starts with the prefix, or
       *      when the prefix is empty.
       */
      #[\ReturnTypeWillChange]
      public function accept()
      {
          if ($this->l === 0) {
              // An empty prefix matches every key; this also avoids handing
              // a null filter to strncmp().
              return true;
          }
          $key = (string)$this->getInnerIterator()->key();
          return strncmp($key, $this->filter, $this->l) === 0;
      }
  }
  /**
   * A simple array-backed FIFO queue, based off of the classic Okasaki
   * persistent amortized queue. Two stacks are maintained: new elements
   * go onto an input stack, and elements are taken from an output stack.
   * Whenever the output stack runs dry, the input stack is reversed and
   * becomes the new output stack.
   *
   * We don't use the SPL implementation because it's only supported
   * on PHP 5.3 and later.
   *
   * Exercise: Prove that push/shift on this queue take amortized O(1) time.
   *
   * Exercise: Extend this queue to be a deque, while preserving amortized
   * O(1) time.  Some care must be taken on rebalancing to avoid quadratic
   * behaviour caused by repeatedly shuffling data from the input stack
   * to the output stack and back.
   */
  class HTMLPurifier_Queue {
      /** Stack receiving pushed elements (newest last). */
      private $input;

      /** Stack from which elements are shifted (oldest last). */
      private $output;

      /**
       * @param array $input Initial contents; the first array element is the
       *      first one to be shifted off.
       */
      public function __construct($input = array()) {
          $this->output = array();
          $this->input = $input;
      }

      /**
       * Shifts an element off the front of the queue.
       * @return mixed The oldest element, or null when the queue is empty.
       */
      public function shift() {
          if (!$this->output) {
              // Refill: reversing puts the oldest pushed element on top.
              $this->output = array_reverse($this->input);
              $this->input = array();
          }
          return $this->output ? array_pop($this->output) : NULL;
      }

      /**
       * Appends an element to the back of the queue.
       * @param mixed $x
       */
      public function push($x) {
          $this->input[] = $x;
      }

      /**
       * Checks if the queue holds no elements.
       * @return bool
       */
      public function isEmpty() {
          return !($this->input || $this->output);
      }
  }
  /**
   * Base type for every strategy that modifies or purifies a token stream.
   *
   * Beyond HTMLPurifier's core job of turning HTML into something proper,
   * strategies serve as plug points for extra configuration or additional
   * features such as custom tags or custom parsing of text.
   */
  abstract class HTMLPurifier_Strategy
  {
      /**
       * Runs the strategy over a list of tokens.
       *
       * @param HTMLPurifier_Token[] $tokens Token objects to operate on.
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return HTMLPurifier_Token[] The processed token objects.
       */
      abstract public function execute($tokens, $config, $context);
  }
  /**
   * This is in almost every respect equivalent to an array except
   * that it keeps track of which keys were accessed.
   *
   * @warning For the sake of backwards compatibility with early versions
   *    of PHP 5, you must not use the $hash[$key] syntax; if you do
   *    our version of offsetGet is never called.
   */
  class HTMLPurifier_StringHash extends ArrayObject
  {
      /**
       * Lookup array (index => true) of every index read through offsetGet()
       * since construction or the last resetAccessed().
       * @type array
       */
      protected $accessed = array();

      /**
       * Retrieves a value, and logs the access.
       *
       * The attribute below keeps PHP 8.1+ from reporting a deprecation for
       * overriding ArrayObject::offsetGet() without a declared return type;
       * older PHP versions parse that line as an ordinary comment.
       * @param mixed $index
       * @return mixed
       */
      #[\ReturnTypeWillChange]
      public function offsetGet($index)
      {
          $this->accessed[$index] = true;
          return parent::offsetGet($index);
      }

      /**
       * Returns a lookup array of all array indexes that have been accessed.
       * @return array in form array($index => true).
       */
      public function getAccessed()
      {
          return $this->accessed;
      }

      /**
       * Resets the access array.
       */
      public function resetAccessed()
      {
          $this->accessed = array();
      }
  }
  /**
   * Parses string hash files. File format is as such:
   *
   *      DefaultKeyValue
   *      KEY: Value
   *      KEY2: Value2
   *      --MULTILINE-KEY--
   *      Multiline
   *      value.
   *
   * Which would output something similar to:
   *
   *      array(
   *          'ID' => 'DefaultKeyValue',
   *          'KEY' => 'Value',
   *          'KEY2' => 'Value2',
   *          'MULTILINE-KEY' => "Multiline\nvalue.\n",
   *      )
   *
   * We use this as an easy to use file-format for configuration schema
   * files, but the class itself is usage agnostic.
   *
   * You can use ---- to forcibly terminate parsing of a single string-hash;
   * this marker is used in multi string-hashes to delimit boundaries.
   * Lines starting with --# are comments and are skipped.
   */
  class HTMLPurifier_StringHashParser
  {
      /**
       * Key under which a line without a "KEY:" prefix is stored.
       * @type string
       */
      public $default = 'ID';

      /**
       * Parses a file that contains a single string-hash.
       * @param string $file
       * @return array|false The parsed hash, or false when the file does not
       *      exist or cannot be opened.
       */
      public function parseFile($file)
      {
          if (!file_exists($file)) {
              return false;
          }
          $fh = fopen($file, 'r');
          if (!$fh) {
              return false;
          }
          $ret = $this->parseHandle($fh);
          fclose($fh);
          return $ret;
      }

      /**
       * Parses a file that contains multiple string-hashes delimited by '----'
       * @param string $file
       * @return array|false List of parsed hashes, or false when the file
       *      does not exist or cannot be opened.
       */
      public function parseMultiFile($file)
      {
          if (!file_exists($file)) {
              return false;
          }
          $ret = array();
          $fh = fopen($file, 'r');
          if (!$fh) {
              return false;
          }
          while (!feof($fh)) {
              $ret[] = $this->parseHandle($fh);
          }
          fclose($fh);
          return $ret;
      }

      /**
       * Internal parser that accepts a file handle. Reads lines until the
       * handle is exhausted or a '----' terminator line is found.
       * @note While it's possible to simulate in-memory parsing by using
       *       custom stream wrappers, if such a use-case arises we should
       *       factor out the file handle into its own class.
       * @param resource $fh File handle with pointer at start of valid string-hash
       *            block.
       * @return array
       */
      protected function parseHandle($fh)
      {
          // $state is the key currently being filled, or false when no
          // multiline section is open. It is compared strictly against false
          // so that keys which are falsy strings (such as '0') still work.
          $state = false;
          $single = false;
          $ret = array();
          do {
              $line = fgets($fh);
              if ($line === false) {
                  break;
              }
              $line = rtrim($line, "\n\r");
              if ($state === false && $line === '') {
                  // Blank lines only matter inside a multiline value.
                  continue;
              }
              if ($line === '----') {
                  break;
              }
              if (strncmp('--#', $line, 3) === 0) {
                  // Comment
                  continue;
              } elseif (strncmp('--', $line, 2) === 0) {
                  // Multiline declaration
                  $state = trim($line, '- ');
                  if (!isset($ret[$state])) {
                      $ret[$state] = '';
                  }
                  continue;
              } elseif ($state === false) {
                  $single = true;
                  if (strpos($line, ':') !== false) {
                      // Single-line declaration
                      list($state, $line) = explode(':', $line, 2);
                      $line = trim($line);
                  } else {
                      // Use default declaration
                      $state = $this->default;
                  }
              }
              if ($single) {
                  $ret[$state] = $line;
                  $single = false;
                  $state = false;
              } else {
                  $ret[$state] .= "$line\n";
              }
          } while (!feof($fh));
          return $ret;
      }
  }
  /**
   * Describes how an obsolete tag is turned into a valid tag.
   */
  abstract class HTMLPurifier_TagTransform
  {
      /**
       * Name of the tag that the obsolete tag is transformed to.
       * @type string
       */
      public $transform_to;

      /**
       * Performs the transformation of the obsolete tag into the valid tag.
       * @param HTMLPurifier_Token_Tag $tag Tag to be transformed.
       * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object
       * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
       */
      abstract public function transform($tag, $config, $context);

      /**
       * Puts CSS properties in front of whatever the style attribute already
       * holds; the attribute is created when it is missing.
       * @warning Copied over from AttrTransform, be sure to keep in sync
       * @param array $attr Attribute array to process (passed by reference)
       * @param string $css CSS to prepend
       */
      protected function prependCSS(&$attr, $css)
      {
          if (!isset($attr['style'])) {
              $attr['style'] = '';
          }
          $attr['style'] = $css . $attr['style'];
      }
  }
  /**
   * Abstract base class that every token type inherits from.
   */
  abstract class HTMLPurifier_Token
  {
      /**
       * Line number the node was on in the source document. Null if unknown.
       * @type int
       */
      public $line;

      /**
       * Column of the line the node was on in the source document. Null if unknown.
       * @type int
       */
      public $col;

      /**
       * Lookup array of processing that this token is exempt from.
       * Currently, valid values are "ValidateAttributes" and
       * "MakeWellFormed_TagClosedError"
       * @type array
       */
      public $armor = array();

      /**
       * Used during MakeWellFormed.
       * @type
       */
      public $skip;

      /**
       * @type
       */
      public $rewind;

      /**
       * @type
       */
      public $carryover;

      /**
       * Backs the deprecated read-only "type" property. Reading it raises an
       * E_USER_NOTICE and yields a short name derived from the exact class
       * of the token; any other undefined property reads as null.
       * @param string $n Name of the property being read
       * @return null|string
       */
      public function __get($n)
      {
          if ($n !== 'type') {
              return null;
          }
          trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
          $legacyTypes = array(
              'HTMLPurifier_Token_Start' => 'start',
              'HTMLPurifier_Token_Empty' => 'empty',
              'HTMLPurifier_Token_End' => 'end',
              'HTMLPurifier_Token_Text' => 'text',
              'HTMLPurifier_Token_Comment' => 'comment',
          );
          $class = get_class($this);
          return isset($legacyTypes[$class]) ? $legacyTypes[$class] : null;
      }

      /**
       * Records where the token sits in the source document.
       * @param int $l Line number
       * @param int $c Column number
       */
      public function position($l = null, $c = null)
      {
          $this->col = $c;
          $this->line = $l;
      }

      /**
       * Convenience function for DirectLex settings line/col position.
       * A column of -1 moves the recorded line one further down.
       * @param int $l Line number
       * @param int $c Column number
       */
      public function rawPosition($l, $c)
      {
          if (-1 === $c) {
              $l++;
          }
          $this->col = $c;
          $this->line = $l;
      }

      /**
       * Converts a token into its corresponding node.
       */
      abstract public function toNode();
  }
  /**
   * Factory for token generation.
   *
   * @note Doing some benchmarking indicates that the new operator is much
   *       slower than the clone operator (even discounting the cost of the
   *       constructor).  This class is for that optimization.
   *       Other than that, there's not much point as we don't
   *       maintain parallel HTMLPurifier_Token hierarchies (the main reason why
   *       you'd want to use an abstract factory).
   * @todo Port DirectLex to use this
   */
  class HTMLPurifier_TokenFactory
  {
      // Each p_* property holds a blank prototype ("p") that is cloned on demand.

      /**
       * @type HTMLPurifier_Token_Start
       */
      private $p_start;

      /**
       * @type HTMLPurifier_Token_End
       */
      private $p_end;

      /**
       * @type HTMLPurifier_Token_Empty
       */
      private $p_empty;

      /**
       * @type HTMLPurifier_Token_Text
       */
      private $p_text;

      /**
       * @type HTMLPurifier_Token_Comment
       */
      private $p_comment;

      /**
       * Builds one blank prototype per token class for later cloning.
       */
      public function __construct()
      {
          $this->p_start = new HTMLPurifier_Token_Start('', array());
          $this->p_end = new HTMLPurifier_Token_End('');
          $this->p_empty = new HTMLPurifier_Token_Empty('', array());
          $this->p_text = new HTMLPurifier_Token_Text('');
          $this->p_comment = new HTMLPurifier_Token_Comment('');
      }

      /**
       * Creates a HTMLPurifier_Token_Start by cloning the prototype and
       * re-running its constructor with the given values.
       * @param string $name Tag name
       * @param array $attr Associative array of attributes
       * @return HTMLPurifier_Token_Start Generated HTMLPurifier_Token_Start
       */
      public function createStart($name, $attr = array())
      {
          $token = clone $this->p_start;
          $token->__construct($name, $attr);
          return $token;
      }

      /**
       * Creates a HTMLPurifier_Token_End from the end-tag prototype.
       * @param string $name Tag name
       * @return HTMLPurifier_Token_End Generated HTMLPurifier_Token_End
       */
      public function createEnd($name)
      {
          $token = clone $this->p_end;
          $token->__construct($name);
          return $token;
      }

      /**
       * Creates a HTMLPurifier_Token_Empty from the empty-tag prototype.
       * @param string $name Tag name
       * @param array $attr Associative array of attributes
       * @return HTMLPurifier_Token_Empty Generated HTMLPurifier_Token_Empty
       */
      public function createEmpty($name, $attr = array())
      {
          $token = clone $this->p_empty;
          $token->__construct($name, $attr);
          return $token;
      }

      /**
       * Creates a HTMLPurifier_Token_Text from the text prototype.
       * @param string $data Data of text token
       * @return HTMLPurifier_Token_Text Generated HTMLPurifier_Token_Text
       */
      public function createText($data)
      {
          $token = clone $this->p_text;
          $token->__construct($data);
          return $token;
      }

      /**
       * Creates a HTMLPurifier_Token_Comment from the comment prototype.
       * @param string $data Data of comment token
       * @return HTMLPurifier_Token_Comment Generated HTMLPurifier_Token_Comment
       */
      public function createComment($data)
      {
          $token = clone $this->p_comment;
          $token->__construct($data);
          return $token;
      }
  }
  /**
   * HTML Purifier's internal representation of a URI.
   * @note
   *      Internal data-structures are completely escaped. If the data needs
   *      to be used in a non-URI context (which is very unlikely), be sure
   *      to decode it first. The URI may not necessarily be well-formed until
   *      validate() is called.
   */
  class HTMLPurifier_URI
  {
      /**
       * @type string
       */
      public $scheme;

      /**
       * @type string
       */
      public $userinfo;

      /**
       * @type string
       */
      public $host;

      /**
       * @type int
       */
      public $port;

      /**
       * @type string
       */
      public $path;

      /**
       * @type string
       */
      public $query;

      /**
       * @type string
       */
      public $fragment;

      /**
       * @param string $scheme
       * @param string $userinfo
       * @param string $host
       * @param int $port
       * @param string $path
       * @param string $query
       * @param string $fragment
       * @note Automatically normalizes scheme and port: a non-null scheme is
       *       lower-cased and a non-null port is cast to int.
       */
      public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
      {
          $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
          $this->userinfo = $userinfo;
          $this->host = $host;
          $this->port = is_null($port) ? $port : (int)$port;
          $this->path = $path;
          $this->query = $query;
          $this->fragment = $fragment;
      }

      /**
       * Retrieves a scheme object corresponding to the URI's scheme/default
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
       *      validating this URI, or false when no scheme object could be
       *      obtained.
       */
      public function getSchemeObj($config, $context)
      {
          $registry = HTMLPurifier_URISchemeRegistry::instance();
          if ($this->scheme !== null) {
              $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
              if (!$scheme_obj) {
                  return false;
              } // invalid scheme, clean it out
          } else {
              // no scheme: retrieve the default one
              $def = $config->getDefinition('URI');
              $scheme_obj = $def->getDefaultScheme($config, $context);
              if (!$scheme_obj) {
                  // something funky happened to the default scheme object
                  trigger_error(
                      'Default scheme object "' . $def->defaultScheme . '" was not readable',
                      E_USER_WARNING
                  );
                  return false;
              }
          }
          return $scheme_obj;
      }

      /**
       * Generic validation method applicable for all schemes. May modify
       * this URI in order to get it into a compliant form.
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return bool True if validation/filtering succeeds, false if failure
       */
      public function validate($config, $context)
      {
          // ABNF definitions from RFC 3986
          $chars_sub_delims = '!$&\'()*+,;=';
          $chars_pchar = $chars_sub_delims . ':@';

          // validate host
          if (!is_null($this->host)) {
              $host_def = new HTMLPurifier_AttrDef_URI_Host();
              $this->host = $host_def->validate($this->host, $config, $context);
              if ($this->host === false) {
                  $this->host = null;
              }
          }

          // validate scheme
          // NOTE: It's not appropriate to check whether or not this
          // scheme is in our registry, since a URIFilter may convert a
          // URI that we don't allow into one we do.  So instead, we just
          // check if the scheme can be dropped because there is no host
          // and it is our default scheme.
          // The grouping is explicit because && binds tighter than ||;
          // without the parentheses a URI with no scheme and an empty host
          // would enter this branch and look up the definition for nothing.
          if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
              // support for relative paths is pretty abysmal when the
              // scheme is present, so axe it when possible
              $def = $config->getDefinition('URI');
              if ($def->defaultScheme === $this->scheme) {
                  $this->scheme = null;
              }
          }

          // validate username
          if (!is_null($this->userinfo)) {
              $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
              $this->userinfo = $encoder->encode($this->userinfo);
          }

          // validate port
          if (!is_null($this->port)) {
              if ($this->port < 1 || $this->port > 65535) {
                  $this->port = null;
              }
          }

          // validate path
          $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
          if (!is_null($this->host)) { // this catches $this->host === ''
              // path-abempty (hier and relative)
              // http://www.example.com/my/path
              // //www.example.com/my/path (looks odd, but works, and
              //                            recognized by most browsers)
              // (this set is valid or invalid on a scheme by scheme
              // basis, so we'll deal with it later)
              // file:///my/path
              // ///my/path
              $this->path = $segments_encoder->encode($this->path);
          } elseif ($this->path !== '') {
              if ($this->path[0] === '/') {
                  // path-absolute (hier and relative)
                  // http:/my/path
                  // /my/path
                  if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                      // This could happen if both the host gets stripped
                      // out
                      // http://my/path
                      // //my/path
                      $this->path = '';
                  } else {
                      $this->path = $segments_encoder->encode($this->path);
                  }
              } elseif (!is_null($this->scheme)) {
                  // path-rootless (hier)
                  // http:my/path
                  // Short circuit evaluation means we don't need to check nz
                  $this->path = $segments_encoder->encode($this->path);
              } else {
                  // path-noscheme (relative)
                  // my/path
                  // (once again, not checking nz)
                  // The first segment is encoded without ':' in the
                  // preserve set; the remainder uses the regular encoder.
                  $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                  $c = strpos($this->path, '/');
                  if ($c !== false) {
                      $this->path =
                          $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                          $segments_encoder->encode(substr($this->path, $c));
                  } else {
                      $this->path = $segment_nc_encoder->encode($this->path);
                  }
              }
          } else {
              // path-empty (hier and relative)
              $this->path = ''; // just to be safe
          }

          // qf = query and fragment
          $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

          if (!is_null($this->query)) {
              $this->query = $qf_encoder->encode($this->query);
          }

          if (!is_null($this->fragment)) {
              $this->fragment = $qf_encoder->encode($this->fragment);
          }
          return true;
      }

      /**
       * Convert URI back to string
       * @return string URI appropriate for output
       */
      public function toString()
      {
          // reconstruct authority
          $authority = null;
          // there is a rendering difference between a null authority
          // (http:foo-bar) and an empty string authority
          // (http:///foo-bar).
          if (!is_null($this->host)) {
              $authority = '';
              if (!is_null($this->userinfo)) {
                  $authority .= $this->userinfo . '@';
              }
              $authority .= $this->host;
              if (!is_null($this->port)) {
                  $authority .= ':' . $this->port;
              }
          }

          // Reconstruct the result
          // One might wonder about parsing quirks from browsers after
          // this reconstruction.  Unfortunately, parsing behavior depends
          // on what *scheme* was employed (file:///foo is handled *very*
          // differently than http:///foo), so unfortunately we have to
          // defer to the schemes to do the right thing.
          $result = '';
          if (!is_null($this->scheme)) {
              $result .= $this->scheme . ':';
          }
          if (!is_null($authority)) {
              $result .= '//' . $authority;
          }
          $result .= $this->path;
          if (!is_null($this->query)) {
              $result .= '?' . $this->query;
          }
          if (!is_null($this->fragment)) {
              $result .= '#' . $this->fragment;
          }

          return $result;
      }

      /**
       * Returns true if this URL might be considered a 'local' URL given
       * the current context.  This is true when the host is null, or
       * when it matches the host supplied to the configuration.
       *
       * Note that this does not do any scheme checking, so it is mostly
       * only appropriate for metadata that doesn't care about protocol
       * security.  isBenign is probably what you actually want.
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return bool
       */
      public function isLocal($config, $context)
      {
          if ($this->host === null) {
              return true;
          }
          $uri_def = $config->getDefinition('URI');
          if ($uri_def->host === $this->host) {
              return true;
          }
          return false;
      }

      /**
       * Returns true if this URL should be considered a 'benign' URL,
       * that is:
       *
       *      - It is a local URL (isLocal), and
       *      - It has a equal or better level of security
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return bool
       */
      public function isBenign($config, $context)
      {
          if (!$this->isLocal($config, $context)) {
              return false;
          }

          $scheme_obj = $this->getSchemeObj($config, $context);
          if (!$scheme_obj) {
              return false;
          } // conservative approach

          $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
          // getSchemeObj() above treats a falsy default scheme object as
          // possible; guard here too so no property is read from a non-object.
          // An unavailable default scheme is treated as "not secure".
          if ($current_scheme_obj && $current_scheme_obj->secure) {
              if (!$scheme_obj->secure) {
                  return false;
              }
          }
          return true;
      }
  }
  /**
   * Definition object of type 'URI': holds the base URI, home host and
   * default scheme name, plus the URI filters that are run before and
   * after (post-filters) the main processing.
   */
  class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
  {

      public $type = 'URI';

      /** Active filters, keyed by filter name. */
      protected $filters = array();

      /** Active post-filters, keyed by filter name. */
      protected $postFilters = array();

      /** Filters available for activation during setup, keyed by filter name. */
      protected $registeredFilters = array();

      /**
       * HTMLPurifier_URI object of the base specified at %URI.Base
       */
      public $base;

      /**
       * String host to consider "home" base, derived off of $base
       */
      public $host;

      /**
       * Name of default scheme based on %URI.DefaultScheme and %URI.Base
       */
      public $defaultScheme;

      /**
       * Registers the stock URI filters so that setup can activate them.
       */
      public function __construct()
      {
          $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
          $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
          $this->registerFilter(new HTMLPurifier_URIFilter_DisableResources());
          $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
          $this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
          $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
          $this->registerFilter(new HTMLPurifier_URIFilter_Munge());
      }

      /**
       * Makes a filter available under its own name; it only becomes active
       * if setupFilters() later decides to add it.
       */
      public function registerFilter($filter)
      {
          $this->registeredFilters[$filter->name] = $filter;
      }

      /**
       * Prepares a filter and activates it as a regular or post filter.
       * A filter whose prepare() returns exactly false is skipped.
       */
      public function addFilter($filter, $config)
      {
          $prepared = $filter->prepare($config);
          if ($prepared === false) {
              // null is ok, for backwards compat
              return;
          }
          if (!$filter->post) {
              $this->filters[$filter->name] = $filter;
              return;
          }
          $this->postFilters[$filter->name] = $filter;
      }

      /**
       * Populates member variables first, then activates the filters.
       */
      protected function doSetup($config)
      {
          $this->setupMemberVariables($config);
          $this->setupFilters($config);
      }

      /**
       * Activates every registered filter that is either flagged always_load
       * or whose %URI.<name> directive is neither false nor null, then drops
       * the registration list.
       */
      protected function setupFilters($config)
      {
          foreach ($this->registeredFilters as $name => $filter) {
              if (!$filter->always_load) {
                  $setting = $config->get('URI.' . $name);
                  if ($setting === false || $setting === null) {
                      continue;
                  }
              }
              $this->addFilter($filter, $config);
          }
          unset($this->registeredFilters);
      }

      /**
       * Reads %URI.Host, %URI.Base and %URI.DefaultScheme. When a base is
       * configured, its scheme becomes the default scheme and its host is
       * used if no host was configured explicitly.
       */
      protected function setupMemberVariables($config)
      {
          $this->host = $config->get('URI.Host');
          $base_uri = $config->get('URI.Base');
          if ($base_uri !== null) {
              $parser = new HTMLPurifier_URIParser();
              $this->base = $parser->parse($base_uri);
              $this->defaultScheme = $this->base->scheme;
              if ($this->host === null) {
                  $this->host = $this->base->host;
              }
          }
          if ($this->defaultScheme === null) {
              $this->defaultScheme = $config->get('URI.DefaultScheme');
          }
      }

      /**
       * Looks up the scheme object for $defaultScheme in the scheme registry.
       */
      public function getDefaultScheme($config, $context)
      {
          $registry = HTMLPurifier_URISchemeRegistry::instance();
          return $registry->getScheme($this->defaultScheme, $config, $context);
      }

      /**
       * Runs the URI through every active filter in order.
       * @return bool False as soon as one filter returns a falsy result,
       *      true otherwise.
       */
      public function filter(&$uri, $config, $context)
      {
          foreach ($this->filters as $name => $f) {
              if (!$f->filter($uri, $config, $context)) {
                  return false;
              }
          }
          return true;
      }

      /**
       * Runs the URI through every active post-filter in order.
       * @return bool False as soon as one filter returns a falsy result,
       *      true otherwise.
       */
      public function postFilter(&$uri, $config, $context)
      {
          foreach ($this->postFilters as $name => $f) {
              if (!$f->filter($uri, $config, $context)) {
                  return false;
              }
          }
          return true;
      }

  }
  8081. /**
  8082. * Chainable filters for custom URI processing.
  8083. *
  8084. * These filters can perform custom actions on a URI filter object,
  8085. * including transformation or blacklisting. A filter named Foo
  8086. * must have a corresponding configuration directive %URI.Foo,
  8087. * unless always_load is specified to be true.
  8088. *
  8089. * The following contexts may be available while URIFilters are being
  8090. * processed:
  8091. *
  8092. * - EmbeddedURI: true if URI is an embedded resource that will
  8093. * be loaded automatically on page load
  8094. * - CurrentToken: a reference to the token that is currently
  8095. * being processed
  8096. * - CurrentAttr: the name of the attribute that is currently being
  8097. * processed
  8098. * - CurrentCSSProperty: the name of the CSS property that is
  8099. * currently being processed (if applicable)
  8100. *
  8101. * @warning This filter is called before scheme object validation occurs.
  8102. * Make sure, if you require a specific scheme object, that
  8103. * you check that it exists. This allows filters to convert
  8104. * proprietary URI schemes into regular ones.
  8105. */
  abstract class HTMLPurifier_URIFilter
  {
      /**
       * Name that uniquely identifies this filter.
       * @type string
       */
      public $name;

      /**
       * When true, the filter runs after scheme validation instead of
       * before it.
       * @type bool
       */
      public $post = false;

      /**
       * When true, the filter is loaded unconditionally, so a filter named
       * Foo does not need a matching %URI.Foo directive.
       * @type bool
       */
      public $always_load = false;

      /**
       * Initialization hook. Returning false signals that the filter should
       * not be considered active. The base implementation always succeeds.
       * @param HTMLPurifier_Config $config
       * @return bool
       */
      public function prepare($config)
      {
          return true;
      }

      /**
       * Filters a URI object; modifications are made directly on $uri.
       * @param HTMLPurifier_URI $uri Reference to URI object variable
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return bool true to continue processing, false to reject the URI
       */
      abstract public function filter(&$uri, $config, $context);
  }
  8146. /**
  8147. * Parses a URI into the components and fragment identifier as specified
  8148. * by RFC 3986.
  8149. */
  class HTMLPurifier_URIParser
  {
      /**
       * Instance of HTMLPurifier_PercentEncoder to do normalization with.
       */
      protected $percentEncoder;

      public function __construct()
      {
          $this->percentEncoder = new HTMLPurifier_PercentEncoder();
      }

      /**
       * Parses a URI.
       * @param $uri string URI to parse
       * @return HTMLPurifier_URI|bool Representation of the URI, or false if
       *         the top-level pattern did not match. The representation has
       *         not been validated yet and may not conform to the RFC.
       */
      public function parse($uri)
      {
          $uri = $this->percentEncoder->normalize($uri);

          // Regexp is as per RFC 3986, Appendix B.
          // Note that ["<>] are an addition to the RFC's recommended
          // characters, because they represent external delimiters.
          $r_URI = '!'.
              '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
              '(//([^/?#"<>]*))?'. // 4. Authority
              '([^?#"<>]*)'.       // 5. Path
              '(\?([^#"<>]*))?'.   // 7. Query
              '(#([^"<>]*))?'.     // 9. Fragment
              '!';

          $matches = array();
          $result = preg_match($r_URI, $uri, $matches);

          if (!$result) return false; // *really* invalid URI

          // Separate out parts. The outer groups (1, 3, 6, 8) include their
          // delimiter, so they are non-empty whenever the part is present.
          $scheme     = !empty($matches[1]) ? $matches[2] : null;
          $authority  = !empty($matches[3]) ? $matches[4] : null;
          $path       = $matches[5]; // always present, can be empty
          $query      = !empty($matches[6]) ? $matches[7] : null;
          $fragment   = !empty($matches[8]) ? $matches[9] : null;

          // further parse authority
          if ($authority !== null) {
              $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
              $matches = array();
              preg_match($r_authority, $authority, $matches);
              $userinfo   = !empty($matches[1]) ? $matches[2] : null;
              // Do not use empty() here: empty('0') is true, which would
              // silently turn the host "0" into a blank host.
              $host       = isset($matches[3]) ? $matches[3] : '';
              $port       = !empty($matches[4]) ? (int) $matches[5] : null;
          } else {
              $port = $host = $userinfo = null;
          }

          return new HTMLPurifier_URI(
              $scheme, $userinfo, $host, $port, $path, $query, $fragment);
      }
  }
  8203. /**
  8204. * Validator for the components of a URI for a specific scheme
  8205. */
  abstract class HTMLPurifier_URIScheme
  {
      /**
       * Default port of the scheme. validate() clears an explicit port that
       * compares equal to this value.
       * @type int
       */
      public $default_port = null;

      /**
       * Whether URIs of this scheme can be located by a browser: http and
       * ftp are accessible, while mailto and news are not.
       * @type bool
       */
      public $browsable = false;

      /**
       * Whether data transmitted over this scheme is encrypted: https is
       * secure, http is not.
       * @type bool
       */
      public $secure = false;

      /**
       * Whether the URI always uses <hier_part>; resolves edge cases with
       * making relative URIs absolute.
       * @type bool
       */
      public $hierarchical = false;

      /**
       * Whether the URI may omit a hostname when the scheme is explicitly
       * specified, ala file:///path/to/file. As of writing, 'file' is the
       * only scheme that browsers support this properly.
       * @type bool
       */
      public $may_omit_host = false;

      /**
       * Scheme-specific validation of the URI components.
       * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return bool success or failure
       */
      abstract public function doValidate(&$uri, $config, $context);

      /**
       * Public entry point for validating a URI against this scheme. Applies
       * the default-port and missing-host handling shared by all schemes and
       * then delegates to doValidate(). Don't overload this method.
       * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return bool success or failure
       */
      public function validate(&$uri, $config, $context)
      {
          if ($this->default_port == $uri->port) {
              $uri->port = null;
          }

          // kludge: browsers do funny things when the scheme but not the
          // authority is set
          $hasScheme = !is_null($uri->scheme);
          $hostBlank = ($uri->host === '');
          $hostMissing = $hostBlank || is_null($uri->host);

          // With a scheme present, a missing host is an error unless the
          // scheme may omit it. Without a scheme, a *blank* host is an error
          // (regardless of may_omit_host), since it serializes to '///path',
          // which most browsers interpret as 'http://path'.
          $needsRepair = (!$this->may_omit_host && $hasScheme && $hostMissing)
              || (!$hasScheme && $hostBlank);

          if (!$needsRepair) {
              return $this->doValidate($uri, $config, $context);
          }

          if (!$hasScheme && substr($uri->path, 0, 2) != '//') {
              // Dropping the blank host keeps the URI's meaning intact.
              $uri->host = null;
              return $this->doValidate($uri, $config, $context);
          }

          // Either a scheme is present, or the URI is '////path' and the
          // host cannot be nullified without changing semantics: try to
          // fill in the configured hostname instead.
          $fallbackHost = $config->get('URI.Host');
          if (is_null($fallbackHost)) {
              // we can't do anything sensible, reject the URL.
              return false;
          }
          $uri->host = $fallbackHost;

          return $this->doValidate($uri, $config, $context);
      }
  }
  8294. /**
  8295. * Registry for retrieving specific URI scheme validator objects.
  8296. */
  class HTMLPurifier_URISchemeRegistry
  {
      /**
       * Retrieve sole instance of the registry.
       * @param HTMLPurifier_URISchemeRegistry|bool $prototype Optional prototype
       *        to overload sole instance with, or bool true to reset to a
       *        fresh default registry.
       * @return HTMLPurifier_URISchemeRegistry
       * @note Pass a registry object $prototype with a compatible interface and
       *       the function will store it and return it all further times.
       */
      public static function instance($prototype = null)
      {
          static $instance = null;
          if ($prototype === true) {
              // The reset request must be tested before the generic
              // "prototype given" case; otherwise the boolean itself would
              // be stored as the registry.
              $instance = new HTMLPurifier_URISchemeRegistry();
          } elseif ($prototype !== null) {
              $instance = $prototype;
          } elseif ($instance === null) {
              $instance = new HTMLPurifier_URISchemeRegistry();
          }
          return $instance;
      }

      /**
       * Cache of retrieved schemes.
       * @type HTMLPurifier_URIScheme[]
       */
      protected $schemes = array();

      /**
       * Retrieves a scheme validator object
       * @param string $scheme String scheme name like http or mailto
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return HTMLPurifier_URIScheme|null The scheme object, or null when the
       *         scheme is not allowed or no class implementing it exists
       */
      public function getScheme($scheme, $config, $context)
      {
          if (!$config) {
              $config = HTMLPurifier_Config::createDefault();
          }

          // important, otherwise attacker could include arbitrary file
          $allowed_schemes = $config->get('URI.AllowedSchemes');
          if (!$config->get('URI.OverrideAllowedSchemes') &&
              !isset($allowed_schemes[$scheme])
          ) {
              return null;
          }

          // Cached (including manually registered) schemes are served
          // before the allow-list is consulted a second time, so with
          // %URI.OverrideAllowedSchemes they are returned even when the
          // scheme is not in %URI.AllowedSchemes.
          if (isset($this->schemes[$scheme])) {
              return $this->schemes[$scheme];
          }
          if (!isset($allowed_schemes[$scheme])) {
              return null;
          }

          $class = 'HTMLPurifier_URIScheme_' . $scheme;
          if (!class_exists($class)) {
              return null;
          }
          $this->schemes[$scheme] = new $class();
          return $this->schemes[$scheme];
      }

      /**
       * Registers a custom scheme to the cache, bypassing reflection.
       * @param string $scheme Scheme name
       * @param HTMLPurifier_URIScheme $scheme_obj
       */
      public function register($scheme, $scheme_obj)
      {
          $this->schemes[$scheme] = $scheme_obj;
      }
  }
  8364. /**
  8365. * Class for converting between different unit-lengths as specified by
  8366. * CSS.
  8367. */
  class HTMLPurifier_UnitConverter
  {
      const ENGLISH = 1;
      const METRIC = 2;
      const DIGITAL = 3;

      /**
       * Unit tables, one per measuring system. String keys map a unit name
       * to its integer size relative to the other units of the same system.
       * Integer keys name another system and hold a bridge triple:
       * array(unit to convert to before leaving this system, multiplier,
       * unit the value is in after multiplying).
       */
      protected static $units = array(
          self::ENGLISH => array(
              'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary
              'pt' => 4,
              'pc' => 48,
              'in' => 288,
              self::METRIC => array('pt', '0.352777778', 'mm'),
          ),
          self::METRIC => array(
              'mm' => 1,
              'cm' => 10,
              self::ENGLISH => array('mm', '2.83464567', 'pt'),
          ),
      );

      /**
       * Lower bound on the number of significant figures in the output.
       * @type int
       */
      protected $outputPrecision;

      /**
       * Baseline scale (digits after the decimal point) used for
       * intermediate calculations.
       * @type int
       */
      protected $internalPrecision;

      /**
       * Whether BCMath is used for the arithmetic.
       * @type bool
       */
      private $bcmath;

      /**
       * @param int $output_precision Stored as $outputPrecision
       * @param int $internal_precision Stored as $internalPrecision
       * @param bool $force_no_bcmath When true, BCMath is not used even if
       *        the extension is available
       */
      public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false)
      {
          $this->outputPrecision = $output_precision;
          $this->internalPrecision = $internal_precision;
          $this->bcmath = !$force_no_bcmath && function_exists('bcmul');
      }

      /**
       * Converts a length object of one unit into another unit.
       * @param HTMLPurifier_Length $length
       *      Instance of HTMLPurifier_Length to convert. You must validate()
       *      it before passing it here!
       * @param string $to_unit
       *      Unit to convert to.
       * @return HTMLPurifier_Length|bool The converted length, or false when
       *      the length is invalid or either unit is unknown
       * @note
       *      The number of significant figures of the input is tracked and
       *      used for the final rounding, with $outputPrecision as a floor.
       *      Trailing zeroes after the decimal point are stripped from the
       *      result.
       */
      public function convert($length, $to_unit)
      {
          if (!$length->isValid()) {
              return false;
          }

          $value = $length->getN();
          $current = $length->getUnit();

          if ($value === '0' || $current === false) {
              return new HTMLPurifier_Length('0', false);
          }

          // Find the measuring system of the source and destination units.
          $srcSystem = false;
          $dstSystem = false;
          foreach (self::$units as $system => $table) {
              if (isset($table[$current])) {
                  $srcSystem = $system;
              }
              if (isset($table[$to_unit])) {
                  $dstSystem = $system;
              }
          }
          if (!$srcSystem || !$dstSystem) {
              return false;
          }

          // Significant figures of the input, needed for final rounding.
          $sigfigs = $this->getSigFigs($value);
          if ($this->outputPrecision > $sigfigs) {
              $sigfigs = $this->outputPrecision;
          }

          // BCMath's scale only counts decimals. For numbers below one,
          // widen the working scale by the number of leading decimal
          // places so the guard digits never drop below internalPrecision.
          $magnitude = (int)floor(log(abs($value), 10));
          $workScale = $this->internalPrecision;
          if ($magnitude < 0) {
              $workScale = $this->internalPrecision - $magnitude;
          }

          for ($pass = 0; $pass < 2; $pass++) {
              $sameSystem = ($dstSystem === $srcSystem);

              // Unit to reach within the current system: the final unit, or
              // the bridge unit when a system shift is still pending.
              $target = $sameSystem
                  ? $to_unit
                  : self::$units[$srcSystem][$dstSystem][0];

              if ($target !== $current) {
                  $ratio = $this->div(
                      self::$units[$srcSystem][$current],
                      self::$units[$srcSystem][$target],
                      $workScale
                  );
                  $value = $this->mul($value, $ratio, $workScale);
                  $current = $target;
              }

              // Output was zero, so bail out early. Shouldn't ever happen.
              if ($value === '') {
                  $value = '0';
                  $current = $to_unit;
                  break;
              }

              // Simple conversion inside one system: done.
              if ($sameSystem) {
                  break;
              }

              if ($pass !== 0) {
                  // Second pass and still in the wrong system: the system
                  // we forwarded to lacks the unit. This should never happen!
                  return false;
              }

              // First pass only: cross over into the destination system.
              $bridge = self::$units[$srcSystem][$dstSystem];
              $value = $this->mul($value, $bridge[1], $workScale);
              $current = $bridge[2];
              $srcSystem = $dstSystem;
              // Loop once more to convert within the new system.
          }

          // Post-condition: $current == $to_unit
          if ($current !== $to_unit) {
              return false;
          }

          $value = $this->round($value, $sigfigs);
          if (strpos($value, '.') !== false) {
              $value = rtrim($value, '0');
          }
          $value = rtrim($value, '.');

          return new HTMLPurifier_Length($value, $current);
      }

      /**
       * Returns the number of significant figures in a string number.
       * @param string $n Decimal number
       * @return int number of sigfigs
       */
      public function getSigFigs($n)
      {
          $digits = ltrim($n, '0+-');
          $dot = strpos($digits, '.'); // decimal position
          if ($dot === false) {
              // Integer: trailing zeroes are not counted.
              return strlen(rtrim($digits, '0'));
          }
          $count = strlen(ltrim($digits, '0.'));
          // If digits precede the point, the point itself is still in the
          // string and must not be counted.
          return ($dot !== 0) ? $count - 1 : $count;
      }

      /**
       * Adds two numbers, using arbitrary precision when available.
       * @param string $s1
       * @param string $s2
       * @param int $scale
       * @return string
       */
      private function add($s1, $s2, $scale)
      {
          if (!$this->bcmath) {
              return $this->scale((float)$s1 + (float)$s2, $scale);
          }
          return bcadd($s1, $s2, $scale);
      }

      /**
       * Multiplies two numbers, using arbitrary precision when available.
       * @param string $s1
       * @param string $s2
       * @param int $scale
       * @return string
       */
      private function mul($s1, $s2, $scale)
      {
          if (!$this->bcmath) {
              return $this->scale((float)$s1 * (float)$s2, $scale);
          }
          return bcmul($s1, $s2, $scale);
      }

      /**
       * Divides two numbers, using arbitrary precision when available.
       * @param string $s1
       * @param string $s2
       * @param int $scale
       * @return string
       */
      private function div($s1, $s2, $scale)
      {
          if (!$this->bcmath) {
              return $this->scale((float)$s1 / (float)$s2, $scale);
          }
          return bcdiv($s1, $s2, $scale);
      }

      /**
       * Rounds a number according to the number of sigfigs it should have,
       * using arbitrary precision when available.
       * @param string|float $n
       * @param int $sigfigs
       * @return string
       */
      private function round($n, $sigfigs)
      {
          $new_log = (int)floor(log(abs($n), 10)); // Number of digits left of decimal - 1
          $rp = $sigfigs - $new_log - 1; // Number of decimal places needed
          $neg = $n < 0 ? '-' : ''; // Negative sign

          if (!$this->bcmath) {
              return $this->scale(round($n, $rp), $rp + 1);
          }

          if ($rp >= 0) {
              // Add half a unit in the last kept place, then truncate.
              $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1);
              return bcdiv($n, '1', $rp);
          }

          // Rounding left of the decimal point. This partially depends on
          // the standardized form of numbers that comes out of bcmath.
          $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0);
          return substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1);
      }

      /**
       * Scales a float to $scale digits right of decimal point, like BCMath.
       * @param float $r
       * @param int $scale
       * @return string
       */
      private function scale($r, $scale)
      {
          if ($scale < 0) {
              // The f sprintf type doesn't support negative precision, so
              // the value is first rendered as a plain integer string.
              $whole = sprintf('%.0f', (float)$r);
              // Floating point loss means $whole may look like
              // 4652999999999 rather than a round figure. Keep one digit
              // more than needed and round that digit away.
              $precise = (string)round(substr($whole, 0, strlen($whole) + $scale), -1);
              // Drop the zero produced by rounding and pad back out.
              return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
          }
          return sprintf('%.' . $scale . 'f', (float)$r);
      }
  }
  8635. /**
  8636. * Parses string representations into their corresponding native PHP
  8637. * variable type. The base implementation does a simple type-check.
  8638. */
  class HTMLPurifier_VarParser
  {
      const STRING = 1;
      const ISTRING = 2;
      const TEXT = 3;
      const ITEXT = 4;
      const INT = 5;
      const FLOAT = 6;
      const BOOL = 7;
      const LOOKUP = 8;
      const ALIST = 9;
      const HASH = 10;
      const MIXED = 11;

      /**
       * Maps string type names to the integer type constants. Mainly for
       * backwards compatibility, but also convenient for callers that pass
       * type names as strings.
       */
      public static $types = array(
          'string' => self::STRING,
          'istring' => self::ISTRING,
          'text' => self::TEXT,
          'itext' => self::ITEXT,
          'int' => self::INT,
          'float' => self::FLOAT,
          'bool' => self::BOOL,
          'lookup' => self::LOOKUP,
          'list' => self::ALIST,
          'hash' => self::HASH,
          'mixed' => self::MIXED
      );

      /**
       * Lookup table of the types that are strings, and can have aliases or
       * allowed value lists.
       */
      public static $stringTypes = array(
          self::STRING => true,
          self::ISTRING => true,
          self::TEXT => true,
          self::ITEXT => true,
      );

      /**
       * Validate a variable according to type.
       * It may return NULL as a valid type if $allow_null is true.
       *
       * @param mixed $var Variable to validate
       * @param int|string $type Type constant, or a type name found in $types
       * @param bool $allow_null Whether or not to permit null as a value
       * @return mixed Validated and type-coerced variable
       * @throws HTMLPurifier_VarParserException
       */
      final public function parse($var, $type, $allow_null = false)
      {
          // Translate a type name into its integer constant.
          if (is_string($type)) {
              if (isset(self::$types[$type])) {
                  $type = self::$types[$type];
              } else {
                  throw new HTMLPurifier_VarParserException("Invalid type '$type'");
              }
          }

          $var = $this->parseImplementation($var, $type, $allow_null);
          if ($allow_null && $var === null) {
              return null;
          }

          // Sanity checks on what the implementation produced. Every
          // branch either returns the value or breaks out of the switch
          // into the generic error below.
          switch ($type) {
              case (self::STRING):
              case (self::ISTRING):
              case (self::TEXT):
              case (self::ITEXT):
                  if (is_string($var)) {
                      $insensitive = ($type == self::ISTRING || $type == self::ITEXT);
                      return $insensitive ? strtolower($var) : $var;
                  }
                  break;
              case (self::INT):
                  if (is_int($var)) {
                      return $var;
                  }
                  break;
              case (self::FLOAT):
                  if (is_float($var)) {
                      return $var;
                  }
                  break;
              case (self::BOOL):
                  if (is_bool($var)) {
                      return $var;
                  }
                  break;
              case (self::LOOKUP):
              case (self::ALIST):
              case (self::HASH):
                  if (!is_array($var)) {
                      break;
                  }
                  if ($type === self::LOOKUP) {
                      // Every value of a lookup table must be exactly true.
                      foreach ($var as $flag) {
                          if ($flag !== true) {
                              $this->error('Lookup table contains value other than true');
                          }
                      }
                  } elseif ($type === self::ALIST) {
                      // A list must be keyed 0..n-1 in order.
                      $indices = array_keys($var);
                      if (array_keys($indices) !== $indices) {
                          $this->error('Indices for list are not uniform');
                      }
                  }
                  return $var;
              case (self::MIXED):
                  return $var;
              default:
                  $this->errorInconsistent(get_class($this), $type);
          }

          $this->errorGeneric($var, $type);
      }

      /**
       * Does the actual parsing. The base implementation hands $var back
       * untouched; subclasses should overload this!
       * @param mixed $var
       * @param int $type
       * @param bool $allow_null
       * @return mixed
       */
      protected function parseImplementation($var, $type, $allow_null)
      {
          return $var;
      }

      /**
       * Throws an exception carrying the given message.
       * @param string $msg
       * @throws HTMLPurifier_VarParserException
       */
      protected function error($msg)
      {
          throw new HTMLPurifier_VarParserException($msg);
      }

      /**
       * Throws an inconsistency exception.
       * @note This should not ever be called. It would be called if we
       *       extend the allowed values of HTMLPurifier_VarParser without
       *       updating subclasses.
       * @param string $class
       * @param int $type
       * @throws HTMLPurifier_Exception
       */
      protected function errorInconsistent($class, $type)
      {
          $typeName = HTMLPurifier_VarParser::getTypeName($type);
          throw new HTMLPurifier_Exception(
              "Inconsistency in $class: " . $typeName . " not implemented"
          );
      }

      /**
       * Generic error for when a value did not have the expected type.
       * @param mixed $var
       * @param int $type
       */
      protected function errorGeneric($var, $type)
      {
          $vtype = gettype($var);
          $expected = HTMLPurifier_VarParser::getTypeName($type);
          $this->error("Expected type " . $expected . ", got $vtype");
      }

      /**
       * Returns the string name of a type constant.
       * @param int $type
       * @return string Type name, or 'unknown' if the constant is not in $types
       */
      public static function getTypeName($type)
      {
          static $lookup;
          if (!$lookup) {
              // Built on first use: the inverse of $types.
              $lookup = array_flip(HTMLPurifier_VarParser::$types);
          }
          return isset($lookup[$type]) ? $lookup[$type] : 'unknown';
      }
  }
  8820. /**
  8821. * Exception type for HTMLPurifier_VarParser
  8822. */
  class HTMLPurifier_VarParserException extends HTMLPurifier_Exception
  {
      // Intentionally empty: the class only provides a distinct exception
      // type for HTMLPurifier_VarParser failures.
  }
  8826. /**
  8827. * A zipper is a purely-functional data structure which contains
  8828. * a focus that can be efficiently manipulated. It is known as
  8829. * a "one-hole context". This mutable variant implements a zipper
  8830. * for a list as a pair of two arrays, laid out as follows:
  8831. *
  8832. * Base list: 1 2 3 4 [ ] 6 7 8 9
  8833. * Front list: 1 2 3 4
  8834. * Back list: 9 8 7 6
  8835. *
  8836. * User is expected to keep track of the "current element" and properly
  8837. * fill it back in as necessary. (ToDo: Maybe it's more user friendly
  8838. * to implicitly track the current element?)
  8839. *
  8840. * Nota bene: the current class gets confused if you try to store NULLs
  8841. * in the list.
  8842. */
  class HTMLPurifier_Zipper
  {
      /**
       * Elements before the hole, in list order.
       */
      public $front;

      /**
       * Elements after the hole, in reverse list order (the element right
       * after the hole is last).
       */
      public $back;

      /**
       * @param array $front Elements before the hole
       * @param array $back Elements after the hole, reversed
       */
      public function __construct($front, $back)
      {
          $this->front = $front;
          $this->back = $back;
      }

      /**
       * Creates a zipper from an array, with a hole in the
       * 0-index position.
       * @param array $array Array to zipper-ify.
       * @return array Tuple of zipper and element of first position.
       */
      public static function fromArray($array)
      {
          $zipper = new self(array(), array_reverse($array));
          // Pull the first element out so the hole sits at index 0.
          $first = $zipper->delete();
          return array($zipper, $first);
      }

      /**
       * Convert zipper back into a normal array, optionally filling in
       * the hole with a value. (Usually you should supply a $t, unless you
       * are at the end of the array.)
       * @param mixed $t Value for the hole; NULL leaves the hole out
       * @return array
       */
      public function toArray($t = NULL)
      {
          $result = $this->front;
          if ($t !== NULL) {
              $result[] = $t;
          }
          // The back list is stored reversed, so walk it from the end.
          $idx = count($this->back);
          while ($idx-- > 0) {
              $result[] = $this->back[$idx];
          }
          return $result;
      }

      /**
       * Move hole to the next element.
       * @param mixed $t Element to fill hole with
       * @return mixed Original contents of new hole, or NULL at the end.
       */
      public function next($t)
      {
          if ($t !== NULL) {
              array_push($this->front, $t);
          }
          if (empty($this->back)) {
              return NULL;
          }
          return array_pop($this->back);
      }

      /**
       * Iterated hole advancement.
       * @param mixed $t Element to fill hole with
       * @param int $n How many positions forward to advance the hole
       * @return mixed Original contents of new hole, $n away
       */
      public function advance($t, $n)
      {
          $step = 0;
          while ($step < $n) {
              $t = $this->next($t);
              $step++;
          }
          return $t;
      }

      /**
       * Move hole to the previous element
       * @param mixed $t Element to fill hole with
       * @return mixed Original contents of new hole, or NULL at the start.
       */
      public function prev($t)
      {
          if ($t !== NULL) {
              array_push($this->back, $t);
          }
          if (empty($this->front)) {
              return NULL;
          }
          return array_pop($this->front);
      }

      /**
       * Delete contents of current hole, shifting hole to
       * next element.
       * @return mixed Original contents of new hole, or NULL at the end.
       */
      public function delete()
      {
          if (empty($this->back)) {
              return NULL;
          }
          return array_pop($this->back);
      }

      /**
       * Returns true if we are at the end of the list.
       * @return bool
       */
      public function done()
      {
          return empty($this->back);
      }

      /**
       * Insert element before hole.
       * @param mixed $t Element to insert; NULL is ignored
       */
      public function insertBefore($t)
      {
          if ($t === NULL) {
              return;
          }
          array_push($this->front, $t);
      }

      /**
       * Insert element after hole.
       * @param mixed $t Element to insert; NULL is ignored
       */
      public function insertAfter($t)
      {
          if ($t === NULL) {
              return;
          }
          array_push($this->back, $t);
      }

      /**
       * Splice in multiple elements at hole.  Functional specification
       * in terms of array_splice:
       *
       *      $arr1 = $arr;
       *      $old1 = array_splice($arr1, $i, $delete, $replacement);
       *
       *      list($z, $t) = HTMLPurifier_Zipper::fromArray($arr);
       *      $t = $z->advance($t, $i);
       *      list($old2, $t) = $z->splice($t, $delete, $replacement);
       *      $arr2 = $z->toArray($t);
       *
       *      assert($old1 === $old2);
       *      assert($arr1 === $arr2);
       *
       * NB: the absolute index location after this operation is
       * *unchanged!*
       *
       * @param mixed $t Current contents of hole.
       * @param int $delete Number of elements to remove, starting at the hole
       * @param array $replacement Elements to put in their place
       * @return array Tuple of removed elements and new contents of hole
       */
      public function splice($t, $delete, $replacement)
      {
          // Removal phase: collect the hole and the elements after it.
          $removed = array();
          $hole = $t;
          $remaining = $delete;
          while ($remaining > 0) {
              $removed[] = $hole;
              $hole = $this->delete();
              $remaining--;
          }

          // Insertion phase: walk the replacement backwards, pushing the
          // current hole behind us each time, so the first replacement
          // element ends up in the hole.
          $pos = count($replacement);
          while ($pos-- > 0) {
              $this->insertAfter($hole);
              $hole = $replacement[$pos];
          }

          return array($removed, $hole);
      }
  }
  8969. /**
  8970. * Validates the HTML attribute style, otherwise known as CSS.
  8971. * @note We don't implement the whole CSS specification, so it might be
  8972. * difficult to reuse this component in the context of validating
  8973. * actual stylesheet declarations.
  8974. * @note If we were really serious about validating the CSS, we would
  8975. * tokenize the styles and then parse the tokens. Obviously, we
  8976. * are not doing that. Doing that could seriously harm performance,
  8977. * but would make these components a lot more viable for a CSS
  8978. * filtering solution.
  8979. */
  8980. class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
  8981. {
  8982. /**
  8983. * @param string $css
  8984. * @param HTMLPurifier_Config $config
  8985. * @param HTMLPurifier_Context $context
  8986. * @return bool|string
  8987. */
  8988. public function validate($css, $config, $context)
  8989. {
  8990. $css = $this->parseCDATA($css);
  8991. $definition = $config->getCSSDefinition();
  8992. // we're going to break the spec and explode by semicolons.
  8993. // This is because semicolon rarely appears in escaped form
  8994. // Doing this is generally flaky but fast
  8995. // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
  8996. // for details
  8997. $declarations = explode(';', $css);
  8998. $propvalues = array();
  8999. /**
  9000. * Name of the current CSS property being validated.
  9001. */
  9002. $property = false;
  9003. $context->register('CurrentCSSProperty', $property);
  9004. foreach ($declarations as $declaration) {
  9005. if (!$declaration) {
  9006. continue;
  9007. }
  9008. if (!strpos($declaration, ':')) {
  9009. continue;
  9010. }
  9011. list($property, $value) = explode(':', $declaration, 2);
  9012. $property = trim($property);
  9013. $value = trim($value);
  9014. $ok = false;
  9015. do {
  9016. if (isset($definition->info[$property])) {
  9017. $ok = true;
  9018. break;
  9019. }
  9020. if (ctype_lower($property)) {
  9021. break;
  9022. }
  9023. $property = strtolower($property);
  9024. if (isset($definition->info[$property])) {
  9025. $ok = true;
  9026. break;
  9027. }
  9028. } while (0);
  9029. if (!$ok) {
  9030. continue;
  9031. }
  9032. // inefficient call, since the validator will do this again
  9033. if (strtolower(trim($value)) !== 'inherit') {
  9034. // inherit works for everything (but only on the base property)
  9035. $result = $definition->info[$property]->validate(
  9036. $value,
  9037. $config,
  9038. $context
  9039. );
  9040. } else {
  9041. $result = 'inherit';
  9042. }
  9043. if ($result === false) {
  9044. continue;
  9045. }
  9046. $propvalues[$property] = $result;
  9047. }
  9048. $context->destroy('CurrentCSSProperty');
  9049. // procedure does not write the new CSS simultaneously, so it's
  9050. // slightly inefficient, but it's the only way of getting rid of
  9051. // duplicates. Perhaps config to optimize it, but not now.
  9052. $new_declarations = '';
  9053. foreach ($propvalues as $prop => $value) {
  9054. $new_declarations .= "$prop:$value;";
  9055. }
  9056. return $new_declarations ? $new_declarations : false;
  9057. }
  9058. }
  9059. /**
  9060. * Dummy AttrDef that mimics another AttrDef, BUT it generates clones
  9061. * with make.
  9062. */
  9063. class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
  9064. {
  9065. /**
  9066. * What we're cloning.
  9067. * @type HTMLPurifier_AttrDef
  9068. */
  9069. protected $clone;
  9070. /**
  9071. * @param HTMLPurifier_AttrDef $clone
  9072. */
  9073. public function __construct($clone)
  9074. {
  9075. $this->clone = $clone;
  9076. }
  9077. /**
  9078. * @param string $v
  9079. * @param HTMLPurifier_Config $config
  9080. * @param HTMLPurifier_Context $context
  9081. * @return bool|string
  9082. */
  9083. public function validate($v, $config, $context)
  9084. {
  9085. return $this->clone->validate($v, $config, $context);
  9086. }
  9087. /**
  9088. * @param string $string
  9089. * @return HTMLPurifier_AttrDef
  9090. */
  9091. public function make($string)
  9092. {
  9093. return clone $this->clone;
  9094. }
  9095. }
  9096. // Enum = Enumerated
  9097. /**
  9098. * Validates a keyword against a list of valid values.
  9099. * @warning The case-insensitive compare of this function uses PHP's
  9100. * built-in strtolower and ctype_lower functions, which may
  9101. * cause problems with international comparisons
  9102. */
  9103. class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
  9104. {
  9105. /**
  9106. * Lookup table of valid values.
  9107. * @type array
  9108. * @todo Make protected
  9109. */
  9110. public $valid_values = array();
  9111. /**
  9112. * Bool indicating whether or not enumeration is case sensitive.
  9113. * @note In general this is always case insensitive.
  9114. */
  9115. protected $case_sensitive = false; // values according to W3C spec
  9116. /**
  9117. * @param array $valid_values List of valid values
  9118. * @param bool $case_sensitive Whether or not case sensitive
  9119. */
  9120. public function __construct($valid_values = array(), $case_sensitive = false)
  9121. {
  9122. $this->valid_values = array_flip($valid_values);
  9123. $this->case_sensitive = $case_sensitive;
  9124. }
  9125. /**
  9126. * @param string $string
  9127. * @param HTMLPurifier_Config $config
  9128. * @param HTMLPurifier_Context $context
  9129. * @return bool|string
  9130. */
  9131. public function validate($string, $config, $context)
  9132. {
  9133. $string = trim($string);
  9134. if (!$this->case_sensitive) {
  9135. // we may want to do full case-insensitive libraries
  9136. $string = ctype_lower($string) ? $string : strtolower($string);
  9137. }
  9138. $result = isset($this->valid_values[$string]);
  9139. return $result ? $string : false;
  9140. }
  9141. /**
  9142. * @param string $string In form of comma-delimited list of case-insensitive
  9143. * valid values. Example: "foo,bar,baz". Prepend "s:" to make
  9144. * case sensitive
  9145. * @return HTMLPurifier_AttrDef_Enum
  9146. */
  9147. public function make($string)
  9148. {
  9149. if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') {
  9150. $string = substr($string, 2);
  9151. $sensitive = true;
  9152. } else {
  9153. $sensitive = false;
  9154. }
  9155. $values = explode(',', $string);
  9156. return new HTMLPurifier_AttrDef_Enum($values, $sensitive);
  9157. }
  9158. }
  9159. /**
  9160. * Validates an integer.
  9161. * @note While this class was modeled off the CSS definition, no currently
  9162. * allowed CSS uses this type. The properties that do are: widows,
  9163. * orphans, z-index, counter-increment, counter-reset. Some of the
  9164. * HTML attributes, however, find use for a non-negative version of this.
  9165. */
  9166. class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
  9167. {
  9168. /**
  9169. * Whether or not negative values are allowed.
  9170. * @type bool
  9171. */
  9172. protected $negative = true;
  9173. /**
  9174. * Whether or not zero is allowed.
  9175. * @type bool
  9176. */
  9177. protected $zero = true;
  9178. /**
  9179. * Whether or not positive values are allowed.
  9180. * @type bool
  9181. */
  9182. protected $positive = true;
  9183. /**
  9184. * @param $negative Bool indicating whether or not negative values are allowed
  9185. * @param $zero Bool indicating whether or not zero is allowed
  9186. * @param $positive Bool indicating whether or not positive values are allowed
  9187. */
  9188. public function __construct($negative = true, $zero = true, $positive = true)
  9189. {
  9190. $this->negative = $negative;
  9191. $this->zero = $zero;
  9192. $this->positive = $positive;
  9193. }
  9194. /**
  9195. * @param string $integer
  9196. * @param HTMLPurifier_Config $config
  9197. * @param HTMLPurifier_Context $context
  9198. * @return bool|string
  9199. */
  9200. public function validate($integer, $config, $context)
  9201. {
  9202. $integer = $this->parseCDATA($integer);
  9203. if ($integer === '') {
  9204. return false;
  9205. }
  9206. // we could possibly simply typecast it to integer, but there are
  9207. // certain fringe cases that must not return an integer.
  9208. // clip leading sign
  9209. if ($this->negative && $integer[0] === '-') {
  9210. $digits = substr($integer, 1);
  9211. if ($digits === '0') {
  9212. $integer = '0';
  9213. } // rm minus sign for zero
  9214. } elseif ($this->positive && $integer[0] === '+') {
  9215. $digits = $integer = substr($integer, 1); // rm unnecessary plus
  9216. } else {
  9217. $digits = $integer;
  9218. }
  9219. // test if it's numeric
  9220. if (!ctype_digit($digits)) {
  9221. return false;
  9222. }
  9223. // perform scope tests
  9224. if (!$this->zero && $integer == 0) {
  9225. return false;
  9226. }
  9227. if (!$this->positive && $integer > 0) {
  9228. return false;
  9229. }
  9230. if (!$this->negative && $integer < 0) {
  9231. return false;
  9232. }
  9233. return $integer;
  9234. }
  9235. }
  9236. /**
  9237. * Validates the HTML attribute lang, effectively a language code.
  9238. * @note Built according to RFC 3066, which obsoleted RFC 1766
  9239. */
  9240. class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
  9241. {
  9242. /**
  9243. * @param string $string
  9244. * @param HTMLPurifier_Config $config
  9245. * @param HTMLPurifier_Context $context
  9246. * @return bool|string
  9247. */
  9248. public function validate($string, $config, $context)
  9249. {
  9250. $string = trim($string);
  9251. if (!$string) {
  9252. return false;
  9253. }
  9254. $subtags = explode('-', $string);
  9255. $num_subtags = count($subtags);
  9256. if ($num_subtags == 0) { // sanity check
  9257. return false;
  9258. }
  9259. // process primary subtag : $subtags[0]
  9260. $length = strlen($subtags[0]);
  9261. switch ($length) {
  9262. case 0:
  9263. return false;
  9264. case 1:
  9265. if (!($subtags[0] == 'x' || $subtags[0] == 'i')) {
  9266. return false;
  9267. }
  9268. break;
  9269. case 2:
  9270. case 3:
  9271. if (!ctype_alpha($subtags[0])) {
  9272. return false;
  9273. } elseif (!ctype_lower($subtags[0])) {
  9274. $subtags[0] = strtolower($subtags[0]);
  9275. }
  9276. break;
  9277. default:
  9278. return false;
  9279. }
  9280. $new_string = $subtags[0];
  9281. if ($num_subtags == 1) {
  9282. return $new_string;
  9283. }
  9284. // process second subtag : $subtags[1]
  9285. $length = strlen($subtags[1]);
  9286. if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
  9287. return $new_string;
  9288. }
  9289. if (!ctype_lower($subtags[1])) {
  9290. $subtags[1] = strtolower($subtags[1]);
  9291. }
  9292. $new_string .= '-' . $subtags[1];
  9293. if ($num_subtags == 2) {
  9294. return $new_string;
  9295. }
  9296. // process all other subtags, index 2 and up
  9297. for ($i = 2; $i < $num_subtags; $i++) {
  9298. $length = strlen($subtags[$i]);
  9299. if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) {
  9300. return $new_string;
  9301. }
  9302. if (!ctype_lower($subtags[$i])) {
  9303. $subtags[$i] = strtolower($subtags[$i]);
  9304. }
  9305. $new_string .= '-' . $subtags[$i];
  9306. }
  9307. return $new_string;
  9308. }
  9309. }
  9310. /**
  9311. * Decorator that, depending on a token, switches between two definitions.
  9312. */
  9313. class HTMLPurifier_AttrDef_Switch
  9314. {
  9315. /**
  9316. * @type string
  9317. */
  9318. protected $tag;
  9319. /**
  9320. * @type HTMLPurifier_AttrDef
  9321. */
  9322. protected $withTag;
  9323. /**
  9324. * @type HTMLPurifier_AttrDef
  9325. */
  9326. protected $withoutTag;
  9327. /**
  9328. * @param string $tag Tag name to switch upon
  9329. * @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
  9330. * @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
  9331. */
  9332. public function __construct($tag, $with_tag, $without_tag)
  9333. {
  9334. $this->tag = $tag;
  9335. $this->withTag = $with_tag;
  9336. $this->withoutTag = $without_tag;
  9337. }
  9338. /**
  9339. * @param string $string
  9340. * @param HTMLPurifier_Config $config
  9341. * @param HTMLPurifier_Context $context
  9342. * @return bool|string
  9343. */
  9344. public function validate($string, $config, $context)
  9345. {
  9346. $token = $context->get('CurrentToken', true);
  9347. if (!$token || $token->name !== $this->tag) {
  9348. return $this->withoutTag->validate($string, $config, $context);
  9349. } else {
  9350. return $this->withTag->validate($string, $config, $context);
  9351. }
  9352. }
  9353. }
  9354. /**
  9355. * Validates arbitrary text according to the HTML spec.
  9356. */
  9357. class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
  9358. {
  9359. /**
  9360. * @param string $string
  9361. * @param HTMLPurifier_Config $config
  9362. * @param HTMLPurifier_Context $context
  9363. * @return bool|string
  9364. */
  9365. public function validate($string, $config, $context)
  9366. {
  9367. return $this->parseCDATA($string);
  9368. }
  9369. }
  9370. /**
  9371. * Validates a URI as defined by RFC 3986.
  9372. * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
  9373. */
  9374. class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
  9375. {
  9376. /**
  9377. * @type HTMLPurifier_URIParser
  9378. */
  9379. protected $parser;
  9380. /**
  9381. * @type bool
  9382. */
  9383. protected $embedsResource;
  9384. /**
  9385. * @param bool $embeds_resource Does the URI here result in an extra HTTP request?
  9386. */
  9387. public function __construct($embeds_resource = false)
  9388. {
  9389. $this->parser = new HTMLPurifier_URIParser();
  9390. $this->embedsResource = (bool)$embeds_resource;
  9391. }
  9392. /**
  9393. * @param string $string
  9394. * @return HTMLPurifier_AttrDef_URI
  9395. */
  9396. public function make($string)
  9397. {
  9398. $embeds = ($string === 'embedded');
  9399. return new HTMLPurifier_AttrDef_URI($embeds);
  9400. }
  9401. /**
  9402. * @param string $uri
  9403. * @param HTMLPurifier_Config $config
  9404. * @param HTMLPurifier_Context $context
  9405. * @return bool|string
  9406. */
  9407. public function validate($uri, $config, $context)
  9408. {
  9409. if ($config->get('URI.Disable')) {
  9410. return false;
  9411. }
  9412. $uri = $this->parseCDATA($uri);
  9413. // parse the URI
  9414. $uri = $this->parser->parse($uri);
  9415. if ($uri === false) {
  9416. return false;
  9417. }
  9418. // add embedded flag to context for validators
  9419. $context->register('EmbeddedURI', $this->embedsResource);
  9420. $ok = false;
  9421. do {
  9422. // generic validation
  9423. $result = $uri->validate($config, $context);
  9424. if (!$result) {
  9425. break;
  9426. }
  9427. // chained filtering
  9428. $uri_def = $config->getDefinition('URI');
  9429. $result = $uri_def->filter($uri, $config, $context);
  9430. if (!$result) {
  9431. break;
  9432. }
  9433. // scheme-specific validation
  9434. $scheme_obj = $uri->getSchemeObj($config, $context);
  9435. if (!$scheme_obj) {
  9436. break;
  9437. }
  9438. if ($this->embedsResource && !$scheme_obj->browsable) {
  9439. break;
  9440. }
  9441. $result = $scheme_obj->validate($uri, $config, $context);
  9442. if (!$result) {
  9443. break;
  9444. }
  9445. // Post chained filtering
  9446. $result = $uri_def->postFilter($uri, $config, $context);
  9447. if (!$result) {
  9448. break;
  9449. }
  9450. // survived gauntlet
  9451. $ok = true;
  9452. } while (false);
  9453. $context->destroy('EmbeddedURI');
  9454. if (!$ok) {
  9455. return false;
  9456. }
  9457. // back to string
  9458. return $uri->toString();
  9459. }
  9460. }
  9461. /**
  9462. * Validates a number as defined by the CSS spec.
  9463. */
  9464. class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
  9465. {
  9466. /**
  9467. * Indicates whether or not only positive values are allowed.
  9468. * @type bool
  9469. */
  9470. protected $non_negative = false;
  9471. /**
  9472. * @param bool $non_negative indicates whether negatives are forbidden
  9473. */
  9474. public function __construct($non_negative = false)
  9475. {
  9476. $this->non_negative = $non_negative;
  9477. }
  9478. /**
  9479. * @param string $number
  9480. * @param HTMLPurifier_Config $config
  9481. * @param HTMLPurifier_Context $context
  9482. * @return string|bool
  9483. * @warning Some contexts do not pass $config, $context. These
  9484. * variables should not be used without checking HTMLPurifier_Length
  9485. */
  9486. public function validate($number, $config, $context)
  9487. {
  9488. $number = $this->parseCDATA($number);
  9489. if ($number === '') {
  9490. return false;
  9491. }
  9492. if ($number === '0') {
  9493. return '0';
  9494. }
  9495. $sign = '';
  9496. switch ($number[0]) {
  9497. case '-':
  9498. if ($this->non_negative) {
  9499. return false;
  9500. }
  9501. $sign = '-';
  9502. case '+':
  9503. $number = substr($number, 1);
  9504. }
  9505. if (ctype_digit($number)) {
  9506. $number = ltrim($number, '0');
  9507. return $number ? $sign . $number : '0';
  9508. }
  9509. // Period is the only non-numeric character allowed
  9510. if (strpos($number, '.') === false) {
  9511. return false;
  9512. }
  9513. list($left, $right) = explode('.', $number, 2);
  9514. if ($left === '' && $right === '') {
  9515. return false;
  9516. }
  9517. if ($left !== '' && !ctype_digit($left)) {
  9518. return false;
  9519. }
  9520. $left = ltrim($left, '0');
  9521. $right = rtrim($right, '0');
  9522. if ($right === '') {
  9523. return $left ? $sign . $left : '0';
  9524. } elseif (!ctype_digit($right)) {
  9525. return false;
  9526. }
  9527. return $sign . $left . '.' . $right;
  9528. }
  9529. }
  9530. class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
  9531. {
  9532. public function __construct()
  9533. {
  9534. parent::__construct(false); // opacity is non-negative, but we will clamp it
  9535. }
  9536. /**
  9537. * @param string $number
  9538. * @param HTMLPurifier_Config $config
  9539. * @param HTMLPurifier_Context $context
  9540. * @return string
  9541. */
  9542. public function validate($number, $config, $context)
  9543. {
  9544. $result = parent::validate($number, $config, $context);
  9545. if ($result === false) {
  9546. return $result;
  9547. }
  9548. $float = (float)$result;
  9549. if ($float < 0.0) {
  9550. $result = '0';
  9551. }
  9552. if ($float > 1.0) {
  9553. $result = '1';
  9554. }
  9555. return $result;
  9556. }
  9557. }
  9558. /**
  9559. * Validates shorthand CSS property background.
  9560. * @warning Does not support url tokens that have internal spaces.
  9561. */
  9562. class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
  9563. {
  9564. /**
  9565. * Local copy of component validators.
  9566. * @type HTMLPurifier_AttrDef[]
  9567. * @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
  9568. */
  9569. protected $info;
  9570. /**
  9571. * @param HTMLPurifier_Config $config
  9572. */
  9573. public function __construct($config)
  9574. {
  9575. $def = $config->getCSSDefinition();
  9576. $this->info['background-color'] = $def->info['background-color'];
  9577. $this->info['background-image'] = $def->info['background-image'];
  9578. $this->info['background-repeat'] = $def->info['background-repeat'];
  9579. $this->info['background-attachment'] = $def->info['background-attachment'];
  9580. $this->info['background-position'] = $def->info['background-position'];
  9581. }
  9582. /**
  9583. * @param string $string
  9584. * @param HTMLPurifier_Config $config
  9585. * @param HTMLPurifier_Context $context
  9586. * @return bool|string
  9587. */
  9588. public function validate($string, $config, $context)
  9589. {
  9590. // regular pre-processing
  9591. $string = $this->parseCDATA($string);
  9592. if ($string === '') {
  9593. return false;
  9594. }
  9595. // munge rgb() decl if necessary
  9596. $string = $this->mungeRgb($string);
  9597. // assumes URI doesn't have spaces in it
  9598. $bits = explode(' ', $string); // bits to process
  9599. $caught = array();
  9600. $caught['color'] = false;
  9601. $caught['image'] = false;
  9602. $caught['repeat'] = false;
  9603. $caught['attachment'] = false;
  9604. $caught['position'] = false;
  9605. $i = 0; // number of catches
  9606. foreach ($bits as $bit) {
  9607. if ($bit === '') {
  9608. continue;
  9609. }
  9610. foreach ($caught as $key => $status) {
  9611. if ($key != 'position') {
  9612. if ($status !== false) {
  9613. continue;
  9614. }
  9615. $r = $this->info['background-' . $key]->validate($bit, $config, $context);
  9616. } else {
  9617. $r = $bit;
  9618. }
  9619. if ($r === false) {
  9620. continue;
  9621. }
  9622. if ($key == 'position') {
  9623. if ($caught[$key] === false) {
  9624. $caught[$key] = '';
  9625. }
  9626. $caught[$key] .= $r . ' ';
  9627. } else {
  9628. $caught[$key] = $r;
  9629. }
  9630. $i++;
  9631. break;
  9632. }
  9633. }
  9634. if (!$i) {
  9635. return false;
  9636. }
  9637. if ($caught['position'] !== false) {
  9638. $caught['position'] = $this->info['background-position']->
  9639. validate($caught['position'], $config, $context);
  9640. }
  9641. $ret = array();
  9642. foreach ($caught as $value) {
  9643. if ($value === false) {
  9644. continue;
  9645. }
  9646. $ret[] = $value;
  9647. }
  9648. if (empty($ret)) {
  9649. return false;
  9650. }
  9651. return implode(' ', $ret);
  9652. }
  9653. }
  9654. /* W3C says:
  9655. [ // adjective and number must be in correct order, even if
  9656. // you could switch them without introducing ambiguity.
  9657. // some browsers support that syntax
  9658. [
  9659. <percentage> | <length> | left | center | right
  9660. ]
  9661. [
  9662. <percentage> | <length> | top | center | bottom
  9663. ]?
  9664. ] |
  9665. [ // this signifies that the vertical and horizontal adjectives
  9666. // can be arbitrarily ordered, however, there can only be two,
  9667. // one of each, or none at all
  9668. [
  9669. left | center | right
  9670. ] ||
  9671. [
  9672. top | center | bottom
  9673. ]
  9674. ]
  9675. top, left = 0%
  9676. center, (none) = 50%
  9677. bottom, right = 100%
  9678. */
  9679. /* QuirksMode says:
  9680. keyword + length/percentage must be ordered correctly, as per W3C
  9681. Internet Explorer and Opera, however, support arbitrary ordering. We
  9682. should fix it up.
  9683. Minor issue though, not strictly necessary.
  9684. */
  9685. // control freaks may appreciate the ability to convert these to
  9686. // percentages or something, but it's not necessary
  9687. /**
  9688. * Validates the value of background-position.
  9689. */
  9690. class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
  9691. {
  9692. /**
  9693. * @type HTMLPurifier_AttrDef_CSS_Length
  9694. */
  9695. protected $length;
  9696. /**
  9697. * @type HTMLPurifier_AttrDef_CSS_Percentage
  9698. */
  9699. protected $percentage;
  9700. public function __construct()
  9701. {
  9702. $this->length = new HTMLPurifier_AttrDef_CSS_Length();
  9703. $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
  9704. }
  9705. /**
  9706. * @param string $string
  9707. * @param HTMLPurifier_Config $config
  9708. * @param HTMLPurifier_Context $context
  9709. * @return bool|string
  9710. */
  9711. public function validate($string, $config, $context)
  9712. {
  9713. $string = $this->parseCDATA($string);
  9714. $bits = explode(' ', $string);
  9715. $keywords = array();
  9716. $keywords['h'] = false; // left, right
  9717. $keywords['v'] = false; // top, bottom
  9718. $keywords['ch'] = false; // center (first word)
  9719. $keywords['cv'] = false; // center (second word)
  9720. $measures = array();
  9721. $i = 0;
  9722. $lookup = array(
  9723. 'top' => 'v',
  9724. 'bottom' => 'v',
  9725. 'left' => 'h',
  9726. 'right' => 'h',
  9727. 'center' => 'c'
  9728. );
  9729. foreach ($bits as $bit) {
  9730. if ($bit === '') {
  9731. continue;
  9732. }
  9733. // test for keyword
  9734. $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
  9735. if (isset($lookup[$lbit])) {
  9736. $status = $lookup[$lbit];
  9737. if ($status == 'c') {
  9738. if ($i == 0) {
  9739. $status = 'ch';
  9740. } else {
  9741. $status = 'cv';
  9742. }
  9743. }
  9744. $keywords[$status] = $lbit;
  9745. $i++;
  9746. }
  9747. // test for length
  9748. $r = $this->length->validate($bit, $config, $context);
  9749. if ($r !== false) {
  9750. $measures[] = $r;
  9751. $i++;
  9752. }
  9753. // test for percentage
  9754. $r = $this->percentage->validate($bit, $config, $context);
  9755. if ($r !== false) {
  9756. $measures[] = $r;
  9757. $i++;
  9758. }
  9759. }
  9760. if (!$i) {
  9761. return false;
  9762. } // no valid values were caught
  9763. $ret = array();
  9764. // first keyword
  9765. if ($keywords['h']) {
  9766. $ret[] = $keywords['h'];
  9767. } elseif ($keywords['ch']) {
  9768. $ret[] = $keywords['ch'];
  9769. $keywords['cv'] = false; // prevent re-use: center = center center
  9770. } elseif (count($measures)) {
  9771. $ret[] = array_shift($measures);
  9772. }
  9773. if ($keywords['v']) {
  9774. $ret[] = $keywords['v'];
  9775. } elseif ($keywords['cv']) {
  9776. $ret[] = $keywords['cv'];
  9777. } elseif (count($measures)) {
  9778. $ret[] = array_shift($measures);
  9779. }
  9780. if (empty($ret)) {
  9781. return false;
  9782. }
  9783. return implode(' ', $ret);
  9784. }
  9785. }
  9786. /**
  9787. * Validates the border property as defined by CSS.
  9788. */
  9789. class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
  9790. {
  9791. /**
  9792. * Local copy of properties this property is shorthand for.
  9793. * @type HTMLPurifier_AttrDef[]
  9794. */
  9795. protected $info = array();
  9796. /**
  9797. * @param HTMLPurifier_Config $config
  9798. */
  9799. public function __construct($config)
  9800. {
  9801. $def = $config->getCSSDefinition();
  9802. $this->info['border-width'] = $def->info['border-width'];
  9803. $this->info['border-style'] = $def->info['border-style'];
  9804. $this->info['border-top-color'] = $def->info['border-top-color'];
  9805. }
  9806. /**
  9807. * @param string $string
  9808. * @param HTMLPurifier_Config $config
  9809. * @param HTMLPurifier_Context $context
  9810. * @return bool|string
  9811. */
  9812. public function validate($string, $config, $context)
  9813. {
  9814. $string = $this->parseCDATA($string);
  9815. $string = $this->mungeRgb($string);
  9816. $bits = explode(' ', $string);
  9817. $done = array(); // segments we've finished
  9818. $ret = ''; // return value
  9819. foreach ($bits as $bit) {
  9820. foreach ($this->info as $propname => $validator) {
  9821. if (isset($done[$propname])) {
  9822. continue;
  9823. }
  9824. $r = $validator->validate($bit, $config, $context);
  9825. if ($r !== false) {
  9826. $ret .= $r . ' ';
  9827. $done[$propname] = true;
  9828. break;
  9829. }
  9830. }
  9831. }
  9832. return rtrim($ret);
  9833. }
  9834. }
  9835. /**
  9836. * Validates Color as defined by CSS.
  9837. */
  9838. class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
  9839. {
  9840. /**
  9841. * @param string $color
  9842. * @param HTMLPurifier_Config $config
  9843. * @param HTMLPurifier_Context $context
  9844. * @return bool|string
  9845. */
  9846. public function validate($color, $config, $context)
  9847. {
  9848. static $colors = null;
  9849. if ($colors === null) {
  9850. $colors = $config->get('Core.ColorKeywords');
  9851. }
  9852. $color = trim($color);
  9853. if ($color === '') {
  9854. return false;
  9855. }
  9856. $lower = strtolower($color);
  9857. if (isset($colors[$lower])) {
  9858. return $colors[$lower];
  9859. }
  9860. if (strpos($color, 'rgb(') !== false) {
  9861. // rgb literal handling
  9862. $length = strlen($color);
  9863. if (strpos($color, ')') !== $length - 1) {
  9864. return false;
  9865. }
  9866. $triad = substr($color, 4, $length - 4 - 1);
  9867. $parts = explode(',', $triad);
  9868. if (count($parts) !== 3) {
  9869. return false;
  9870. }
  9871. $type = false; // to ensure that they're all the same type
  9872. $new_parts = array();
  9873. foreach ($parts as $part) {
  9874. $part = trim($part);
  9875. if ($part === '') {
  9876. return false;
  9877. }
  9878. $length = strlen($part);
  9879. if ($part[$length - 1] === '%') {
  9880. // handle percents
  9881. if (!$type) {
  9882. $type = 'percentage';
  9883. } elseif ($type !== 'percentage') {
  9884. return false;
  9885. }
  9886. $num = (float)substr($part, 0, $length - 1);
  9887. if ($num < 0) {
  9888. $num = 0;
  9889. }
  9890. if ($num > 100) {
  9891. $num = 100;
  9892. }
  9893. $new_parts[] = "$num%";
  9894. } else {
  9895. // handle integers
  9896. if (!$type) {
  9897. $type = 'integer';
  9898. } elseif ($type !== 'integer') {
  9899. return false;
  9900. }
  9901. $num = (int)$part;
  9902. if ($num < 0) {
  9903. $num = 0;
  9904. }
  9905. if ($num > 255) {
  9906. $num = 255;
  9907. }
  9908. $new_parts[] = (string)$num;
  9909. }
  9910. }
  9911. $new_triad = implode(',', $new_parts);
  9912. $color = "rgb($new_triad)";
  9913. } else {
  9914. // hexadecimal handling
  9915. if ($color[0] === '#') {
  9916. $hex = substr($color, 1);
  9917. } else {
  9918. $hex = $color;
  9919. $color = '#' . $color;
  9920. }
  9921. $length = strlen($hex);
  9922. if ($length !== 3 && $length !== 6) {
  9923. return false;
  9924. }
  9925. if (!ctype_xdigit($hex)) {
  9926. return false;
  9927. }
  9928. }
  9929. return $color;
  9930. }
  9931. }
  9932. /**
  9933. * Allows multiple validators to attempt to validate attribute.
  9934. *
  9935. * Composite is just what it sounds like: a composite of many validators.
  9936. * This means that multiple HTMLPurifier_AttrDef objects will have a whack
  9937. * at the string. If one of them passes, that's what is returned. This is
  9938. * especially useful for CSS values, which often are a choice between
  9939. * an enumerated set of predefined values or a flexible data type.
  9940. */
  9941. class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
  9942. {
  9943. /**
  9944. * List of objects that may process strings.
  9945. * @type HTMLPurifier_AttrDef[]
  9946. * @todo Make protected
  9947. */
  9948. public $defs;
  9949. /**
  9950. * @param HTMLPurifier_AttrDef[] $defs List of HTMLPurifier_AttrDef objects
  9951. */
  9952. public function __construct($defs)
  9953. {
  9954. $this->defs = $defs;
  9955. }
  9956. /**
  9957. * @param string $string
  9958. * @param HTMLPurifier_Config $config
  9959. * @param HTMLPurifier_Context $context
  9960. * @return bool|string
  9961. */
  9962. public function validate($string, $config, $context)
  9963. {
  9964. foreach ($this->defs as $i => $def) {
  9965. $result = $this->defs[$i]->validate($string, $config, $context);
  9966. if ($result !== false) {
  9967. return $result;
  9968. }
  9969. }
  9970. return false;
  9971. }
  9972. }
  9973. /**
  9974. * Decorator which enables CSS properties to be disabled for specific elements.
  9975. */
  9976. class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
  9977. {
  9978. /**
  9979. * @type HTMLPurifier_AttrDef
  9980. */
  9981. public $def;
  9982. /**
  9983. * @type string
  9984. */
  9985. public $element;
  9986. /**
  9987. * @param HTMLPurifier_AttrDef $def Definition to wrap
  9988. * @param string $element Element to deny
  9989. */
  9990. public function __construct($def, $element)
  9991. {
  9992. $this->def = $def;
  9993. $this->element = $element;
  9994. }
  9995. /**
  9996. * Checks if CurrentToken is set and equal to $this->element
  9997. * @param string $string
  9998. * @param HTMLPurifier_Config $config
  9999. * @param HTMLPurifier_Context $context
  10000. * @return bool|string
  10001. */
  10002. public function validate($string, $config, $context)
  10003. {
  10004. $token = $context->get('CurrentToken', true);
  10005. if ($token && $token->name == $this->element) {
  10006. return false;
  10007. }
  10008. return $this->def->validate($string, $config, $context);
  10009. }
  10010. }
  10011. /**
  10012. * Microsoft's proprietary filter: CSS property
  10013. * @note Currently supports the alpha filter. In the future, this will
  10014. * probably need an extensible framework
  10015. */
  10016. class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
  10017. {
  10018. /**
  10019. * @type HTMLPurifier_AttrDef_Integer
  10020. */
  10021. protected $intValidator;
  10022. public function __construct()
  10023. {
  10024. $this->intValidator = new HTMLPurifier_AttrDef_Integer();
  10025. }
  10026. /**
  10027. * @param string $value
  10028. * @param HTMLPurifier_Config $config
  10029. * @param HTMLPurifier_Context $context
  10030. * @return bool|string
  10031. */
  10032. public function validate($value, $config, $context)
  10033. {
  10034. $value = $this->parseCDATA($value);
  10035. if ($value === 'none') {
  10036. return $value;
  10037. }
  10038. // if we looped this we could support multiple filters
  10039. $function_length = strcspn($value, '(');
  10040. $function = trim(substr($value, 0, $function_length));
  10041. if ($function !== 'alpha' &&
  10042. $function !== 'Alpha' &&
  10043. $function !== 'progid:DXImageTransform.Microsoft.Alpha'
  10044. ) {
  10045. return false;
  10046. }
  10047. $cursor = $function_length + 1;
  10048. $parameters_length = strcspn($value, ')', $cursor);
  10049. $parameters = substr($value, $cursor, $parameters_length);
  10050. $params = explode(',', $parameters);
  10051. $ret_params = array();
  10052. $lookup = array();
  10053. foreach ($params as $param) {
  10054. list($key, $value) = explode('=', $param);
  10055. $key = trim($key);
  10056. $value = trim($value);
  10057. if (isset($lookup[$key])) {
  10058. continue;
  10059. }
  10060. if ($key !== 'opacity') {
  10061. continue;
  10062. }
  10063. $value = $this->intValidator->validate($value, $config, $context);
  10064. if ($value === false) {
  10065. continue;
  10066. }
  10067. $int = (int)$value;
  10068. if ($int > 100) {
  10069. $value = '100';
  10070. }
  10071. if ($int < 0) {
  10072. $value = '0';
  10073. }
  10074. $ret_params[] = "$key=$value";
  10075. $lookup[$key] = true;
  10076. }
  10077. $ret_parameters = implode(',', $ret_params);
  10078. $ret_function = "$function($ret_parameters)";
  10079. return $ret_function;
  10080. }
  10081. }
  10082. /**
  10083. * Validates shorthand CSS property font.
  10084. */
  10085. class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
  10086. {
  10087. /**
  10088. * Local copy of validators
  10089. * @type HTMLPurifier_AttrDef[]
  10090. * @note If we moved specific CSS property definitions to their own
  10091. * classes instead of having them be assembled at run time by
  10092. * CSSDefinition, this wouldn't be necessary. We'd instantiate
  10093. * our own copies.
  10094. */
  10095. protected $info = array();
  10096. /**
  10097. * @param HTMLPurifier_Config $config
  10098. */
  10099. public function __construct($config)
  10100. {
  10101. $def = $config->getCSSDefinition();
  10102. $this->info['font-style'] = $def->info['font-style'];
  10103. $this->info['font-variant'] = $def->info['font-variant'];
  10104. $this->info['font-weight'] = $def->info['font-weight'];
  10105. $this->info['font-size'] = $def->info['font-size'];
  10106. $this->info['line-height'] = $def->info['line-height'];
  10107. $this->info['font-family'] = $def->info['font-family'];
  10108. }
  10109. /**
  10110. * @param string $string
  10111. * @param HTMLPurifier_Config $config
  10112. * @param HTMLPurifier_Context $context
  10113. * @return bool|string
  10114. */
  10115. public function validate($string, $config, $context)
  10116. {
  10117. static $system_fonts = array(
  10118. 'caption' => true,
  10119. 'icon' => true,
  10120. 'menu' => true,
  10121. 'message-box' => true,
  10122. 'small-caption' => true,
  10123. 'status-bar' => true
  10124. );
  10125. // regular pre-processing
  10126. $string = $this->parseCDATA($string);
  10127. if ($string === '') {
  10128. return false;
  10129. }
  10130. // check if it's one of the keywords
  10131. $lowercase_string = strtolower($string);
  10132. if (isset($system_fonts[$lowercase_string])) {
  10133. return $lowercase_string;
  10134. }
  10135. $bits = explode(' ', $string); // bits to process
  10136. $stage = 0; // this indicates what we're looking for
  10137. $caught = array(); // which stage 0 properties have we caught?
  10138. $stage_1 = array('font-style', 'font-variant', 'font-weight');
  10139. $final = ''; // output
  10140. for ($i = 0, $size = count($bits); $i < $size; $i++) {
  10141. if ($bits[$i] === '') {
  10142. continue;
  10143. }
  10144. switch ($stage) {
  10145. case 0: // attempting to catch font-style, font-variant or font-weight
  10146. foreach ($stage_1 as $validator_name) {
  10147. if (isset($caught[$validator_name])) {
  10148. continue;
  10149. }
  10150. $r = $this->info[$validator_name]->validate(
  10151. $bits[$i],
  10152. $config,
  10153. $context
  10154. );
  10155. if ($r !== false) {
  10156. $final .= $r . ' ';
  10157. $caught[$validator_name] = true;
  10158. break;
  10159. }
  10160. }
  10161. // all three caught, continue on
  10162. if (count($caught) >= 3) {
  10163. $stage = 1;
  10164. }
  10165. if ($r !== false) {
  10166. break;
  10167. }
  10168. case 1: // attempting to catch font-size and perhaps line-height
  10169. $found_slash = false;
  10170. if (strpos($bits[$i], '/') !== false) {
  10171. list($font_size, $line_height) =
  10172. explode('/', $bits[$i]);
  10173. if ($line_height === '') {
  10174. // ooh, there's a space after the slash!
  10175. $line_height = false;
  10176. $found_slash = true;
  10177. }
  10178. } else {
  10179. $font_size = $bits[$i];
  10180. $line_height = false;
  10181. }
  10182. $r = $this->info['font-size']->validate(
  10183. $font_size,
  10184. $config,
  10185. $context
  10186. );
  10187. if ($r !== false) {
  10188. $final .= $r;
  10189. // attempt to catch line-height
  10190. if ($line_height === false) {
  10191. // we need to scroll forward
  10192. for ($j = $i + 1; $j < $size; $j++) {
  10193. if ($bits[$j] === '') {
  10194. continue;
  10195. }
  10196. if ($bits[$j] === '/') {
  10197. if ($found_slash) {
  10198. return false;
  10199. } else {
  10200. $found_slash = true;
  10201. continue;
  10202. }
  10203. }
  10204. $line_height = $bits[$j];
  10205. break;
  10206. }
  10207. } else {
  10208. // slash already found
  10209. $found_slash = true;
  10210. $j = $i;
  10211. }
  10212. if ($found_slash) {
  10213. $i = $j;
  10214. $r = $this->info['line-height']->validate(
  10215. $line_height,
  10216. $config,
  10217. $context
  10218. );
  10219. if ($r !== false) {
  10220. $final .= '/' . $r;
  10221. }
  10222. }
  10223. $final .= ' ';
  10224. $stage = 2;
  10225. break;
  10226. }
  10227. return false;
  10228. case 2: // attempting to catch font-family
  10229. $font_family =
  10230. implode(' ', array_slice($bits, $i, $size - $i));
  10231. $r = $this->info['font-family']->validate(
  10232. $font_family,
  10233. $config,
  10234. $context
  10235. );
  10236. if ($r !== false) {
  10237. $final .= $r . ' ';
  10238. // processing completed successfully
  10239. return rtrim($final);
  10240. }
  10241. return false;
  10242. }
  10243. }
  10244. return false;
  10245. }
  10246. }
  10247. /**
  10248. * Validates a font family list according to CSS spec
  10249. */
  10250. class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
  10251. {
  10252. protected $mask = null;
  10253. public function __construct()
  10254. {
  10255. $this->mask = '_- ';
  10256. for ($c = 'a'; $c <= 'z'; $c++) {
  10257. $this->mask .= $c;
  10258. }
  10259. for ($c = 'A'; $c <= 'Z'; $c++) {
  10260. $this->mask .= $c;
  10261. }
  10262. for ($c = '0'; $c <= '9'; $c++) {
  10263. $this->mask .= $c;
  10264. } // cast-y, but should be fine
  10265. // special bytes used by UTF-8
  10266. for ($i = 0x80; $i <= 0xFF; $i++) {
  10267. // We don't bother excluding invalid bytes in this range,
  10268. // because the our restriction of well-formed UTF-8 will
  10269. // prevent these from ever occurring.
  10270. $this->mask .= chr($i);
  10271. }
  10272. /*
  10273. PHP's internal strcspn implementation is
  10274. O(length of string * length of mask), making it inefficient
  10275. for large masks. However, it's still faster than
  10276. preg_match 8)
  10277. for (p = s1;;) {
  10278. spanp = s2;
  10279. do {
  10280. if (*spanp == c || p == s1_end) {
  10281. return p - s1;
  10282. }
  10283. } while (spanp++ < (s2_end - 1));
  10284. c = *++p;
  10285. }
  10286. */
  10287. // possible optimization: invert the mask.
  10288. }
  10289. /**
  10290. * @param string $string
  10291. * @param HTMLPurifier_Config $config
  10292. * @param HTMLPurifier_Context $context
  10293. * @return bool|string
  10294. */
  10295. public function validate($string, $config, $context)
  10296. {
  10297. static $generic_names = array(
  10298. 'serif' => true,
  10299. 'sans-serif' => true,
  10300. 'monospace' => true,
  10301. 'fantasy' => true,
  10302. 'cursive' => true
  10303. );
  10304. $allowed_fonts = $config->get('CSS.AllowedFonts');
  10305. // assume that no font names contain commas in them
  10306. $fonts = explode(',', $string);
  10307. $final = '';
  10308. foreach ($fonts as $font) {
  10309. $font = trim($font);
  10310. if ($font === '') {
  10311. continue;
  10312. }
  10313. // match a generic name
  10314. if (isset($generic_names[$font])) {
  10315. if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
  10316. $final .= $font . ', ';
  10317. }
  10318. continue;
  10319. }
  10320. // match a quoted name
  10321. if ($font[0] === '"' || $font[0] === "'") {
  10322. $length = strlen($font);
  10323. if ($length <= 2) {
  10324. continue;
  10325. }
  10326. $quote = $font[0];
  10327. if ($font[$length - 1] !== $quote) {
  10328. continue;
  10329. }
  10330. $font = substr($font, 1, $length - 2);
  10331. }
  10332. $font = $this->expandCSSEscape($font);
  10333. // $font is a pure representation of the font name
  10334. if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
  10335. continue;
  10336. }
  10337. if (ctype_alnum($font) && $font !== '') {
  10338. // very simple font, allow it in unharmed
  10339. $final .= $font . ', ';
  10340. continue;
  10341. }
  10342. // bugger out on whitespace. form feed (0C) really
  10343. // shouldn't show up regardless
  10344. $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
  10345. // Here, there are various classes of characters which need
  10346. // to be treated differently:
  10347. // - Alphanumeric characters are essentially safe. We
  10348. // handled these above.
  10349. // - Spaces require quoting, though most parsers will do
  10350. // the right thing if there aren't any characters that
  10351. // can be misinterpreted
  10352. // - Dashes rarely occur, but they fairly unproblematic
  10353. // for parsing/rendering purposes.
  10354. // The above characters cover the majority of Western font
  10355. // names.
  10356. // - Arbitrary Unicode characters not in ASCII. Because
  10357. // most parsers give little thought to Unicode, treatment
  10358. // of these codepoints is basically uniform, even for
  10359. // punctuation-like codepoints. These characters can
  10360. // show up in non-Western pages and are supported by most
  10361. // major browsers, for example: "MS 明朝" is a
  10362. // legitimate font-name
  10363. // <http://ja.wikipedia.org/wiki/MS_明朝>. See
  10364. // the CSS3 spec for more examples:
  10365. // <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
  10366. // You can see live samples of these on the Internet:
  10367. // <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
  10368. // However, most of these fonts have ASCII equivalents:
  10369. // for example, 'MS Mincho', and it's considered
  10370. // professional to use ASCII font names instead of
  10371. // Unicode font names. Thanks Takeshi Terada for
  10372. // providing this information.
  10373. // The following characters, to my knowledge, have not been
  10374. // used to name font names.
  10375. // - Single quote. While theoretically you might find a
  10376. // font name that has a single quote in its name (serving
  10377. // as an apostrophe, e.g. Dave's Scribble), I haven't
  10378. // been able to find any actual examples of this.
  10379. // Internet Explorer's cssText translation (which I
  10380. // believe is invoked by innerHTML) normalizes any
  10381. // quoting to single quotes, and fails to escape single
  10382. // quotes. (Note that this is not IE's behavior for all
  10383. // CSS properties, just some sort of special casing for
  10384. // font-family). So a single quote *cannot* be used
  10385. // safely in the font-family context if there will be an
  10386. // innerHTML/cssText translation. Note that Firefox 3.x
  10387. // does this too.
  10388. // - Double quote. In IE, these get normalized to
  10389. // single-quotes, no matter what the encoding. (Fun
  10390. // fact, in IE8, the 'content' CSS property gained
  10391. // support, where they special cased to preserve encoded
  10392. // double quotes, but still translate unadorned double
  10393. // quotes into single quotes.) So, because their
  10394. // fixpoint behavior is identical to single quotes, they
  10395. // cannot be allowed either. Firefox 3.x displays
  10396. // single-quote style behavior.
  10397. // - Backslashes are reduced by one (so \\ -> \) every
  10398. // iteration, so they cannot be used safely. This shows
  10399. // up in IE7, IE8 and FF3
  10400. // - Semicolons, commas and backticks are handled properly.
  10401. // - The rest of the ASCII punctuation is handled properly.
  10402. // We haven't checked what browsers do to unadorned
  10403. // versions, but this is not important as long as the
  10404. // browser doesn't /remove/ surrounding quotes (as IE does
  10405. // for HTML).
  10406. //
  10407. // With these results in hand, we conclude that there are
  10408. // various levels of safety:
  10409. // - Paranoid: alphanumeric, spaces and dashes(?)
  10410. // - International: Paranoid + non-ASCII Unicode
  10411. // - Edgy: Everything except quotes, backslashes
  10412. // - NoJS: Standards compliance, e.g. sod IE. Note that
  10413. // with some judicious character escaping (since certain
  10414. // types of escaping doesn't work) this is theoretically
  10415. // OK as long as innerHTML/cssText is not called.
  10416. // We believe that international is a reasonable default
  10417. // (that we will implement now), and once we do more
  10418. // extensive research, we may feel comfortable with dropping
  10419. // it down to edgy.
  10420. // Edgy: alphanumeric, spaces, dashes, underscores and Unicode. Use of
  10421. // str(c)spn assumes that the string was already well formed
  10422. // Unicode (which of course it is).
  10423. if (strspn($font, $this->mask) !== strlen($font)) {
  10424. continue;
  10425. }
  10426. // Historical:
  10427. // In the absence of innerHTML/cssText, these ugly
  10428. // transforms don't pose a security risk (as \\ and \"
  10429. // might--these escapes are not supported by most browsers).
  10430. // We could try to be clever and use single-quote wrapping
  10431. // when there is a double quote present, but I have choosen
  10432. // not to implement that. (NOTE: you can reduce the amount
  10433. // of escapes by one depending on what quoting style you use)
  10434. // $font = str_replace('\\', '\\5C ', $font);
  10435. // $font = str_replace('"', '\\22 ', $font);
  10436. // $font = str_replace("'", '\\27 ', $font);
  10437. // font possibly with spaces, requires quoting
  10438. $final .= "'$font', ";
  10439. }
  10440. $final = rtrim($final, ', ');
  10441. if ($final === '') {
  10442. return false;
  10443. }
  10444. return $final;
  10445. }
  10446. }
  10447. /**
  10448. * Validates based on {ident} CSS grammar production
  10449. */
  10450. class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
  10451. {
  10452. /**
  10453. * @param string $string
  10454. * @param HTMLPurifier_Config $config
  10455. * @param HTMLPurifier_Context $context
  10456. * @return bool|string
  10457. */
  10458. public function validate($string, $config, $context)
  10459. {
  10460. $string = trim($string);
  10461. // early abort: '' and '0' (strings that convert to false) are invalid
  10462. if (!$string) {
  10463. return false;
  10464. }
  10465. $pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/';
  10466. if (!preg_match($pattern, $string)) {
  10467. return false;
  10468. }
  10469. return $string;
  10470. }
  10471. }
  10472. /**
  10473. * Decorator which enables !important to be used in CSS values.
  10474. */
  10475. class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
  10476. {
  10477. /**
  10478. * @type HTMLPurifier_AttrDef
  10479. */
  10480. public $def;
  10481. /**
  10482. * @type bool
  10483. */
  10484. public $allow;
  10485. /**
  10486. * @param HTMLPurifier_AttrDef $def Definition to wrap
  10487. * @param bool $allow Whether or not to allow !important
  10488. */
  10489. public function __construct($def, $allow = false)
  10490. {
  10491. $this->def = $def;
  10492. $this->allow = $allow;
  10493. }
  10494. /**
  10495. * Intercepts and removes !important if necessary
  10496. * @param string $string
  10497. * @param HTMLPurifier_Config $config
  10498. * @param HTMLPurifier_Context $context
  10499. * @return bool|string
  10500. */
  10501. public function validate($string, $config, $context)
  10502. {
  10503. // test for ! and important tokens
  10504. $string = trim($string);
  10505. $is_important = false;
  10506. // :TODO: optimization: test directly for !important and ! important
  10507. if (strlen($string) >= 9 && substr($string, -9) === 'important') {
  10508. $temp = rtrim(substr($string, 0, -9));
  10509. // use a temp, because we might want to restore important
  10510. if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
  10511. $string = rtrim(substr($temp, 0, -1));
  10512. $is_important = true;
  10513. }
  10514. }
  10515. $string = $this->def->validate($string, $config, $context);
  10516. if ($this->allow && $is_important) {
  10517. $string .= ' !important';
  10518. }
  10519. return $string;
  10520. }
  10521. }
  10522. /**
  10523. * Represents a Length as defined by CSS.
  10524. */
  10525. class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
  10526. {
  10527. /**
  10528. * @type HTMLPurifier_Length|string
  10529. */
  10530. protected $min;
  10531. /**
  10532. * @type HTMLPurifier_Length|string
  10533. */
  10534. protected $max;
  10535. /**
  10536. * @param HTMLPurifier_Length|string $min Minimum length, or null for no bound. String is also acceptable.
  10537. * @param HTMLPurifier_Length|string $max Maximum length, or null for no bound. String is also acceptable.
  10538. */
  10539. public function __construct($min = null, $max = null)
  10540. {
  10541. $this->min = $min !== null ? HTMLPurifier_Length::make($min) : null;
  10542. $this->max = $max !== null ? HTMLPurifier_Length::make($max) : null;
  10543. }
  10544. /**
  10545. * @param string $string
  10546. * @param HTMLPurifier_Config $config
  10547. * @param HTMLPurifier_Context $context
  10548. * @return bool|string
  10549. */
  10550. public function validate($string, $config, $context)
  10551. {
  10552. $string = $this->parseCDATA($string);
  10553. // Optimizations
  10554. if ($string === '') {
  10555. return false;
  10556. }
  10557. if ($string === '0') {
  10558. return '0';
  10559. }
  10560. if (strlen($string) === 1) {
  10561. return false;
  10562. }
  10563. $length = HTMLPurifier_Length::make($string);
  10564. if (!$length->isValid()) {
  10565. return false;
  10566. }
  10567. if ($this->min) {
  10568. $c = $length->compareTo($this->min);
  10569. if ($c === false) {
  10570. return false;
  10571. }
  10572. if ($c < 0) {
  10573. return false;
  10574. }
  10575. }
  10576. if ($this->max) {
  10577. $c = $length->compareTo($this->max);
  10578. if ($c === false) {
  10579. return false;
  10580. }
  10581. if ($c > 0) {
  10582. return false;
  10583. }
  10584. }
  10585. return $length->toString();
  10586. }
  10587. }
  10588. /**
  10589. * Validates shorthand CSS property list-style.
  10590. * @warning Does not support url tokens that have internal spaces.
  10591. */
  10592. class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
  10593. {
  10594. /**
  10595. * Local copy of validators.
  10596. * @type HTMLPurifier_AttrDef[]
  10597. * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
  10598. */
  10599. protected $info;
  10600. /**
  10601. * @param HTMLPurifier_Config $config
  10602. */
  10603. public function __construct($config)
  10604. {
  10605. $def = $config->getCSSDefinition();
  10606. $this->info['list-style-type'] = $def->info['list-style-type'];
  10607. $this->info['list-style-position'] = $def->info['list-style-position'];
  10608. $this->info['list-style-image'] = $def->info['list-style-image'];
  10609. }
  10610. /**
  10611. * @param string $string
  10612. * @param HTMLPurifier_Config $config
  10613. * @param HTMLPurifier_Context $context
  10614. * @return bool|string
  10615. */
  10616. public function validate($string, $config, $context)
  10617. {
  10618. // regular pre-processing
  10619. $string = $this->parseCDATA($string);
  10620. if ($string === '') {
  10621. return false;
  10622. }
  10623. // assumes URI doesn't have spaces in it
  10624. $bits = explode(' ', strtolower($string)); // bits to process
  10625. $caught = array();
  10626. $caught['type'] = false;
  10627. $caught['position'] = false;
  10628. $caught['image'] = false;
  10629. $i = 0; // number of catches
  10630. $none = false;
  10631. foreach ($bits as $bit) {
  10632. if ($i >= 3) {
  10633. return;
  10634. } // optimization bit
  10635. if ($bit === '') {
  10636. continue;
  10637. }
  10638. foreach ($caught as $key => $status) {
  10639. if ($status !== false) {
  10640. continue;
  10641. }
  10642. $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
  10643. if ($r === false) {
  10644. continue;
  10645. }
  10646. if ($r === 'none') {
  10647. if ($none) {
  10648. continue;
  10649. } else {
  10650. $none = true;
  10651. }
  10652. if ($key == 'image') {
  10653. continue;
  10654. }
  10655. }
  10656. $caught[$key] = $r;
  10657. $i++;
  10658. break;
  10659. }
  10660. }
  10661. if (!$i) {
  10662. return false;
  10663. }
  10664. $ret = array();
  10665. // construct type
  10666. if ($caught['type']) {
  10667. $ret[] = $caught['type'];
  10668. }
  10669. // construct image
  10670. if ($caught['image']) {
  10671. $ret[] = $caught['image'];
  10672. }
  10673. // construct position
  10674. if ($caught['position']) {
  10675. $ret[] = $caught['position'];
  10676. }
  10677. if (empty($ret)) {
  10678. return false;
  10679. }
  10680. return implode(' ', $ret);
  10681. }
  10682. }
  10683. /**
  10684. * Framework class for strings that involve multiple values.
  10685. *
  10686. * Certain CSS properties such as border-width and margin allow multiple
  10687. * lengths to be specified. This class can take a vanilla border-width
  10688. * definition and multiply it, usually into a max of four.
  10689. *
  10690. * @note Even though the CSS specification isn't clear about it, inherit
  10691. * can only be used alone: it will never manifest as part of a multi
  10692. * shorthand declaration. Thus, this class does not allow inherit.
  10693. */
  10694. class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
  10695. {
  10696. /**
  10697. * Instance of component definition to defer validation to.
  10698. * @type HTMLPurifier_AttrDef
  10699. * @todo Make protected
  10700. */
  10701. public $single;
  10702. /**
  10703. * Max number of values allowed.
  10704. * @todo Make protected
  10705. */
  10706. public $max;
  10707. /**
  10708. * @param HTMLPurifier_AttrDef $single HTMLPurifier_AttrDef to multiply
  10709. * @param int $max Max number of values allowed (usually four)
  10710. */
  10711. public function __construct($single, $max = 4)
  10712. {
  10713. $this->single = $single;
  10714. $this->max = $max;
  10715. }
  10716. /**
  10717. * @param string $string
  10718. * @param HTMLPurifier_Config $config
  10719. * @param HTMLPurifier_Context $context
  10720. * @return bool|string
  10721. */
  10722. public function validate($string, $config, $context)
  10723. {
  10724. $string = $this->parseCDATA($string);
  10725. if ($string === '') {
  10726. return false;
  10727. }
  10728. $parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n
  10729. $length = count($parts);
  10730. $final = '';
  10731. for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) {
  10732. if (ctype_space($parts[$i])) {
  10733. continue;
  10734. }
  10735. $result = $this->single->validate($parts[$i], $config, $context);
  10736. if ($result !== false) {
  10737. $final .= $result . ' ';
  10738. $num++;
  10739. }
  10740. }
  10741. if ($final === '') {
  10742. return false;
  10743. }
  10744. return rtrim($final);
  10745. }
  10746. }
  10747. /**
  10748. * Validates a Percentage as defined by the CSS spec.
  10749. */
  10750. class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
  10751. {
  10752. /**
  10753. * Instance to defer number validation to.
  10754. * @type HTMLPurifier_AttrDef_CSS_Number
  10755. */
  10756. protected $number_def;
  10757. /**
  10758. * @param bool $non_negative Whether to forbid negative values
  10759. */
  10760. public function __construct($non_negative = false)
  10761. {
  10762. $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
  10763. }
  10764. /**
  10765. * @param string $string
  10766. * @param HTMLPurifier_Config $config
  10767. * @param HTMLPurifier_Context $context
  10768. * @return bool|string
  10769. */
  10770. public function validate($string, $config, $context)
  10771. {
  10772. $string = $this->parseCDATA($string);
  10773. if ($string === '') {
  10774. return false;
  10775. }
  10776. $length = strlen($string);
  10777. if ($length === 1) {
  10778. return false;
  10779. }
  10780. if ($string[$length - 1] !== '%') {
  10781. return false;
  10782. }
  10783. $number = substr($string, 0, $length - 1);
  10784. $number = $this->number_def->validate($number, $config, $context);
  10785. if ($number === false) {
  10786. return false;
  10787. }
  10788. return "$number%";
  10789. }
  10790. }
  10791. /**
  10792. * Validates the value for the CSS property text-decoration
  10793. * @note This class could be generalized into a version that acts sort of
  10794. * like Enum except you can compound the allowed values.
  10795. */
  10796. class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
  10797. {
  10798. /**
  10799. * @param string $string
  10800. * @param HTMLPurifier_Config $config
  10801. * @param HTMLPurifier_Context $context
  10802. * @return bool|string
  10803. */
  10804. public function validate($string, $config, $context)
  10805. {
  10806. static $allowed_values = array(
  10807. 'line-through' => true,
  10808. 'overline' => true,
  10809. 'underline' => true,
  10810. );
  10811. $string = strtolower($this->parseCDATA($string));
  10812. if ($string === 'none') {
  10813. return $string;
  10814. }
  10815. $parts = explode(' ', $string);
  10816. $final = '';
  10817. foreach ($parts as $part) {
  10818. if (isset($allowed_values[$part])) {
  10819. $final .= $part . ' ';
  10820. }
  10821. }
  10822. $final = rtrim($final);
  10823. if ($final === '') {
  10824. return false;
  10825. }
  10826. return $final;
  10827. }
  10828. }
  10829. /**
  10830. * Validates a URI in CSS syntax, which uses url('http://example.com')
  10831. * @note While theoretically speaking a URI in a CSS document could
  10832. * be non-embedded, as of CSS2 there is no such usage so we're
  10833. * generalizing it. This may need to be changed in the future.
  10834. * @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
  10835. * the separator, you cannot put a literal semicolon in
  10836. * in the URI. Try percent encoding it, in that case.
  10837. */
  10838. class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
  10839. {
  10840. public function __construct()
  10841. {
  10842. parent::__construct(true); // always embedded
  10843. }
  10844. /**
  10845. * @param string $uri_string
  10846. * @param HTMLPurifier_Config $config
  10847. * @param HTMLPurifier_Context $context
  10848. * @return bool|string
  10849. */
  10850. public function validate($uri_string, $config, $context)
  10851. {
  10852. // parse the URI out of the string and then pass it onto
  10853. // the parent object
  10854. $uri_string = $this->parseCDATA($uri_string);
  10855. if (strpos($uri_string, 'url(') !== 0) {
  10856. return false;
  10857. }
  10858. $uri_string = substr($uri_string, 4);
  10859. $new_length = strlen($uri_string) - 1;
  10860. if ($uri_string[$new_length] != ')') {
  10861. return false;
  10862. }
  10863. $uri = trim(substr($uri_string, 0, $new_length));
  10864. if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
  10865. $quote = $uri[0];
  10866. $new_length = strlen($uri) - 1;
  10867. if ($uri[$new_length] !== $quote) {
  10868. return false;
  10869. }
  10870. $uri = substr($uri, 1, $new_length - 1);
  10871. }
  10872. $uri = $this->expandCSSEscape($uri);
  10873. $result = parent::validate($uri, $config, $context);
  10874. if ($result === false) {
  10875. return false;
  10876. }
  10877. // extra sanity check; should have been done by URI
  10878. $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
  10879. // suspicious characters are ()'; we're going to percent encode
  10880. // them for safety.
  10881. $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);
  10882. // there's an extra bug where ampersands lose their escaping on
  10883. // an innerHTML cycle, so a very unlucky query parameter could
  10884. // then change the meaning of the URL. Unfortunately, there's
  10885. // not much we can do about that...
  10886. return "url(\"$result\")";
  10887. }
  10888. }
  10889. /**
  10890. * Validates a boolean attribute
  10891. */
  10892. class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
  10893. {
  10894. /**
  10895. * @type bool
  10896. */
  10897. protected $name;
  10898. /**
  10899. * @type bool
  10900. */
  10901. public $minimized = true;
  10902. /**
  10903. * @param bool $name
  10904. */
  10905. public function __construct($name = false)
  10906. {
  10907. $this->name = $name;
  10908. }
  10909. /**
  10910. * @param string $string
  10911. * @param HTMLPurifier_Config $config
  10912. * @param HTMLPurifier_Context $context
  10913. * @return bool|string
  10914. */
  10915. public function validate($string, $config, $context)
  10916. {
  10917. if (empty($string)) {
  10918. return false;
  10919. }
  10920. return $this->name;
  10921. }
  10922. /**
  10923. * @param string $string Name of attribute
  10924. * @return HTMLPurifier_AttrDef_HTML_Bool
  10925. */
  10926. public function make($string)
  10927. {
  10928. return new HTMLPurifier_AttrDef_HTML_Bool($string);
  10929. }
  10930. }
  10931. /**
  10932. * Validates contents based on NMTOKENS attribute type.
  10933. */
  10934. class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
  10935. {
  10936. /**
  10937. * @param string $string
  10938. * @param HTMLPurifier_Config $config
  10939. * @param HTMLPurifier_Context $context
  10940. * @return bool|string
  10941. */
  10942. public function validate($string, $config, $context)
  10943. {
  10944. $string = trim($string);
  10945. // early abort: '' and '0' (strings that convert to false) are invalid
  10946. if (!$string) {
  10947. return false;
  10948. }
  10949. $tokens = $this->split($string, $config, $context);
  10950. $tokens = $this->filter($tokens, $config, $context);
  10951. if (empty($tokens)) {
  10952. return false;
  10953. }
  10954. return implode(' ', $tokens);
  10955. }
  10956. /**
  10957. * Splits a space separated list of tokens into its constituent parts.
  10958. * @param string $string
  10959. * @param HTMLPurifier_Config $config
  10960. * @param HTMLPurifier_Context $context
  10961. * @return array
  10962. */
  10963. protected function split($string, $config, $context)
  10964. {
  10965. // OPTIMIZABLE!
  10966. // do the preg_match, capture all subpatterns for reformulation
  10967. // we don't support U+00A1 and up codepoints or
  10968. // escaping because I don't know how to do that with regexps
  10969. // and plus it would complicate optimization efforts (you never
  10970. // see that anyway).
  10971. $pattern = '/(?:(?<=\s)|\A)' . // look behind for space or string start
  10972. '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)' .
  10973. '(?:(?=\s)|\z)/'; // look ahead for space or string end
  10974. preg_match_all($pattern, $string, $matches);
  10975. return $matches[1];
  10976. }
  10977. /**
  10978. * Template method for removing certain tokens based on arbitrary criteria.
  10979. * @note If we wanted to be really functional, we'd do an array_filter
  10980. * with a callback. But... we're not.
  10981. * @param array $tokens
  10982. * @param HTMLPurifier_Config $config
  10983. * @param HTMLPurifier_Context $context
  10984. * @return array
  10985. */
  10986. protected function filter($tokens, $config, $context)
  10987. {
  10988. return $tokens;
  10989. }
  10990. }
  10991. /**
  10992. * Implements special behavior for class attribute (normally NMTOKENS)
  10993. */
  10994. class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
  10995. {
  10996. /**
  10997. * @param string $string
  10998. * @param HTMLPurifier_Config $config
  10999. * @param HTMLPurifier_Context $context
  11000. * @return bool|string
  11001. */
  11002. protected function split($string, $config, $context)
  11003. {
  11004. // really, this twiddle should be lazy loaded
  11005. $name = $config->getDefinition('HTML')->doctype->name;
  11006. if ($name == "XHTML 1.1" || $name == "XHTML 2.0") {
  11007. return parent::split($string, $config, $context);
  11008. } else {
  11009. return preg_split('/\s+/', $string);
  11010. }
  11011. }
  11012. /**
  11013. * @param array $tokens
  11014. * @param HTMLPurifier_Config $config
  11015. * @param HTMLPurifier_Context $context
  11016. * @return array
  11017. */
  11018. protected function filter($tokens, $config, $context)
  11019. {
  11020. $allowed = $config->get('Attr.AllowedClasses');
  11021. $forbidden = $config->get('Attr.ForbiddenClasses');
  11022. $ret = array();
  11023. foreach ($tokens as $token) {
  11024. if (($allowed === null || isset($allowed[$token])) &&
  11025. !isset($forbidden[$token]) &&
  11026. // We need this O(n) check because of PHP's array
  11027. // implementation that casts -0 to 0.
  11028. !in_array($token, $ret, true)
  11029. ) {
  11030. $ret[] = $token;
  11031. }
  11032. }
  11033. return $ret;
  11034. }
  11035. }
  11036. /**
  11037. * Validates a color according to the HTML spec.
  11038. */
  11039. class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
  11040. {
  11041. /**
  11042. * @param string $string
  11043. * @param HTMLPurifier_Config $config
  11044. * @param HTMLPurifier_Context $context
  11045. * @return bool|string
  11046. */
  11047. public function validate($string, $config, $context)
  11048. {
  11049. static $colors = null;
  11050. if ($colors === null) {
  11051. $colors = $config->get('Core.ColorKeywords');
  11052. }
  11053. $string = trim($string);
  11054. if (empty($string)) {
  11055. return false;
  11056. }
  11057. $lower = strtolower($string);
  11058. if (isset($colors[$lower])) {
  11059. return $colors[$lower];
  11060. }
  11061. if ($string[0] === '#') {
  11062. $hex = substr($string, 1);
  11063. } else {
  11064. $hex = $string;
  11065. }
  11066. $length = strlen($hex);
  11067. if ($length !== 3 && $length !== 6) {
  11068. return false;
  11069. }
  11070. if (!ctype_xdigit($hex)) {
  11071. return false;
  11072. }
  11073. if ($length === 3) {
  11074. $hex = $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
  11075. }
  11076. return "#$hex";
  11077. }
  11078. }
  11079. /**
  11080. * Special-case enum attribute definition that lazy loads allowed frame targets
  11081. */
  11082. class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
  11083. {
  11084. /**
  11085. * @type array
  11086. */
  11087. public $valid_values = false; // uninitialized value
  11088. /**
  11089. * @type bool
  11090. */
  11091. protected $case_sensitive = false;
  11092. public function __construct()
  11093. {
  11094. }
  11095. /**
  11096. * @param string $string
  11097. * @param HTMLPurifier_Config $config
  11098. * @param HTMLPurifier_Context $context
  11099. * @return bool|string
  11100. */
  11101. public function validate($string, $config, $context)
  11102. {
  11103. if ($this->valid_values === false) {
  11104. $this->valid_values = $config->get('Attr.AllowedFrameTargets');
  11105. }
  11106. return parent::validate($string, $config, $context);
  11107. }
  11108. }
  11109. /**
  11110. * Validates the HTML attribute ID.
  11111. * @warning Even though this is the id processor, it
  11112. * will ignore the directive Attr:IDBlacklist, since it will only
  11113. * go according to the ID accumulator. Since the accumulator is
  11114. * automatically generated, it will have already absorbed the
  11115. * blacklist. If you're hacking around, make sure you use load()!
  11116. */
  11117. class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
  11118. {
  11119. // selector is NOT a valid thing to use for IDREFs, because IDREFs
  11120. // *must* target IDs that exist, whereas selector #ids do not.
  11121. /**
  11122. * Determines whether or not we're validating an ID in a CSS
  11123. * selector context.
  11124. * @type bool
  11125. */
  11126. protected $selector;
  11127. /**
  11128. * @param bool $selector
  11129. */
  11130. public function __construct($selector = false)
  11131. {
  11132. $this->selector = $selector;
  11133. }
  11134. /**
  11135. * @param string $id
  11136. * @param HTMLPurifier_Config $config
  11137. * @param HTMLPurifier_Context $context
  11138. * @return bool|string
  11139. */
  11140. public function validate($id, $config, $context)
  11141. {
  11142. if (!$this->selector && !$config->get('Attr.EnableID')) {
  11143. return false;
  11144. }
  11145. $id = trim($id); // trim it first
  11146. if ($id === '') {
  11147. return false;
  11148. }
  11149. $prefix = $config->get('Attr.IDPrefix');
  11150. if ($prefix !== '') {
  11151. $prefix .= $config->get('Attr.IDPrefixLocal');
  11152. // prevent re-appending the prefix
  11153. if (strpos($id, $prefix) !== 0) {
  11154. $id = $prefix . $id;
  11155. }
  11156. } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
  11157. trigger_error(
  11158. '%Attr.IDPrefixLocal cannot be used unless ' .
  11159. '%Attr.IDPrefix is set',
  11160. E_USER_WARNING
  11161. );
  11162. }
  11163. if (!$this->selector) {
  11164. $id_accumulator =& $context->get('IDAccumulator');
  11165. if (isset($id_accumulator->ids[$id])) {
  11166. return false;
  11167. }
  11168. }
  11169. // we purposely avoid using regex, hopefully this is faster
  11170. if (ctype_alpha($id)) {
  11171. $result = true;
  11172. } else {
  11173. if (!ctype_alpha(@$id[0])) {
  11174. return false;
  11175. }
  11176. // primitive style of regexps, I suppose
  11177. $trim = trim(
  11178. $id,
  11179. 'A..Za..z0..9:-._'
  11180. );
  11181. $result = ($trim === '');
  11182. }
  11183. $regexp = $config->get('Attr.IDBlacklistRegexp');
  11184. if ($regexp && preg_match($regexp, $id)) {
  11185. return false;
  11186. }
  11187. if (!$this->selector && $result) {
  11188. $id_accumulator->add($id);
  11189. }
  11190. // if no change was made to the ID, return the result
  11191. // else, return the new id if stripping whitespace made it
  11192. // valid, or return false.
  11193. return $result ? $id : false;
  11194. }
  11195. }
  11196. /**
  11197. * Validates an integer representation of pixels according to the HTML spec.
  11198. */
  11199. class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
  11200. {
  11201. /**
  11202. * @type int
  11203. */
  11204. protected $max;
  11205. /**
  11206. * @param int $max
  11207. */
  11208. public function __construct($max = null)
  11209. {
  11210. $this->max = $max;
  11211. }
  11212. /**
  11213. * @param string $string
  11214. * @param HTMLPurifier_Config $config
  11215. * @param HTMLPurifier_Context $context
  11216. * @return bool|string
  11217. */
  11218. public function validate($string, $config, $context)
  11219. {
  11220. $string = trim($string);
  11221. if ($string === '0') {
  11222. return $string;
  11223. }
  11224. if ($string === '') {
  11225. return false;
  11226. }
  11227. $length = strlen($string);
  11228. if (substr($string, $length - 2) == 'px') {
  11229. $string = substr($string, 0, $length - 2);
  11230. }
  11231. if (!is_numeric($string)) {
  11232. return false;
  11233. }
  11234. $int = (int)$string;
  11235. if ($int < 0) {
  11236. return '0';
  11237. }
  11238. // upper-bound value, extremely high values can
  11239. // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
  11240. // WARNING, above link WILL crash you if you're using Windows
  11241. if ($this->max !== null && $int > $this->max) {
  11242. return (string)$this->max;
  11243. }
  11244. return (string)$int;
  11245. }
  11246. /**
  11247. * @param string $string
  11248. * @return HTMLPurifier_AttrDef
  11249. */
  11250. public function make($string)
  11251. {
  11252. if ($string === '') {
  11253. $max = null;
  11254. } else {
  11255. $max = (int)$string;
  11256. }
  11257. $class = get_class($this);
  11258. return new $class($max);
  11259. }
  11260. }
  11261. /**
  11262. * Validates the HTML type length (not to be confused with CSS's length).
  11263. *
  11264. * This accepts integer pixels or percentages as lengths for certain
  11265. * HTML attributes.
  11266. */
  11267. class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
  11268. {
  11269. /**
  11270. * @param string $string
  11271. * @param HTMLPurifier_Config $config
  11272. * @param HTMLPurifier_Context $context
  11273. * @return bool|string
  11274. */
  11275. public function validate($string, $config, $context)
  11276. {
  11277. $string = trim($string);
  11278. if ($string === '') {
  11279. return false;
  11280. }
  11281. $parent_result = parent::validate($string, $config, $context);
  11282. if ($parent_result !== false) {
  11283. return $parent_result;
  11284. }
  11285. $length = strlen($string);
  11286. $last_char = $string[$length - 1];
  11287. if ($last_char !== '%') {
  11288. return false;
  11289. }
  11290. $points = substr($string, 0, $length - 1);
  11291. if (!is_numeric($points)) {
  11292. return false;
  11293. }
  11294. $points = (int)$points;
  11295. if ($points < 0) {
  11296. return '0%';
  11297. }
  11298. if ($points > 100) {
  11299. return '100%';
  11300. }
  11301. return ((string)$points) . '%';
  11302. }
  11303. }
  11304. /**
  11305. * Validates a rel/rev link attribute against a directive of allowed values
  11306. * @note We cannot use Enum because link types allow multiple
  11307. * values.
  11308. * @note Assumes link types are ASCII text
  11309. */
  11310. class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
  11311. {
  11312. /**
  11313. * Name config attribute to pull.
  11314. * @type string
  11315. */
  11316. protected $name;
  11317. /**
  11318. * @param string $name
  11319. */
  11320. public function __construct($name)
  11321. {
  11322. $configLookup = array(
  11323. 'rel' => 'AllowedRel',
  11324. 'rev' => 'AllowedRev'
  11325. );
  11326. if (!isset($configLookup[$name])) {
  11327. trigger_error(
  11328. 'Unrecognized attribute name for link ' .
  11329. 'relationship.',
  11330. E_USER_ERROR
  11331. );
  11332. return;
  11333. }
  11334. $this->name = $configLookup[$name];
  11335. }
  11336. /**
  11337. * @param string $string
  11338. * @param HTMLPurifier_Config $config
  11339. * @param HTMLPurifier_Context $context
  11340. * @return bool|string
  11341. */
  11342. public function validate($string, $config, $context)
  11343. {
  11344. $allowed = $config->get('Attr.' . $this->name);
  11345. if (empty($allowed)) {
  11346. return false;
  11347. }
  11348. $string = $this->parseCDATA($string);
  11349. $parts = explode(' ', $string);
  11350. // lookup to prevent duplicates
  11351. $ret_lookup = array();
  11352. foreach ($parts as $part) {
  11353. $part = strtolower(trim($part));
  11354. if (!isset($allowed[$part])) {
  11355. continue;
  11356. }
  11357. $ret_lookup[$part] = true;
  11358. }
  11359. if (empty($ret_lookup)) {
  11360. return false;
  11361. }
  11362. $string = implode(' ', array_keys($ret_lookup));
  11363. return $string;
  11364. }
  11365. }
  11366. /**
  11367. * Validates a MultiLength as defined by the HTML spec.
  11368. *
  11369. * A multilength is either a integer (pixel count), a percentage, or
  11370. * a relative number.
  11371. */
  11372. class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
  11373. {
  11374. /**
  11375. * @param string $string
  11376. * @param HTMLPurifier_Config $config
  11377. * @param HTMLPurifier_Context $context
  11378. * @return bool|string
  11379. */
  11380. public function validate($string, $config, $context)
  11381. {
  11382. $string = trim($string);
  11383. if ($string === '') {
  11384. return false;
  11385. }
  11386. $parent_result = parent::validate($string, $config, $context);
  11387. if ($parent_result !== false) {
  11388. return $parent_result;
  11389. }
  11390. $length = strlen($string);
  11391. $last_char = $string[$length - 1];
  11392. if ($last_char !== '*') {
  11393. return false;
  11394. }
  11395. $int = substr($string, 0, $length - 1);
  11396. if ($int == '') {
  11397. return '*';
  11398. }
  11399. if (!is_numeric($int)) {
  11400. return false;
  11401. }
  11402. $int = (int)$int;
  11403. if ($int < 0) {
  11404. return false;
  11405. }
  11406. if ($int == 0) {
  11407. return '0';
  11408. }
  11409. if ($int == 1) {
  11410. return '*';
  11411. }
  11412. return ((string)$int) . '*';
  11413. }
  11414. }
  11415. abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
  11416. {
  11417. /**
  11418. * Unpacks a mailbox into its display-name and address
  11419. * @param string $string
  11420. * @return mixed
  11421. */
  11422. public function unpack($string)
  11423. {
  11424. // needs to be implemented
  11425. }
  11426. }
  11427. // sub-implementations
  11428. /**
  11429. * Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
  11430. */
  11431. class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
  11432. {
  11433. /**
  11434. * IPv4 sub-validator.
  11435. * @type HTMLPurifier_AttrDef_URI_IPv4
  11436. */
  11437. protected $ipv4;
  11438. /**
  11439. * IPv6 sub-validator.
  11440. * @type HTMLPurifier_AttrDef_URI_IPv6
  11441. */
  11442. protected $ipv6;
  11443. public function __construct()
  11444. {
  11445. $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
  11446. $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
  11447. }
  11448. /**
  11449. * @param string $string
  11450. * @param HTMLPurifier_Config $config
  11451. * @param HTMLPurifier_Context $context
  11452. * @return bool|string
  11453. */
  11454. public function validate($string, $config, $context)
  11455. {
  11456. $length = strlen($string);
  11457. // empty hostname is OK; it's usually semantically equivalent:
  11458. // the default host as defined by a URI scheme is used:
  11459. //
  11460. // If the URI scheme defines a default for host, then that
  11461. // default applies when the host subcomponent is undefined
  11462. // or when the registered name is empty (zero length).
  11463. if ($string === '') {
  11464. return '';
  11465. }
  11466. if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') {
  11467. //IPv6
  11468. $ip = substr($string, 1, $length - 2);
  11469. $valid = $this->ipv6->validate($ip, $config, $context);
  11470. if ($valid === false) {
  11471. return false;
  11472. }
  11473. return '[' . $valid . ']';
  11474. }
  11475. // need to do checks on unusual encodings too
  11476. $ipv4 = $this->ipv4->validate($string, $config, $context);
  11477. if ($ipv4 !== false) {
  11478. return $ipv4;
  11479. }
  11480. // A regular domain name.
  11481. // This doesn't match I18N domain names, but we don't have proper IRI support,
  11482. // so force users to insert Punycode.
  11483. // There is not a good sense in which underscores should be
  11484. // allowed, since it's technically not! (And if you go as
  11485. // far to allow everything as specified by the DNS spec...
  11486. // well, that's literally everything, modulo some space limits
  11487. // for the components and the overall name (which, by the way,
  11488. // we are NOT checking!). So we (arbitrarily) decide this:
  11489. // let's allow underscores wherever we would have allowed
  11490. // hyphens, if they are enabled. This is a pretty good match
  11491. // for browser behavior, for example, a large number of browsers
  11492. // cannot handle foo_.example.com, but foo_bar.example.com is
  11493. // fairly well supported.
  11494. $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';
  11495. // The productions describing this are:
  11496. $a = '[a-z]'; // alpha
  11497. $an = '[a-z0-9]'; // alphanum
  11498. $and = "[a-z0-9-$underscore]"; // alphanum | "-"
  11499. // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
  11500. $domainlabel = "$an($and*$an)?";
  11501. // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
  11502. $toplabel = "$a($and*$an)?";
  11503. // hostname = *( domainlabel "." ) toplabel [ "." ]
  11504. if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
  11505. return $string;
  11506. }
  11507. // If we have Net_IDNA2 support, we can support IRIs by
  11508. // punycoding them. (This is the most portable thing to do,
  11509. // since otherwise we have to assume browsers support
  11510. if ($config->get('Core.EnableIDNA')) {
  11511. $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
  11512. // we need to encode each period separately
  11513. $parts = explode('.', $string);
  11514. try {
  11515. $new_parts = array();
  11516. foreach ($parts as $part) {
  11517. $encodable = false;
  11518. for ($i = 0, $c = strlen($part); $i < $c; $i++) {
  11519. if (ord($part[$i]) > 0x7a) {
  11520. $encodable = true;
  11521. break;
  11522. }
  11523. }
  11524. if (!$encodable) {
  11525. $new_parts[] = $part;
  11526. } else {
  11527. $new_parts[] = $idna->encode($part);
  11528. }
  11529. }
  11530. $string = implode('.', $new_parts);
  11531. if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
  11532. return $string;
  11533. }
  11534. } catch (Exception $e) {
  11535. // XXX error reporting
  11536. }
  11537. }
  11538. return false;
  11539. }
  11540. }
  11541. /**
  11542. * Validates an IPv4 address
  11543. * @author Feyd @ forums.devnetwork.net (public domain)
  11544. */
  11545. class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
  11546. {
  11547. /**
  11548. * IPv4 regex, protected so that IPv6 can reuse it.
  11549. * @type string
  11550. */
  11551. protected $ip4;
  11552. /**
  11553. * @param string $aIP
  11554. * @param HTMLPurifier_Config $config
  11555. * @param HTMLPurifier_Context $context
  11556. * @return bool|string
  11557. */
  11558. public function validate($aIP, $config, $context)
  11559. {
  11560. if (!$this->ip4) {
  11561. $this->_loadRegex();
  11562. }
  11563. if (preg_match('#^' . $this->ip4 . '$#s', $aIP)) {
  11564. return $aIP;
  11565. }
  11566. return false;
  11567. }
  11568. /**
  11569. * Lazy load function to prevent regex from being stuffed in
  11570. * cache.
  11571. */
  11572. protected function _loadRegex()
  11573. {
  11574. $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
  11575. $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
  11576. }
  11577. }
  11578. /**
  11579. * Validates an IPv6 address.
  11580. * @author Feyd @ forums.devnetwork.net (public domain)
  11581. * @note This function requires brackets to have been removed from address
  11582. * in URI.
  11583. */
  11584. class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
  11585. {
  11586. /**
  11587. * @param string $aIP
  11588. * @param HTMLPurifier_Config $config
  11589. * @param HTMLPurifier_Context $context
  11590. * @return bool|string
  11591. */
  11592. public function validate($aIP, $config, $context)
  11593. {
  11594. if (!$this->ip4) {
  11595. $this->_loadRegex();
  11596. }
  11597. $original = $aIP;
  11598. $hex = '[0-9a-fA-F]';
  11599. $blk = '(?:' . $hex . '{1,4})';
  11600. $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128
  11601. // prefix check
  11602. if (strpos($aIP, '/') !== false) {
  11603. if (preg_match('#' . $pre . '$#s', $aIP, $find)) {
  11604. $aIP = substr($aIP, 0, 0 - strlen($find[0]));
  11605. unset($find);
  11606. } else {
  11607. return false;
  11608. }
  11609. }
  11610. // IPv4-compatiblity check
  11611. if (preg_match('#(?<=:' . ')' . $this->ip4 . '$#s', $aIP, $find)) {
  11612. $aIP = substr($aIP, 0, 0 - strlen($find[0]));
  11613. $ip = explode('.', $find[0]);
  11614. $ip = array_map('dechex', $ip);
  11615. $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
  11616. unset($find, $ip);
  11617. }
  11618. // compression check
  11619. $aIP = explode('::', $aIP);
  11620. $c = count($aIP);
  11621. if ($c > 2) {
  11622. return false;
  11623. } elseif ($c == 2) {
  11624. list($first, $second) = $aIP;
  11625. $first = explode(':', $first);
  11626. $second = explode(':', $second);
  11627. if (count($first) + count($second) > 8) {
  11628. return false;
  11629. }
  11630. while (count($first) < 8) {
  11631. array_push($first, '0');
  11632. }
  11633. array_splice($first, 8 - count($second), 8, $second);
  11634. $aIP = $first;
  11635. unset($first, $second);
  11636. } else {
  11637. $aIP = explode(':', $aIP[0]);
  11638. }
  11639. $c = count($aIP);
  11640. if ($c != 8) {
  11641. return false;
  11642. }
  11643. // All the pieces should be 16-bit hex strings. Are they?
  11644. foreach ($aIP as $piece) {
  11645. if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) {
  11646. return false;
  11647. }
  11648. }
  11649. return $original;
  11650. }
  11651. }
  11652. /**
  11653. * Primitive email validation class based on the regexp found at
  11654. * http://www.regular-expressions.info/email.html
  11655. */
  11656. class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
  11657. {
  11658. /**
  11659. * @param string $string
  11660. * @param HTMLPurifier_Config $config
  11661. * @param HTMLPurifier_Context $context
  11662. * @return bool|string
  11663. */
  11664. public function validate($string, $config, $context)
  11665. {
  11666. // no support for named mailboxes i.e. "Bob <bob@example.com>"
  11667. // that needs more percent encoding to be done
  11668. if ($string == '') {
  11669. return false;
  11670. }
  11671. $string = trim($string);
  11672. $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string);
  11673. return $result ? $string : false;
  11674. }
  11675. }
  11676. /**
  11677. * Pre-transform that changes proprietary background attribute to CSS.
  11678. */
  11679. class HTMLPurifier_AttrTransform_Background extends HTMLPurifier_AttrTransform
  11680. {
  11681. /**
  11682. * @param array $attr
  11683. * @param HTMLPurifier_Config $config
  11684. * @param HTMLPurifier_Context $context
  11685. * @return array
  11686. */
  11687. public function transform($attr, $config, $context)
  11688. {
  11689. if (!isset($attr['background'])) {
  11690. return $attr;
  11691. }
  11692. $background = $this->confiscateAttr($attr, 'background');
  11693. // some validation should happen here
  11694. $this->prependCSS($attr, "background-image:url($background);");
  11695. return $attr;
  11696. }
  11697. }
  11698. // this MUST be placed in post, as it assumes that any value in dir is valid
  11699. /**
 * Post-transform that ensures that bdo tags have the dir attribute set.
  11701. */
  11702. class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
  11703. {
  11704. /**
  11705. * @param array $attr
  11706. * @param HTMLPurifier_Config $config
  11707. * @param HTMLPurifier_Context $context
  11708. * @return array
  11709. */
  11710. public function transform($attr, $config, $context)
  11711. {
  11712. if (isset($attr['dir'])) {
  11713. return $attr;
  11714. }
  11715. $attr['dir'] = $config->get('Attr.DefaultTextDir');
  11716. return $attr;
  11717. }
  11718. }
  11719. /**
  11720. * Pre-transform that changes deprecated bgcolor attribute to CSS.
  11721. */
  11722. class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform
  11723. {
  11724. /**
  11725. * @param array $attr
  11726. * @param HTMLPurifier_Config $config
  11727. * @param HTMLPurifier_Context $context
  11728. * @return array
  11729. */
  11730. public function transform($attr, $config, $context)
  11731. {
  11732. if (!isset($attr['bgcolor'])) {
  11733. return $attr;
  11734. }
  11735. $bgcolor = $this->confiscateAttr($attr, 'bgcolor');
  11736. // some validation should happen here
  11737. $this->prependCSS($attr, "background-color:$bgcolor;");
  11738. return $attr;
  11739. }
  11740. }
  11741. /**
 * Pre-transform that converts a boolean attribute to fixed CSS
  11743. */
  11744. class HTMLPurifier_AttrTransform_BoolToCSS extends HTMLPurifier_AttrTransform
  11745. {
  11746. /**
  11747. * Name of boolean attribute that is trigger.
  11748. * @type string
  11749. */
  11750. protected $attr;
  11751. /**
  11752. * CSS declarations to add to style, needs trailing semicolon.
  11753. * @type string
  11754. */
  11755. protected $css;
  11756. /**
  11757. * @param string $attr attribute name to convert from
  11758. * @param string $css CSS declarations to add to style (needs semicolon)
  11759. */
  11760. public function __construct($attr, $css)
  11761. {
  11762. $this->attr = $attr;
  11763. $this->css = $css;
  11764. }
  11765. /**
  11766. * @param array $attr
  11767. * @param HTMLPurifier_Config $config
  11768. * @param HTMLPurifier_Context $context
  11769. * @return array
  11770. */
  11771. public function transform($attr, $config, $context)
  11772. {
  11773. if (!isset($attr[$this->attr])) {
  11774. return $attr;
  11775. }
  11776. unset($attr[$this->attr]);
  11777. $this->prependCSS($attr, $this->css);
  11778. return $attr;
  11779. }
  11780. }
  11781. /**
  11782. * Pre-transform that changes deprecated border attribute to CSS.
  11783. */
  11784. class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform
  11785. {
  11786. /**
  11787. * @param array $attr
  11788. * @param HTMLPurifier_Config $config
  11789. * @param HTMLPurifier_Context $context
  11790. * @return array
  11791. */
  11792. public function transform($attr, $config, $context)
  11793. {
  11794. if (!isset($attr['border'])) {
  11795. return $attr;
  11796. }
  11797. $border_width = $this->confiscateAttr($attr, 'border');
  11798. // some validation should happen here
  11799. $this->prependCSS($attr, "border:{$border_width}px solid;");
  11800. return $attr;
  11801. }
  11802. }
  /**
   * Generic pre-transform that maps an attribute with a fixed (enumerated)
   * set of values onto CSS declarations.
   */
  class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform
  {
      /**
       * Name of the attribute this transform consumes.
       * @type string
       */
      protected $attr;

      /**
       * Lookup table: attribute value => CSS text to prepend.
       * @type array
       */
      protected $enumToCSS = array();

      /**
       * Whether lookup keys are matched case-sensitively.
       * @type bool
       * @warning Currently can only be guaranteed to work with ASCII
       *          values.
       */
      protected $caseSensitive = false;

      /**
       * @param string $attr Attribute name to transform from
       * @param array $enum_to_css Lookup array of attribute values to CSS
       * @param bool $case_sensitive Case sensitivity indicator, default false
       */
      public function __construct($attr, $enum_to_css, $case_sensitive = false)
      {
          $this->caseSensitive = (bool)$case_sensitive;
          $this->enumToCSS = $enum_to_css;
          $this->attr = $attr;
      }

      /**
       * Removes the configured attribute and, if its trimmed value is a key
       * of the lookup table, prepends the mapped CSS.
       *
       * The attribute is removed even when its value has no mapping.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          $name = $this->attr;
          if (!isset($attr[$name])) {
              return $attr;
          }

          $key = trim($attr[$name]);
          unset($attr[$name]);

          // case-insensitive matching is done by lowercasing the input
          if (!$this->caseSensitive) {
              $key = strtolower($key);
          }

          if (isset($this->enumToCSS[$key])) {
              $this->prependCSS($attr, $this->enumToCSS[$key]);
          }
          return $attr;
      }
  }
  // must be called POST validation
  /**
   * Transform that fills in default values for the src and alt attributes
   * of img tags, which also keeps the img tag from being removed because
   * of a missing alt. It needs to be registered as both a pre and a post
   * attribute transform.
   */
  class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
  {
      /**
       * Supplies `src` and `alt` when they are missing.
       *
       * - Missing `src`: returned unchanged when Core.RemoveInvalidImg is
       *   set; otherwise `src` becomes Attr.DefaultInvalidImage.
       * - Missing `alt`: Attr.DefaultInvalidImageAlt when `src` had to be
       *   defaulted; otherwise Attr.DefaultImageAlt, or, when that is null,
       *   the first 40 bytes of the basename of `src`.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          $had_src = isset($attr['src']);
          if (!$had_src) {
              if ($config->get('Core.RemoveInvalidImg')) {
                  return $attr;
              }
              $attr['src'] = $config->get('Attr.DefaultInvalidImage');
          }

          if (isset($attr['alt'])) {
              return $attr;
          }

          if (!$had_src) {
              $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
              return $attr;
          }

          $default_alt = $config->get('Attr.DefaultImageAlt');
          if ($default_alt !== null) {
              $attr['alt'] = $default_alt;
          } else {
              // truncate if the alt is too long
              $attr['alt'] = substr(basename($attr['src']), 0, 40);
          }
          return $attr;
      }
  }
  /**
   * Pre-transform that converts the deprecated hspace and vspace attributes
   * to CSS margins.
   */
  class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform
  {
      /**
       * Name of the attribute handled by this instance.
       * @type string
       */
      protected $attr;

      /**
       * Margin sides affected by each supported attribute.
       * @type array
       */
      protected $css = array(
          'hspace' => array('left', 'right'),
          'vspace' => array('top', 'bottom')
      );

      /**
       * Stores the attribute name; raises an error via trigger_error() when
       * the name is not a key of $css.
       *
       * @param string $attr
       */
      public function __construct($attr)
      {
          $this->attr = $attr;
          if (isset($this->css[$attr])) {
              return;
          }
          trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
      }

      /**
       * Replaces the handled attribute with "margin-<side>:<value>px;"
       * declarations, one per side listed in $css.
       *
       * When the instance was built with an unsupported name the attribute
       * is still taken out, but no CSS is written.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          $name = $this->attr;
          if (!isset($attr[$name])) {
              return $attr;
          }

          $width = $this->confiscateAttr($attr, $name);
          // some validation could happen here

          if (!isset($this->css[$name])) {
              return $attr;
          }

          $declarations = '';
          foreach ($this->css[$name] as $side) {
              $declarations .= "margin-{$side}:{$width}px;";
          }
          $this->prependCSS($attr, $declarations);
          return $attr;
      }
  }
  /**
   * Performs miscellaneous cross-attribute validation and filtering for
   * input elements. This is meant to be a post-transform.
   */
  class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform
  {
      /**
       * Validator applied to `size` on non-text, non-password inputs.
       * @type HTMLPurifier_AttrDef_HTML_Pixels
       */
      protected $pixels;

      public function __construct()
      {
          $this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels();
      }

      /**
       * Drops or normalises attributes that do not fit the input's type.
       *
       * The type defaults to "text" and is compared in lowercase:
       * - `checked` is kept only for radio/checkbox;
       * - `maxlength` is kept only for text/password;
       * - `size` on any other type is run through the pixels validator and
       *   removed when that returns false;
       * - `src` is kept only for image;
       * - radio/checkbox without `value` receive an empty `value`.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          $type = isset($attr['type']) ? strtolower($attr['type']) : 'text';
          $is_toggle = ($type === 'radio' || $type === 'checkbox');
          $is_textual = ($type === 'text' || $type === 'password');

          if (!$is_toggle && isset($attr['checked'])) {
              unset($attr['checked']);
          }

          if (!$is_textual && isset($attr['maxlength'])) {
              unset($attr['maxlength']);
          }

          if (!$is_textual && isset($attr['size'])) {
              $size = $this->pixels->validate($attr['size'], $config, $context);
              if ($size !== false) {
                  $attr['size'] = $size;
              } else {
                  unset($attr['size']);
              }
          }

          if ($type !== 'image' && isset($attr['src'])) {
              unset($attr['src']);
          }

          if ($is_toggle && !isset($attr['value'])) {
              $attr['value'] = '';
          }

          return $attr;
      }
  }
  /**
   * Post-transform that copies lang's value to xml:lang (and vice-versa).
   * @note Theoretically speaking, this could be a pre-transform, but putting
   *       post is more efficient.
   */
  class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
  {
      /**
       * Keeps `lang` and `xml:lang` in agreement.
       *
       * When `xml:lang` is present its value is written to `lang`
       * (overwriting any existing `lang`). Otherwise, when only `lang` is
       * present, it is copied to `xml:lang`.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          // xml:lang takes precedence whenever it is available
          $xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
          if ($xml_lang !== false) {
              $attr['lang'] = $xml_lang;
              return $attr;
          }

          $lang = isset($attr['lang']) ? $attr['lang'] : false;
          if ($lang !== false) {
              $attr['xml:lang'] = $lang;
          }
          return $attr;
      }
  }
  /**
   * Class for handling width/height length attribute transformations to CSS
   */
  class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
  {
      /**
       * Attribute name to read from.
       * @type string
       */
      protected $name;

      /**
       * CSS property name to write to.
       * @type string
       */
      protected $cssName;

      /**
       * @param string $name Attribute to transform
       * @param string|null $css_name CSS property to emit; any falsy value
       *        makes the attribute name double as the property name
       */
      public function __construct($name, $css_name = null)
      {
          if (!$css_name) {
              $css_name = $name;
          }
          $this->name = $name;
          $this->cssName = $css_name;
      }

      /**
       * Replaces the configured attribute with "<cssName>:<length>;",
       * appending "px" when ctype_digit() reports the value as all digits.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          $source = $this->name;
          if (!isset($attr[$source])) {
              return $attr;
          }

          $length = $this->confiscateAttr($attr, $source);
          // bare numbers are treated as pixel lengths
          $unit = ctype_digit($length) ? 'px' : '';
          $this->prependCSS($attr, $this->cssName . ":{$length}{$unit};");
          return $attr;
      }
  }
  /**
   * Pre-transform that changes deprecated name attribute to ID if necessary
   */
  class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
  {
      /**
       * Takes `name` out of the attribute map and reuses its value as `id`
       * unless an `id` is already set.
       *
       * Nothing happens when HTML.Attr.Name.UseCDATA is enabled or when no
       * `name` is present.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          // Abort early if we're using relaxed definition of name
          $relaxed = $config->get('HTML.Attr.Name.UseCDATA');
          if ($relaxed || !isset($attr['name'])) {
              return $attr;
          }

          $name = $this->confiscateAttr($attr, 'name');
          if (!isset($attr['id'])) {
              $attr['id'] = $name;
          }
          return $attr;
      }
  }
  /**
   * Post-transform that performs validation to the name attribute; if
   * it is present with an equivalent id attribute, it is passed through;
   * otherwise validation is performed.
   */
  class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
  {
      /**
       * Validator used for `name` values that are not mirrored by `id`.
       *
       * Declared explicitly: assigning to an undeclared property creates a
       * dynamic property, which PHP 8.2 deprecates. Kept public because the
       * dynamic property it replaces was publicly accessible.
       *
       * @type HTMLPurifier_AttrDef_HTML_ID
       */
      public $idDef;

      public function __construct()
      {
          $this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
      }

      /**
       * Validates `name` with the ID validator unless it is identical to
       * `id`; a `name` that fails validation is removed.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          if (!isset($attr['name'])) {
              return $attr;
          }
          $name = $attr['name'];
          // a name equal to the id is passed through without validation
          if (isset($attr['id']) && $attr['id'] === $name) {
              return $attr;
          }
          $result = $this->idDef->validate($name, $config, $context);
          if ($result === false) {
              unset($attr['name']);
          } else {
              $attr['name'] = $result;
          }
          return $attr;
      }
  }
  // must be called POST validation
  /**
   * Adds rel="nofollow" to all outbound links. This transform is
   * only attached if Attr.Nofollow is TRUE.
   */
  class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
  {
      /**
       * Parser used to break `href` into a URI object.
       * @type HTMLPurifier_URIParser
       */
      private $parser;

      public function __construct()
      {
          $this->parser = new HTMLPurifier_URIParser();
      }

      /**
       * Ensures `rel` contains "nofollow" when `href` has a browsable
       * scheme and the URI does not report itself as local.
       *
       * An existing `rel` is split on single spaces and "nofollow" is
       * appended only if it is not already one of the parts.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          if (!isset($attr['href'])) {
              return $attr;
          }

          // XXX Kind of inefficient
          $uri = $this->parser->parse($attr['href']);
          $scheme = $uri->getSchemeObj($config, $context);

          if (!$scheme->browsable || $uri->isLocal($config, $context)) {
              return $attr;
          }

          if (!isset($attr['rel'])) {
              $attr['rel'] = 'nofollow';
              return $attr;
          }

          $tokens = explode(' ', $attr['rel']);
          if (!in_array('nofollow', $tokens)) {
              $tokens[] = 'nofollow';
          }
          $attr['rel'] = implode(' ', $tokens);
          return $attr;
      }
  }
  /**
   * Forces a fixed set of attribute values, overwriting whatever was
   * supplied for them.
   */
  class HTMLPurifier_AttrTransform_SafeEmbed extends HTMLPurifier_AttrTransform
  {
      /**
       * @type string
       */
      public $name = "SafeEmbed";

      /**
       * Unconditionally sets `allowscriptaccess`, `allownetworking` and
       * `type` to their fixed values.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          $forced = array(
              'allowscriptaccess' => 'never',
              'allownetworking' => 'internal',
              'type' => 'application/x-shockwave-flash',
          );
          foreach ($forced as $key => $value) {
              $attr[$key] = $value;
          }
          return $attr;
      }
  }
  /**
   * Writes default type for all objects. Currently only supports flash.
   */
  class HTMLPurifier_AttrTransform_SafeObject extends HTMLPurifier_AttrTransform
  {
      /**
       * @type string
       */
      public $name = "SafeObject";

      /**
       * Sets `type` to the Flash MIME type when no `type` is present.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          if (isset($attr['type'])) {
              return $attr;
          }
          $attr['type'] = 'application/x-shockwave-flash';
          return $attr;
      }
  }
  /**
   * Validates name/value pairs in param tags to be used in safe objects. This
   * will only allow name values it recognizes, and pre-fill certain attributes
   * with required values.
   *
   * @note
   *      This class only supports Flash. In the future, Quicktime support
   *      may be added.
   *
   * @warning
   *      This class expects an injector to add the necessary parameters tags.
   */
  class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
  {
      /**
       * @type string
       */
      public $name = "SafeParam";

      /**
       * Validator for the movie/src value.
       * @type HTMLPurifier_AttrDef_URI
       */
      private $uri;

      /**
       * Validator for the wmode value.
       *
       * Declared explicitly: assigning to an undeclared property creates a
       * dynamic property, which PHP 8.2 deprecates. Kept public because the
       * dynamic property it replaces was publicly accessible.
       *
       * @type HTMLPurifier_AttrDef_Enum
       */
      public $wmode;

      public function __construct()
      {
          $this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
          $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
      }

      /**
       * Rewrites a param's name/value pair according to its name.
       *
       * Recognised names get a fixed or validated value; `src` is renamed
       * to `movie`; `flashvars` passes through unchanged; any other name
       * (including a missing one) has both `name` and `value` set to null.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          // Read both keys defensively: a param lacking name or value must
          // not raise an undefined-index diagnostic. A missing key is
          // treated as null, which is what the unguarded read produced.
          $name = isset($attr['name']) ? $attr['name'] : null;
          $value = isset($attr['value']) ? $attr['value'] : null;

          // If we add support for other objects, we'll need to alter the
          // transforms.
          switch ($name) {
              // application/x-shockwave-flash
              // Keep this synchronized with Injector/SafeObject.php
              case 'allowScriptAccess':
                  $attr['value'] = 'never';
                  break;
              case 'allowNetworking':
                  $attr['value'] = 'internal';
                  break;
              case 'allowFullScreen':
                  if ($config->get('HTML.FlashAllowFullScreen')) {
                      $attr['value'] = ($value == 'true') ? 'true' : 'false';
                  } else {
                      $attr['value'] = 'false';
                  }
                  break;
              case 'wmode':
                  $attr['value'] = $this->wmode->validate($value, $config, $context);
                  break;
              case 'movie':
              case 'src':
                  $attr['name'] = "movie";
                  $attr['value'] = $this->uri->validate($value, $config, $context);
                  break;
              case 'flashvars':
                  // we're going to allow arbitrary inputs to the SWF, on
                  // the reasoning that it could only hack the SWF, not us.
                  break;
              // add other cases to support other param name/value pairs
              default:
                  $attr['name'] = $attr['value'] = null;
          }
          return $attr;
      }
  }
  /**
   * Implements required attribute stipulation for <script>
   */
  class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
  {
      /**
       * Supplies `type="text/javascript"` when no `type` is present.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          if (isset($attr['type'])) {
              return $attr;
          }
          $attr['type'] = 'text/javascript';
          return $attr;
      }
  }
  // must be called POST validation
  /**
   * Adds target="_blank" to all outbound links. This transform is
   * only attached if Attr.TargetBlank is TRUE. This works regardless
   * of whether or not Attr.AllowedFrameTargets is set.
   */
  class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
  {
      /**
       * Parser used to break `href` into a URI object.
       * @type HTMLPurifier_URIParser
       */
      private $parser;

      public function __construct()
      {
          $this->parser = new HTMLPurifier_URIParser();
      }

      /**
       * Sets `target` to "_blank" when `href` has a browsable scheme and
       * the URI does not report itself as benign. Any existing `target`
       * is overwritten in that case.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          if (!isset($attr['href'])) {
              return $attr;
          }

          // XXX Kind of inefficient
          $uri = $this->parser->parse($attr['href']);
          $scheme = $uri->getSchemeObj($config, $context);

          if (!$scheme->browsable || $uri->isBenign($config, $context)) {
              return $attr;
          }

          $attr['target'] = '_blank';
          return $attr;
      }
  }
  /**
   * Sets height/width defaults for <textarea>
   */
  class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform
  {
      /**
       * Fills in `cols` and `rows` when they are missing.
       *
       * @param array $attr
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function transform($attr, $config, $context)
      {
          // Calculated from Firefox
          $defaults = array('cols' => '22', 'rows' => '3');
          foreach ($defaults as $key => $value) {
              if (!isset($attr[$key])) {
                  $attr[$key] = $value;
              }
          }
          return $attr;
      }
  }
  /**
   * Definition that uses different definitions depending on context.
   *
   * The del and ins tags are notable because they allow different types of
   * elements depending on whether or not they're in a block or inline
   * context. Chameleon makes that possible by holding two definitions and
   * picking one per call. While this is somewhat generalized, it is
   * specifically intended for those two tags.
   */
  class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
  {
      /**
       * Instance of the definition object to use when inline. Usually stricter.
       * @type HTMLPurifier_ChildDef_Optional
       */
      public $inline;

      /**
       * Instance of the definition object to use when block.
       * @type HTMLPurifier_ChildDef_Optional
       */
      public $block;

      /**
       * @type string
       */
      public $type = 'chameleon';

      /**
       * Builds both delegate definitions; $elements mirrors the block
       * definition's element lookup.
       *
       * @param array $inline List of elements to allow when inline.
       * @param array $block List of elements to allow when block.
       */
      public function __construct($inline, $block)
      {
          $inline_def = new HTMLPurifier_ChildDef_Optional($inline);
          $block_def = new HTMLPurifier_ChildDef_Optional($block);

          $this->inline = $inline_def;
          $this->block = $block_def;
          $this->elements = $block_def->elements;
      }

      /**
       * Delegates to the block definition when the context's IsInline
       * value is exactly false, and to the inline definition otherwise.
       *
       * @param HTMLPurifier_Node[] $children
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return bool|array Whatever the selected definition returns
       */
      public function validateChildren($children, $config, $context)
      {
          $delegate = ($context->get('IsInline') === false)
              ? $this->block
              : $this->inline;
          return $delegate->validateChildren($children, $config, $context);
      }
  }
  /**
   * Custom validation class, accepts DTD child definitions
   *
   * @warning Currently this class is an all or nothing proposition, that is,
   *          it will only give a bool return value.
   */
  class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
  {
      /**
       * @type string
       */
      public $type = 'custom';

      /**
       * @type bool
       */
      public $allow_empty = false;

      /**
       * Allowed child pattern as defined by the DTD.
       * @type string
       */
      public $dtd_regex;

      /**
       * PCRE regex derived from $dtd_regex.
       * @type string
       */
      private $_pcre_regex;

      /**
       * @param string $dtd_regex Allowed child pattern from the DTD
       */
      public function __construct($dtd_regex)
      {
          $this->dtd_regex = $dtd_regex;
          $this->_compileRegex();
      }

      /**
       * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
       * and records every element name found in it in $elements.
       */
      protected function _compileRegex()
      {
          $raw = str_replace(' ', '', $this->dtd_regex);
          // Wrap the pattern in parentheses unless it already starts with
          // one. Square-bracket offsets are used because the curly-brace
          // form ($raw{0}) is a parse error on PHP 8; the empty-string
          // check avoids reading an offset that does not exist.
          if ($raw === '' || $raw[0] != '(') {
              $raw = "($raw)";
          }
          $el = '[#a-zA-Z0-9_.-]+';
          $reg = $raw;

          // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
          // DOING! Seriously: if there's problems, please report them.

          // collect all elements into the $elements array
          preg_match_all("/$el/", $reg, $matches);
          foreach ($matches[0] as $match) {
              $this->elements[$match] = true;
          }

          // setup all elements as parentheticals with leading commas
          $reg = preg_replace("/$el/", '(,\\0)', $reg);

          // remove commas when they were not solicited
          $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);

          // remove all non-paranthetical commas: they are handled by first regex
          $reg = preg_replace("/,\(/", '(', $reg);

          $this->_pcre_regex = $reg;
      }

      /**
       * Matches the comma-separated names of the non-whitespace children
       * against the compiled pattern.
       *
       * @param HTMLPurifier_Node[] $children
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return bool True when the whole child sequence matches
       */
      public function validateChildren($children, $config, $context)
      {
          $list_of_children = '';
          foreach ($children as $node) {
              if (!empty($node->is_whitespace)) {
                  continue;
              }
              $list_of_children .= $node->name . ',';
          }
          // add leading comma to deal with stray comma declarations
          $list_of_children = ',' . rtrim($list_of_children, ',');
          $okay =
              preg_match(
                  '/^,?' . $this->_pcre_regex . '$/',
                  $list_of_children
              );
          return (bool)$okay;
      }
  }
  /**
   * Definition that disallows all elements.
   * @warning validateChildren() in this class is actually never called, because
   *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
   *          before child definitions are parsed in earnest by
   *          HTMLPurifier_Strategy_FixNesting.
   */
  class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
  {
      /**
       * @type bool
       */
      public $allow_empty = true;

      /**
       * @type string
       */
      public $type = 'empty';

      /**
       * Nothing to set up; takes no arguments.
       */
      public function __construct()
      {
      }

      /**
       * Always yields an empty child list, whatever was passed in.
       *
       * @param HTMLPurifier_Node[] $children
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function validateChildren($children, $config, $context)
      {
          $no_children = array();
          return $no_children;
      }
  }
  /**
   * Definition for list containers ul and ol.
   *
   * What does this do?  The big thing is to handle ol/ul at the top
   * level of list nodes, which should be handled specially by /folding/
   * them into the previous list node.  We generally shouldn't ever
   * see other disallowed elements, because the autoclose behavior
   * in MakeWellFormed handles it.
   */
  class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
  {
      /**
       * @type string
       */
      public $type = 'list';

      /**
       * Lookup of element names this definition claims to allow.
       *
       * lying a little bit, so that we can handle ul and ol ourselves
       * XXX: This whole business with 'wrap' is all a bit unsatisfactory
       *
       * @type array
       */
      public $elements = array('li' => true, 'ul' => true, 'ol' => true);

      /**
       * Flag reset to false at the start of every validateChildren() call.
       *
       * Declared explicitly: assigning to an undeclared property creates a
       * dynamic property, which PHP 8.2 deprecates. Kept public because the
       * dynamic property it replaces was publicly accessible.
       *
       * @type bool
       */
      public $whitespace = false;

      /**
       * Keeps li and whitespace children and tucks every other child into
       * the most recent li, creating a new li when none has been seen yet.
       *
       * @param array $children
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array|bool The new child list, or false when there are no
       *         children or only whitespace children
       */
      public function validateChildren($children, $config, $context)
      {
          // Flag for subclasses
          $this->whitespace = false;

          // if there are no tokens, delete parent node
          if (empty($children)) {
              return false;
          }

          // the new set of children
          $result = array();

          // a little sanity check to make sure it's not ALL whitespace
          $all_whitespace = true;

          $current_li = false;

          foreach ($children as $node) {
              if (!empty($node->is_whitespace)) {
                  $result[] = $node;
                  continue;
              }
              $all_whitespace = false; // phew, we're not talking about whitespace

              if ($node->name === 'li') {
                  // good
                  $current_li = $node;
                  $result[] = $node;
              } else {
                  // we want to tuck this into the previous li
                  // Invariant: we expect the node to be ol/ul
                  // ToDo: Make this more robust in the case of not ol/ul
                  // by distinguishing between existing li and li created
                  // to handle non-list elements; non-list elements should
                  // not be appended to an existing li; only li created
                  // for non-list. This distinction is not currently made.
                  if ($current_li === false) {
                      $current_li = new HTMLPurifier_Node_Element('li');
                      $result[] = $current_li;
                  }
                  $current_li->children[] = $node;
                  $current_li->empty = false; // XXX fascinating! Check for this error elsewhere ToDo
              }
          }
          if (empty($result)) {
              return false;
          }
          if ($all_whitespace) {
              return false;
          }
          return $result;
      }
  }
  /**
   * Definition that allows a set of elements, but disallows empty children.
   */
  class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
  {
      /**
       * Lookup table of allowed elements.
       * @type array
       */
      public $elements = array();

      /**
       * Whether or not the last passed node was all whitespace.
       * @type bool
       */
      protected $whitespace = false;

      /**
       * @type bool
       */
      public $allow_empty = false;

      /**
       * @type string
       */
      public $type = 'required';

      /**
       * Accepts a "a | b | c" string, a plain list of names, or a ready-made
       * lookup array. Strings and lists are turned into a name => true
       * lookup with blank names dropped; any other array is stored as-is.
       *
       * @param array|string $elements List of allowed element names (lowercase).
       */
      public function __construct($elements)
      {
          if (is_string($elements)) {
              $elements = explode('|', str_replace(' ', '', $elements));
          }

          // keys equal to 0..n-1 mean a plain list was given
          $keys = array_keys($elements);
          if ($keys == array_keys($keys)) {
              $lookup = array();
              foreach (array_flip($elements) as $name => $ignored) {
                  if (!empty($name)) { // remove blank
                      $lookup[$name] = true;
                  }
              }
              $elements = $lookup;
          }

          $this->elements = $elements;
      }

      /**
       * Filters the children down to the allowed ones.
       *
       * Whitespace nodes are always kept. A disallowed text node is kept
       * only when #PCDATA is allowed; a disallowed element is replaced by
       * its own children, which are then examined in turn; anything else
       * is dropped.
       *
       * @param array $children
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array|bool The filtered children, or false when nothing
       *         remains or everything that remains is whitespace
       */
      public function validateChildren($children, $config, $context)
      {
          // Flag for subclasses
          $this->whitespace = false;

          // if there are no tokens, delete parent node
          if (empty($children)) {
              return false;
          }

          // whether or not parsed character data is allowed
          // this controls whether or not we silently drop a tag
          // or generate escaped HTML from it
          $pcdata_allowed = isset($this->elements['#PCDATA']);

          $kept = array();
          $only_whitespace = true;

          // reversed so that array_pop() hands nodes back in document order
          $pending = array_reverse($children);
          while (count($pending) > 0) {
              $node = array_pop($pending);

              if (!empty($node->is_whitespace)) {
                  $kept[] = $node;
                  continue;
              }
              $only_whitespace = false;

              if (isset($this->elements[$node->name])) {
                  $kept[] = $node;
              } elseif ($pcdata_allowed && $node instanceof HTMLPurifier_Node_Text) {
                  // special case text
                  // XXX One of these ought to be redundant or something
                  $kept[] = $node;
              } elseif ($node instanceof HTMLPurifier_Node_Element) {
                  // spill the child contents in
                  // ToDo: Make configurable
                  $i = count($node->children);
                  while ($i-- > 0) {
                      $pending[] = $node->children[$i];
                  }
              }
          }

          if (empty($kept)) {
              return false;
          }
          if ($only_whitespace) {
              $this->whitespace = true;
              return false;
          }
          return $kept;
      }
  }
  /**
   * Definition that allows a set of elements, and allows no children.
   * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
   *       really, one shouldn't inherit from the other.  Only altered behavior
   *       is to overload a returned false with an array.  Thus, it will never
   *       return false.
   */
  class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
  {
      /**
       * @type bool
       */
      public $allow_empty = true;

      /**
       * @type string
       */
      public $type = 'optional';

      /**
       * Runs the parent validation and replaces a false result:
       * true when there were no children at all, the original children
       * when they were all whitespace, an empty array otherwise.
       *
       * @param array $children
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array|bool
       */
      public function validateChildren($children, $config, $context)
      {
          $result = parent::validateChildren($children, $config, $context);
          if ($result !== false) {
              return $result;
          }

          // we assume that $children is not modified
          if (empty($children)) {
              return true;
          }
          return $this->whitespace ? $children : array();
      }
  }
  /**
   * Takes the contents of blockquote when in strict and reformats for validation.
   */
  class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
  {
      /**
       * The element lookup this definition was constructed with.
       * @type array
       */
      protected $real_elements;

      /**
       * The 'Flow' content set plus #PCDATA, used while filtering.
       * @type array
       */
      protected $fake_elements;

      /**
       * @type bool
       */
      public $allow_empty = true;

      /**
       * @type string
       */
      public $type = 'strictblockquote';

      /**
       * Whether init() has already populated the two lookups.
       * @type bool
       */
      protected $init = false;

      /**
       * @param HTMLPurifier_Config $config
       * @return array
       * @note We don't want MakeWellFormed to auto-close inline elements since
       *       they might be allowed.
       */
      public function getAllowedElements($config)
      {
          $this->init($config);
          return $this->fake_elements;
      }

      /**
       * Filters the children with the wider lookup, then groups each run
       * of nodes that are not in the real lookup inside a block wrapper
       * element (named by the HTML definition's info_block_wrapper).
       *
       * A wrapper is opened at the first non-whitespace text node or
       * non-allowed element and closed by the next allowed element.
       *
       * @param array $children
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array
       */
      public function validateChildren($children, $config, $context)
      {
          $this->init($config);

          // trick the parent class into thinking it allows more
          $this->elements = $this->fake_elements;
          $result = parent::validateChildren($children, $config, $context);
          $this->elements = $this->real_elements;

          if ($result === false) {
              return array();
          }
          if ($result === true) {
              $result = $children;
          }

          $def = $config->getHTMLDefinition();
          $block_wrap_name = $def->info_block_wrapper;

          $ret = array();
          $wrapper = false; // the currently open wrapper element, if any
          foreach ($result as $node) {
              $is_element = $node instanceof HTMLPurifier_Node_Element;
              $is_allowed = $is_element && isset($this->elements[$node->name]);

              if ($wrapper !== false) {
                  // an allowed element ends the current wrapper
                  if ($is_allowed) {
                      $wrapper = false;
                  }
              } elseif (($node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace) ||
                  ($is_element && !$is_allowed)) {
                  $wrapper = new HTMLPurifier_Node_Element($block_wrap_name);
                  $ret[] = $wrapper;
              }

              if ($wrapper) {
                  $wrapper->children[] = $node;
              } else {
                  $ret[] = $node;
              }
          }
          return $ret;
      }

      /**
       * Populates $real_elements and $fake_elements on first use.
       *
       * @param HTMLPurifier_Config $config
       */
      private function init($config)
      {
          if ($this->init) {
              return;
          }
          $def = $config->getHTMLDefinition();
          // allow all inline elements
          $this->real_elements = $this->elements;
          $this->fake_elements = $def->info_content_sets['Flow'];
          $this->fake_elements['#PCDATA'] = true;
          $this->init = true;
      }
  }
  /**
   * Definition for tables.  The general idea is to extract out all of the
   * essential bits, and then reconstruct it later.
   *
   * This is a bit confusing, because the DTDs and the W3C
   * validators seem to disagree on the appropriate definition. The
   * DTD claims:
   *
   *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
   *
   * But actually, the HTML4 spec then has this to say:
   *
   *      The TBODY start tag is always required except when the table
   *      contains only one table body and no table head or foot sections.
   *      The TBODY end tag may always be safely omitted.
   *
   * So the DTD is kind of wrong, and the validator's behavior is,
   * unfortunately, hard to make sense of.
   *
   * The definition changed again in XHTML1.1; and in my opinion, this
   * formulation makes the most sense.
   *
   *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
   *
   * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
   * If we encounter a thead, tfoot or tbody, we are placed in the former
   * mode, and we *must* wrap any stray tr segments with a tbody. But if
   * we don't run into any of them, just having tr tags is OK.
   */
  class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
  {
      /**
       * @type bool
       */
      public $allow_empty = false;

      /**
       * @type string
       */
      public $type = 'table';

      /**
       * Lookup of the element names this definition deals with.
       * @type array
       */
      public $elements = array(
          'tr' => true,
          'tbody' => true,
          'thead' => true,
          'tfoot' => true,
          'caption' => true,
          'colgroup' => true,
          'col' => true
      );

      public function __construct()
      {
      }

      /**
       * Sorts the children of a table into caption / columns / head / foot /
       * body buckets and re-emits them in that order.
       *
       * @param array $children
       * @param HTMLPurifier_Config $config
       * @param HTMLPurifier_Context $context
       * @return array|bool Reordered children, or false when there are no
       *                    children or no tr/tbody content was found.
       */
      public function validateChildren($children, $config, $context)
      {
          if (empty($children)) {
              return false;
          }

          // only one of these elements is allowed in a table
          $caption = false;
          $thead = false;
          $tfoot = false;

          // whitespace
          $initial_ws = array();
          $after_caption_ws = array();
          $after_thead_ws = array();
          $after_tfoot_ws = array();

          // as many of these as you want
          $cols = array();
          $content = array();

          $tbody_mode = false; // if true, then we need to wrap any stray
                               // <tr>s with a <tbody>.

          // Reference to the bucket that trailing whitespace/comments are
          // appended to; it is re-bound each time a structural element is seen.
          $ws_accum =& $initial_ws;

          foreach ($children as $node) {
              if ($node instanceof HTMLPurifier_Node_Comment) {
                  $ws_accum[] = $node;
                  continue;
              }
              switch ($node->name) {
                  case 'tbody':
                      $tbody_mode = true;
                      // fall through
                  case 'tr':
                      $content[] = $node;
                      $ws_accum =& $content;
                      break;
                  case 'caption':
                      // there can only be one caption! (extra ones are dropped)
                      if ($caption !== false) {
                          break;
                      }
                      $caption = $node;
                      $ws_accum =& $after_caption_ws;
                      break;
                  case 'thead':
                      $tbody_mode = true;
                      // XXX This breaks rendering properties with
                      // Firefox, which never floats a <thead> to
                      // the top. Ever.  (Our scheme will float the
                      // first <thead> to the top.)  So maybe
                      // <thead>s that are not first should be
                      // turned into <tbody>? Very tricky, indeed.
                      if ($thead === false) {
                          $thead = $node;
                          $ws_accum =& $after_thead_ws;
                      } else {
                          // Oops, there's a second one! What
                          // should we do?  Current behavior is to
                          // turn the extra entry into a tbody and
                          // put it into content.
                          // Maybe a better idea is to *attach
                          // it* to the existing thead or tfoot?
                          // We don't do this, because Firefox
                          // doesn't float an extra tfoot to the
                          // bottom like it does for the first one.
                          $node->name = 'tbody';
                          $content[] = $node;
                          $ws_accum =& $content;
                      }
                      break;
                  case 'tfoot':
                      // see above for some caveats
                      $tbody_mode = true;
                      if ($tfoot === false) {
                          $tfoot = $node;
                          $ws_accum =& $after_tfoot_ws;
                      } else {
                          $node->name = 'tbody';
                          $content[] = $node;
                          $ws_accum =& $content;
                      }
                      break;
                  case 'colgroup':
                  case 'col':
                      $cols[] = $node;
                      $ws_accum =& $cols;
                      break;
                  case '#PCDATA':
                      // How is whitespace handled? We treat it as sticky to
                      // the *end* of the previous element. So all of the
                      // nonsense we have worked on is to keep things
                      // together. Non-whitespace text is discarded.
                      if (!empty($node->is_whitespace)) {
                          $ws_accum[] = $node;
                      }
                      break;
              }
          }

          if (empty($content)) {
              return false;
          }

          $ret = $initial_ws;
          if ($caption !== false) {
              $ret[] = $caption;
              $ret = array_merge($ret, $after_caption_ws);
          }
          // $cols is always an array (possibly empty), so it can be merged
          // unconditionally.
          $ret = array_merge($ret, $cols);
          if ($thead !== false) {
              $ret[] = $thead;
              $ret = array_merge($ret, $after_thead_ws);
          }
          if ($tfoot !== false) {
              $ret[] = $tfoot;
              $ret = array_merge($ret, $after_tfoot_ws);
          }

          if ($tbody_mode) {
              // we have to shuffle tr into tbody
              $current_tr_tbody = null;

              // NOTE(review): comment nodes collected into $content above also
              // pass through this switch; confirm they expose a `name` that
              // matches one of the cases, otherwise they are dropped here.
              foreach ($content as $node) {
                  switch ($node->name) {
                      case 'tbody':
                          $current_tr_tbody = null;
                          $ret[] = $node;
                          break;
                      case 'tr':
                          if ($current_tr_tbody === null) {
                              $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                              $ret[] = $current_tr_tbody;
                          }
                          $current_tr_tbody->children[] = $node;
                          break;
                      case '#PCDATA':
                          assert($node->is_whitespace);
                          if ($current_tr_tbody === null) {
                              $ret[] = $node;
                          } else {
                              $current_tr_tbody->children[] = $node;
                          }
                          break;
                  }
              }
          } else {
              $ret = array_merge($ret, $content);
          }

          return $ret;
      }
  }
  /**
   * Base decorator for definition caches: every cache operation is forwarded
   * unchanged to the wrapped cache object.
   */
  class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCache
  {
      /**
       * The cache object being decorated.
       * @type HTMLPurifier_DefinitionCache
       */
      public $cache;

      /**
       * Name of this decorator.
       * @var string
       */
      public $name;

      public function __construct()
      {
      }

      /**
       * Lazily builds a decorator around the given cache: a fresh copy of this
       * object is bound to $cache and takes over its type.
       *
       * @param HTMLPurifier_DefinitionCache $cache Reference to cache object to decorate
       * @return HTMLPurifier_DefinitionCache_Decorator
       */
      public function decorate(&$cache)
      {
          $wrapper = $this->copy();
          // reference is necessary for mocks in PHP 4
          $wrapper->cache =& $cache;
          $wrapper->type = $cache->type;

          return $wrapper;
      }

      /**
       * Cross-compatible clone substitute; returns a brand-new decorator.
       *
       * @return HTMLPurifier_DefinitionCache_Decorator
       */
      public function copy()
      {
          return new HTMLPurifier_DefinitionCache_Decorator();
      }

      /**
       * Forwards to the wrapped cache's add().
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return mixed
       */
      public function add($def, $config)
      {
          return $this->cache->add($def, $config);
      }

      /**
       * Forwards to the wrapped cache's set().
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return mixed
       */
      public function set($def, $config)
      {
          return $this->cache->set($def, $config);
      }

      /**
       * Forwards to the wrapped cache's replace().
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return mixed
       */
      public function replace($def, $config)
      {
          return $this->cache->replace($def, $config);
      }

      /**
       * Forwards to the wrapped cache's get().
       *
       * @param HTMLPurifier_Config $config
       * @return mixed
       */
      public function get($config)
      {
          return $this->cache->get($config);
      }

      /**
       * Forwards to the wrapped cache's remove().
       *
       * @param HTMLPurifier_Config $config
       * @return mixed
       */
      public function remove($config)
      {
          return $this->cache->remove($config);
      }

      /**
       * Forwards to the wrapped cache's flush().
       *
       * @param HTMLPurifier_Config $config
       * @return mixed
       */
      public function flush($config)
      {
          return $this->cache->flush($config);
      }

      /**
       * Forwards to the wrapped cache's cleanup().
       *
       * @param HTMLPurifier_Config $config
       * @return mixed
       */
      public function cleanup($config)
      {
          return $this->cache->cleanup($config);
      }
  }
  /**
   * Null cache object to use when no caching is on: nothing is ever stored,
   * and every operation simply reports false.
   */
  class HTMLPurifier_DefinitionCache_Null extends HTMLPurifier_DefinitionCache
  {

      /**
       * No-op add.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return bool Always false.
       */
      public function add($def, $config)
      {
          return false;
      }

      /**
       * No-op set.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return bool Always false.
       */
      public function set($def, $config)
      {
          return false;
      }

      /**
       * No-op replace.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return bool Always false.
       */
      public function replace($def, $config)
      {
          return false;
      }

      /**
       * No-op remove.
       *
       * @param HTMLPurifier_Config $config
       * @return bool Always false.
       */
      public function remove($config)
      {
          return false;
      }

      /**
       * Lookup that never finds anything.
       *
       * @param HTMLPurifier_Config $config
       * @return bool Always false.
       */
      public function get($config)
      {
          return false;
      }

      /**
       * No-op flush.
       *
       * @param HTMLPurifier_Config $config
       * @return bool Always false.
       */
      public function flush($config)
      {
          return false;
      }

      /**
       * No-op cleanup.
       *
       * @param HTMLPurifier_Config $config
       * @return bool Always false.
       */
      public function cleanup($config)
      {
          return false;
      }
  }
  /**
   * Definition cache that stores each definition as a serialize()d ".ser"
   * file inside a per-type directory below the serializer base path.
   */
  class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCache
  {

      /**
       * Stores the definition only if no cache file exists for it yet.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return int|bool|null Bytes written, false on failure or when the file
       *                       already exists, null when checkDefType() rejects
       *                       the definition.
       */
      public function add($def, $config)
      {
          if (!$this->checkDefType($def)) {
              return;
          }
          $file = $this->generateFilePath($config);
          if (file_exists($file)) {
              return false;
          }
          if (!$this->_prepareDir($config)) {
              return false;
          }
          return $this->_write($file, serialize($def), $config);
      }

      /**
       * Stores the definition, overwriting any existing cache file.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return int|bool|null Bytes written, false on failure, null when
       *                       checkDefType() rejects the definition.
       */
      public function set($def, $config)
      {
          if (!$this->checkDefType($def)) {
              return;
          }
          $file = $this->generateFilePath($config);
          if (!$this->_prepareDir($config)) {
              return false;
          }
          return $this->_write($file, serialize($def), $config);
      }

      /**
       * Stores the definition only if a cache file already exists for it.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return int|bool|null Bytes written, false on failure or when there is
       *                       no file to replace, null when checkDefType()
       *                       rejects the definition.
       */
      public function replace($def, $config)
      {
          if (!$this->checkDefType($def)) {
              return;
          }
          $file = $this->generateFilePath($config);
          if (!file_exists($file)) {
              return false;
          }
          if (!$this->_prepareDir($config)) {
              return false;
          }
          return $this->_write($file, serialize($def), $config);
      }

      /**
       * Loads and unserializes the cached definition for this configuration.
       *
       * @param HTMLPurifier_Config $config
       * @return bool|HTMLPurifier_Config False when no cache file exists,
       *                                  otherwise the unserialized contents.
       */
      public function get($config)
      {
          $file = $this->generateFilePath($config);
          if (!file_exists($file)) {
              return false;
          }
          return unserialize(file_get_contents($file));
      }

      /**
       * Deletes the cache file for this configuration.
       *
       * @param HTMLPurifier_Config $config
       * @return bool False when the file does not exist, otherwise the result
       *              of unlink().
       */
      public function remove($config)
      {
          $file = $this->generateFilePath($config);
          if (!file_exists($file)) {
              return false;
          }
          return unlink($file);
      }

      /**
       * Deletes every non-dot file in this cache's directory.
       *
       * @param HTMLPurifier_Config $config
       * @return bool True once the directory has been walked, false when the
       *              directory could not be prepared or opened.
       */
      public function flush($config)
      {
          if (!$this->_prepareDir($config)) {
              return false;
          }
          $dir = $this->generateDirectoryPath($config);
          $dh = opendir($dir);
          if ($dh === false) {
              // never hand an invalid handle to readdir()
              return false;
          }
          while (false !== ($filename = readdir($dh))) {
              if (empty($filename)) {
                  continue;
              }
              if ($filename[0] === '.') {
                  continue;
              }
              unlink($dir . '/' . $filename);
          }
          closedir($dh);
          return true;
      }

      /**
       * Deletes the cache files whose key isOld() reports as outdated.
       *
       * @param HTMLPurifier_Config $config
       * @return bool True once the directory has been walked, false when the
       *              directory could not be prepared or opened.
       */
      public function cleanup($config)
      {
          if (!$this->_prepareDir($config)) {
              return false;
          }
          $dir = $this->generateDirectoryPath($config);
          $dh = opendir($dir);
          if ($dh === false) {
              // never hand an invalid handle to readdir()
              return false;
          }
          while (false !== ($filename = readdir($dh))) {
              if (empty($filename)) {
                  continue;
              }
              if ($filename[0] === '.') {
                  continue;
              }
              // drop the four-character extension (".ser") to recover the key
              $key = substr($filename, 0, strlen($filename) - 4);
              if ($this->isOld($key, $config)) {
                  unlink($dir . '/' . $filename);
              }
          }
          closedir($dh);
          return true;
      }

      /**
       * Generates the file path to the serial file corresponding to
       * the configuration and definition name
       * @param HTMLPurifier_Config $config
       * @return string
       * @todo Make protected
       */
      public function generateFilePath($config)
      {
          $key = $this->generateKey($config);
          return $this->generateDirectoryPath($config) . '/' . $key . '.ser';
      }

      /**
       * Generates the path to the directory containing this cache's serial files
       * @param HTMLPurifier_Config $config
       * @return string
       * @note No trailing slash
       * @todo Make protected
       */
      public function generateDirectoryPath($config)
      {
          $base = $this->generateBaseDirectoryPath($config);
          return $base . '/' . $this->type;
      }

      /**
       * Generates path to base directory that contains all definition type
       * serials: %Cache.SerializerPath when set, otherwise a directory below
       * HTMLPURIFIER_PREFIX.
       * @param HTMLPurifier_Config $config
       * @return mixed|string
       * @todo Make protected
       */
      public function generateBaseDirectoryPath($config)
      {
          $base = $config->get('Cache.SerializerPath');
          $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base;
          return $base;
      }

      /**
       * Convenience wrapper function for file_put_contents
       * @param string $file File name to write to
       * @param string $data Data to write into file
       * @param HTMLPurifier_Config $config
       * @return int|bool Number of bytes written if success, or false if failure.
       */
      private function _write($file, $data, $config)
      {
          $result = file_put_contents($file, $data);
          if ($result !== false) {
              // set permissions of the new file (no execute)
              $chmod = $config->get('Cache.SerializerPermissions');
              if (!$chmod) {
                  $chmod = 0644; // invalid config or simpletest
              }
              $chmod = $chmod & 0666;
              chmod($file, $chmod);
          }
          return $result;
      }

      /**
       * Prepares the directory that this type stores the serials in
       * @param HTMLPurifier_Config $config
       * @return bool True if successful
       */
      private function _prepareDir($config)
      {
          $directory = $this->generateDirectoryPath($config);
          $chmod = $config->get('Cache.SerializerPermissions');
          if (!$chmod) {
              $chmod = 0755; // invalid config or simpletest
          }
          if (!is_dir($directory)) {
              $base = $this->generateBaseDirectoryPath($config);
              if (!is_dir($base)) {
                  trigger_error(
                      'Base directory ' . $base . ' does not exist, ' .
                      'please create or change using %Cache.SerializerPath',
                      E_USER_WARNING
                  );
                  return false;
              } elseif (!$this->_testPermissions($base, $chmod)) {
                  return false;
              }
              // clear the umask so the requested mode is applied as given
              $old = umask(0000);
              $created = mkdir($directory, $chmod);
              umask($old);
              // mkdir() raises its own warning on failure; only report success
              // if the directory really exists now (it may have been created
              // concurrently by someone else).
              if (!$created && !is_dir($directory)) {
                  return false;
              }
          } elseif (!$this->_testPermissions($directory, $chmod)) {
              return false;
          }
          return true;
      }

      /**
       * Tests permissions on a directory and throws out friendly
       * error messages and attempts to chmod it itself if possible
       * @param string $dir Directory path
       * @param int $chmod Permissions
       * @return bool True if directory is writable
       */
      private function _testPermissions($dir, $chmod)
      {
          // early abort, if it is writable, everything is hunky-dory
          if (is_writable($dir)) {
              return true;
          }
          if (!is_dir($dir)) {
              // generally, you'll want to handle this beforehand
              // so a more specific error message can be given
              trigger_error(
                  'Directory ' . $dir . ' does not exist',
                  E_USER_WARNING
              );
              return false;
          }
          if (function_exists('posix_getuid')) {
              // POSIX system, we can give more specific advice
              if (fileowner($dir) === posix_getuid()) {
                  // we can chmod it ourselves
                  $chmod = $chmod | 0700;
                  if (chmod($dir, $chmod)) {
                      return true;
                  }
              } elseif (filegroup($dir) === posix_getgid()) {
                  $chmod = $chmod | 0070;
              } else {
                  // PHP's probably running as nobody, so we'll
                  // need to give global permissions
                  $chmod = $chmod | 0777;
              }
              trigger_error(
                  'Directory ' . $dir . ' not writable, ' .
                  'please chmod to ' . decoct($chmod),
                  E_USER_WARNING
              );
          } else {
              // generic error message
              trigger_error(
                  'Directory ' . $dir . ' not writable, ' .
                  'please alter file permissions',
                  E_USER_WARNING
              );
          }
          return false;
      }
  }
  /**
   * Definition cache decorator class that cleans up the cache
   * whenever there is a cache miss (i.e. whenever the decorated
   * operation returns a falsy result).
   */
  class HTMLPurifier_DefinitionCache_Decorator_Cleanup extends HTMLPurifier_DefinitionCache_Decorator
  {
      /**
       * @type string
       */
      public $name = 'Cleanup';

      /**
       * Creates a fresh instance of this decorator.
       *
       * @return HTMLPurifier_DefinitionCache_Decorator_Cleanup
       */
      public function copy()
      {
          return new HTMLPurifier_DefinitionCache_Decorator_Cleanup();
      }

      /**
       * Adds the definition; triggers a cleanup when the add did not succeed.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return mixed Result of the decorated add().
       */
      public function add($def, $config)
      {
          $outcome = parent::add($def, $config);
          if ($outcome) {
              return $outcome;
          }
          parent::cleanup($config);
          return $outcome;
      }

      /**
       * Sets the definition; triggers a cleanup when the set did not succeed.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return mixed Result of the decorated set().
       */
      public function set($def, $config)
      {
          $outcome = parent::set($def, $config);
          if ($outcome) {
              return $outcome;
          }
          parent::cleanup($config);
          return $outcome;
      }

      /**
       * Replaces the definition; triggers a cleanup when the replace did not
       * succeed.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return mixed Result of the decorated replace().
       */
      public function replace($def, $config)
      {
          $outcome = parent::replace($def, $config);
          if ($outcome) {
              return $outcome;
          }
          parent::cleanup($config);
          return $outcome;
      }

      /**
       * Fetches the definition; triggers a cleanup when nothing was found.
       *
       * @param HTMLPurifier_Config $config
       * @return mixed Result of the decorated get().
       */
      public function get($config)
      {
          $found = parent::get($config);
          if ($found) {
              return $found;
          }
          parent::cleanup($config);
          return $found;
      }
  }
  /**
   * Definition cache decorator class that saves all cache retrievals
   * to PHP's memory; good for unit tests or circumstances where
   * there are lots of configuration objects floating around.
   */
  class HTMLPurifier_DefinitionCache_Decorator_Memory extends HTMLPurifier_DefinitionCache_Decorator
  {
      /**
       * In-memory copies of definitions, indexed by generateKey($config).
       * @type array
       */
      protected $definitions;

      /**
       * @type string
       */
      public $name = 'Memory';

      /**
       * Creates a fresh instance of this decorator.
       *
       * @return HTMLPurifier_DefinitionCache_Decorator_Memory
       */
      public function copy()
      {
          return new HTMLPurifier_DefinitionCache_Decorator_Memory();
      }

      /**
       * Adds the definition and, when that succeeds, remembers it in memory.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return mixed Result of the decorated add().
       */
      public function add($def, $config)
      {
          $outcome = parent::add($def, $config);
          if (!$outcome) {
              return $outcome;
          }
          $this->definitions[$this->generateKey($config)] = $def;
          return $outcome;
      }

      /**
       * Sets the definition and, when that succeeds, remembers it in memory.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return mixed Result of the decorated set().
       */
      public function set($def, $config)
      {
          $outcome = parent::set($def, $config);
          if (!$outcome) {
              return $outcome;
          }
          $this->definitions[$this->generateKey($config)] = $def;
          return $outcome;
      }

      /**
       * Replaces the definition and, when that succeeds, remembers it in
       * memory.
       *
       * @param HTMLPurifier_Definition $def
       * @param HTMLPurifier_Config $config
       * @return mixed Result of the decorated replace().
       */
      public function replace($def, $config)
      {
          $outcome = parent::replace($def, $config);
          if (!$outcome) {
              return $outcome;
          }
          $this->definitions[$this->generateKey($config)] = $def;
          return $outcome;
      }

      /**
       * Returns the remembered definition for this configuration, consulting
       * the decorated cache (and remembering its answer) only when nothing is
       * held in memory yet.
       *
       * @param HTMLPurifier_Config $config
       * @return mixed
       */
      public function get($config)
      {
          $key = $this->generateKey($config);
          if (!isset($this->definitions[$key])) {
              $this->definitions[$key] = parent::get($config);
          }
          return $this->definitions[$key];
      }
  }
  /**
   * XHTML 1.1 Bi-directional Text Module, defines elements that
   * declare directionality of content. Text Extension Module.
   */
  class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
  {

      /**
       * @type string
       */
      public $name = 'Bdo';

      /**
       * The 'dir' entry starts out as false and is filled in by setup().
       * @type array
       */
      public $attr_collections = array(
          'I18N' => array('dir' => false)
      );

      /**
       * Registers the bdo element and the 'dir' attribute of the I18N
       * collection.
       *
       * @param HTMLPurifier_Config $config
       */
      public function setup($config)
      {
          // The Abstract Module specification has the attribute
          // inclusions wrong for bdo: bdo allows Lang
          $collections = array('Core', 'Lang');
          $attributes = array(
              'dir' => 'Enum#ltr,rtl', // required
          );

          $element = $this->addElement('bdo', 'Inline', 'Inline', $collections, $attributes);
          $element->attr_transform_post[] = new HTMLPurifier_AttrTransform_BdoDir();

          $this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl';
      }
  }
  /**
   * Module that declares the shared attribute collections
   * (Core, Lang, I18N and Common); it defines no elements itself.
   */
  class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
  {
      /**
       * @type string
       */
      public $name = 'CommonAttributes';

      /**
       * Attribute collections. Index 0 of a collection lists the other
       * collections it pulls in; string keys map attribute names to their
       * definitions.
       * @type array
       */
      public $attr_collections = array(
          'Core' => array(
              0 => array('Style'),
              // 'xml:space' => false,
              'class' => 'Class',
              'id' => 'ID',
              'title' => 'CDATA',
          ),
          'Lang' => array(),
          'I18N' => array(
              // proprietary, for xml:lang/lang
              0 => array('Lang'),
          ),
          'Common' => array(
              0 => array('Core', 'I18N'),
          ),
      );
  }
  /**
   * XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
   * Module.
   */
  class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
  {

      /**
       * @type string
       */
      public $name = 'Edit';

      /**
       * Signals that this module supplies its own getChildDef().
       * @type bool
       */
      public $defines_child_def = true;

      /**
       * Registers the del and ins elements with a chameleon content model.
       *
       * HTML 4.01 specifies that ins/del must not contain block
       * elements when used in an inline context; chameleon is
       * a complicated workaround to achieve this effect.
       * The content model reads: Inline context ! Block context
       * (the exclamation mark is the separator, see getChildDef for parsing).
       *
       * @param HTMLPurifier_Config $config
       */
      public function setup($config)
      {
          $attributes = array(
              'cite' => 'URI',
              // 'datetime' => 'Datetime', // not implemented
          );
          $content_model = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';

          foreach (array('del', 'ins') as $tag) {
              $this->addElement($tag, 'Inline', $content_model, 'Common', $attributes);
          }
      }

      /**
       * Builds the chameleon child definition by splitting the content model
       * on '!' into its inline and block halves.
       *
       * @param HTMLPurifier_ElementDef $def
       * @return HTMLPurifier_ChildDef_Chameleon|bool False when the element's
       *         content model type is not 'chameleon'.
       */
      public function getChildDef($def)
      {
          if ($def->content_model_type == 'chameleon') {
              $halves = explode('!', $def->content_model);
              return new HTMLPurifier_ChildDef_Chameleon($halves[0], $halves[1]);
          }
          return false;
      }
  }
  13733. /**
  13734. * XHTML 1.1 Forms module, defines all form-related elements found in HTML 4.
  13735. */
  13736. class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
  13737. {
  13738. /**
  13739. * @type string
  13740. */
  13741. public $name = 'Forms';
  13742. /**
  13743. * @type bool
  13744. */
  13745. public $safe = false;
  13746. /**
  13747. * @type array
  13748. */
  13749. public $content_sets = array(
  13750. 'Block' => 'Form',
  13751. 'Inline' => 'Formctrl',
  13752. );
  13753. /**
  13754. * @param HTMLPurifier_Config $config
  13755. */
  13756. public function setup($config)
  13757. {
  13758. $form = $this->addElement(
  13759. 'form',
  13760. 'Form',
  13761. 'Required: Heading | List | Block | fieldset',
  13762. 'Common',
  13763. array(
  13764. 'accept' => 'ContentTypes',
  13765. 'accept-charset' => 'Charsets',
  13766. 'action*' => 'URI',
  13767. 'method' => 'Enum#get,post',
  13768. // really ContentType, but these two are the only ones used today
  13769. 'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data',
  13770. )
  13771. );
  13772. $form->excludes = array('form' => true);
  13773. $input = $this->addElement(
  13774. 'input',
  13775. 'Formctrl',
  13776. 'Empty',
  13777. 'Common',
  13778. array(
  13779. 'accept' => 'ContentTypes',
  13780. 'accesskey' => 'Character',
  13781. 'alt' => 'Text',
  13782. 'checked' => 'Bool#checked',
  13783. 'disabled' => 'Bool#disabled',
  13784. 'maxlength' => 'Number',
  13785. 'name' => 'CDATA',
  13786. 'readonly' => 'Bool#readonly',
  13787. 'size' => 'Number',
  13788. 'src' => 'URI#embedded',
  13789. 'tabindex' => 'Number',
  13790. 'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
  13791. 'value' => 'CDATA',
  13792. )
  13793. );
  13794. $input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input();
  13795. $this->addElement(
  13796. 'select',
  13797. 'Formctrl',
  13798. 'Required: optgroup | option',
  13799. 'Common',
  13800. array(
  13801. 'disabled' => 'Bool#disabled',
  13802. 'multiple' => 'Bool#multiple',
  13803. 'name' => 'CDATA',
  13804. 'size' => 'Number',
  13805. 'tabindex' => 'Number',
  13806. )
  13807. );
  13808. $this->addElement(
  13809. 'option',
  13810. false,
  13811. 'Optional: #PCDATA',
  13812. 'Common',
  13813. array(
  13814. 'disabled' => 'Bool#disabled',
  13815. 'label' => 'Text',
  13816. 'selected' => 'Bool#selected',
  13817. 'value' => 'CDATA',
  13818. )
  13819. );
  13820. // It's illegal for there to be more than one selected, but not
  13821. // be multiple. Also, no selected means undefined behavior. This might
  13822. // be difficult to implement; perhaps an injector, or a context variable.
  13823. $textarea = $this->addElement(
  13824. 'textarea',
  13825. 'Formctrl',
  13826. 'Optional: #PCDATA',
  13827. 'Common',
  13828. array(
  13829. 'accesskey' => 'Character',
  13830. 'cols*' => 'Number',
  13831. 'disabled' => 'Bool#disabled',
  13832. 'name' => 'CDATA',
  13833. 'readonly' => 'Bool#readonly',
  13834. 'rows*' => 'Number',
  13835. 'tabindex' => 'Number',
  13836. )
  13837. );
  13838. $textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea();
  13839. $button = $this->addElement(
  13840. 'button',
  13841. 'Formctrl',
  13842. 'Optional: #PCDATA | Heading | List | Block | Inline',
  13843. 'Common',
  13844. array(
  13845. 'accesskey' => 'Character',
  13846. 'disabled' => 'Bool#disabled',
  13847. 'name' => 'CDATA',
  13848. 'tabindex' => 'Number',
  13849. 'type' => 'Enum#button,submit,reset',
  13850. 'value' => 'CDATA',
  13851. )
  13852. );
  13853. // For exclusions, ideally we'd specify content sets, not literal elements
  13854. $button->excludes = $this->makeLookup(
  13855. 'form',
  13856. 'fieldset', // Form
  13857. 'input',
  13858. 'select',
  13859. 'textarea',
  13860. 'label',
  13861. 'button', // Formctrl
  13862. 'a', // as per HTML 4.01 spec, this is omitted by modularization
  13863. 'isindex',
  13864. 'iframe' // legacy items
  13865. );
  13866. // Extra exclusion: img usemap="" is not permitted within this element.
  13867. // We'll omit this for now, since we don't have any good way of
  13868. // indicating it yet.
  13869. // This is HIGHLY user-unfriendly; we need a custom child-def for this
  13870. $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common');
  13871. $label = $this->addElement(
  13872. 'label',
  13873. 'Formctrl',
  13874. 'Optional: #PCDATA | Inline',
  13875. 'Common',
  13876. array(
  13877. 'accesskey' => 'Character',
  13878. // 'for' => 'IDREF', // IDREF not implemented, cannot allow
  13879. )
  13880. );
  13881. $label->excludes = array('label' => true);
  13882. $this->addElement(
  13883. 'legend',
  13884. false,
  13885. 'Optional: #PCDATA | Inline',
  13886. 'Common',
  13887. array(
  13888. 'accesskey' => 'Character',
  13889. )
  13890. );
  13891. $this->addElement(
  13892. 'optgroup',
  13893. false,
  13894. 'Required: option',
  13895. 'Common',
  13896. array(
  13897. 'disabled' => 'Bool#disabled',
  13898. 'label*' => 'Text',
  13899. )
  13900. );
  13901. // Don't forget an injector for <isin