PageRenderTime 70ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 2ms

/framework/vendors/htmlpurifier/HTMLPurifier.standalone.php

https://bitbucket.org/gencer/yii
PHP | 15832 lines | 8290 code | 1913 blank | 5629 comment | 1329 complexity | cea3a5f66e6653741b92440cd6d59233 MD5 | raw file
Possible License(s): BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. * @file
  4. * This file was auto-generated by generate-includes.php and includes all of
  5. * the core files required by HTML Purifier. Use this if performance is a
  6. * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
  7. * FILE, changes will be overwritten the next time the script is run.
  8. *
  9. * @version 4.6.0
  10. *
  11. * @warning
  12. * You must *not* include any other HTML Purifier files before this file,
  13. * because 'require' not 'require_once' is used.
  14. *
  15. * @warning
  16. * This file requires that the include path contains the HTML Purifier
  17. * library directory; this is not auto-set.
  18. */
  19. /*! @mainpage
  20. *
  21. * HTML Purifier is an HTML filter that will take an arbitrary snippet of
  22. * HTML and rigorously test, validate and filter it into a version that
  23. * is safe for output onto webpages. It achieves this by:
  24. *
  25. * -# Lexing (parsing into tokens) the document,
  26. * -# Executing various strategies on the tokens:
  27. * -# Removing all elements not in the whitelist,
  28. * -# Making the tokens well-formed,
  29. * -# Fixing the nesting of the nodes, and
  30. * -# Validating attributes of the nodes; and
  31. * -# Generating HTML from the purified tokens.
  32. *
  33. * However, most users will only need to interface with the HTMLPurifier
  34. * and HTMLPurifier_Config.
  35. */
  36. /*
  37. HTML Purifier 4.6.0 - Standards Compliant HTML Filtering
  38. Copyright (C) 2006-2008 Edward Z. Yang
  39. This library is free software; you can redistribute it and/or
  40. modify it under the terms of the GNU Lesser General Public
  41. License as published by the Free Software Foundation; either
  42. version 2.1 of the License, or (at your option) any later version.
  43. This library is distributed in the hope that it will be useful,
  44. but WITHOUT ANY WARRANTY; without even the implied warranty of
  45. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  46. Lesser General Public License for more details.
  47. You should have received a copy of the GNU Lesser General Public
  48. License along with this library; if not, write to the Free Software
  49. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  50. */
  /**
   * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
   *
   * @note Configuration can be supplied at two points. From lowest to
   *       highest precedence they are:
   *          -# Instance: new HTMLPurifier($config)
   *          -# Invocation: purify($html, $config)
   *       The two are entirely independent and are *not* merged: a config
   *       handed to purify() replaces the instance config for that call
   *       (this behavior may change in the future).
   *
   * @todo We need an easier way to inject strategies using the configuration
   *       object.
   */
  class HTMLPurifier
  {
      /**
       * Version of HTML Purifier.
       * @type string
       */
      public $version = '4.6.0';

      /**
       * Constant with version of HTML Purifier.
       */
      const VERSION = '4.6.0';

      /**
       * Global configuration object.
       * @type HTMLPurifier_Config
       */
      public $config;

      /**
       * Extra filter objects added through the deprecated addFilter();
       * kept for backwards compatibility.
       * @type HTMLPurifier_Filter[]
       */
      private $filters = array();

      /**
       * Single shared instance handed out by instance().
       * @type HTMLPurifier
       */
      private static $instance;

      /**
       * @type HTMLPurifier_Strategy_Core
       */
      protected $strategy;

      /**
       * @type HTMLPurifier_Generator
       */
      protected $generator;

      /**
       * Context of the most recent purification run.
       * After purifyArray() this is an array of contexts keyed like the input.
       * @type HTMLPurifier_Context
       */
      public $context;

      /**
       * Initializes the purifier.
       *
       * @param HTMLPurifier_Config $config Optional configuration used by every
       *        call that does not pass its own. Anything accepted by
       *        HTMLPurifier_Config::create() may be given here.
       */
      public function __construct($config = null)
      {
          $this->config = HTMLPurifier_Config::create($config);
          $this->strategy = new HTMLPurifier_Strategy_Core();
      }

      /**
       * Adds a filter to process the output. First come first serve.
       *
       * Deprecated: raises an E_USER_WARNING on every call, then still
       * records the filter.
       *
       * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
       */
      public function addFilter($filter)
      {
          trigger_error(
              'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
              ' in the Filter namespace or Filter.Custom',
              E_USER_WARNING
          );
          $this->filters[] = $filter;
      }

      /**
       * Filters an HTML snippet/document to be XSS-free and standards-compliant.
       *
       * @param string $html String of HTML to purify
       * @param HTMLPurifier_Config $config Config object for this operation;
       *        when omitted (or falsy) the config given at construction is
       *        used. Anything accepted by HTMLPurifier_Config::create() works.
       *
       * @return string Purified HTML
       */
      public function purify($html, $config = null)
      {
          // :TODO: make the config merge in, instead of replace
          if ($config) {
              $config = HTMLPurifier_Config::create($config);
          } else {
              $config = $this->config;
          }

          // which lexer we get depends partly on the environment and partly
          // on the configuration
          $lexer = HTMLPurifier_Lexer::create($config);

          $context = new HTMLPurifier_Context();

          // set up the HTML generator
          $this->generator = new HTMLPurifier_Generator($config, $context);
          $context->register('Generator', $this->generator);

          // set up global context variables
          if ($config->get('Core.CollectErrors')) {
              // may get moved out if other facilities use it
              $language = HTMLPurifier_LanguageFactory::instance()->create($config, $context);
              $context->register('Locale', $language);

              $error_collector = new HTMLPurifier_ErrorCollector($context);
              $context->register('ErrorCollector', $error_collector);
          }

          // the ID accumulator lives in the context because AttrValidator
          // can be called from many places
          $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
          $context->register('IDAccumulator', $id_accumulator);

          $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

          // collect the filters: built-in ones enabled by a Filter.* flag,
          // then Filter.Custom, then anything added through addFilter()
          $filter_flags = $config->getBatch('Filter');
          $custom_filters = $filter_flags['Custom'];
          unset($filter_flags['Custom']);

          $filters = array();
          foreach ($filter_flags as $name => $enabled) {
              // flags containing a dot are skipped; they do not name a filter class
              if ($enabled && strpos($name, '.') === false) {
                  $class = "HTMLPurifier_Filter_$name";
                  $filters[] = new $class;
              }
          }
          foreach ($custom_filters as $custom) {
              // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
              $filters[] = $custom;
          }
          $filters = array_merge($filters, $this->filters);

          // maybe prepare(), but later
          foreach ($filters as $active) {
              $html = $active->preFilter($html, $config, $context);
          }

          // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
          $tokens = $lexer->tokenizeHTML($html, $config, $context);
          $tokens = $this->strategy->execute($tokens, $config, $context);
          $html = $this->generator->generateFromTokens($tokens);

          // post-filters run in the opposite order of the pre-filters
          foreach (array_reverse($filters) as $active) {
              $html = $active->postFilter($html, $config, $context);
          }

          $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
          $this->context =& $context;
          return $html;
      }

      /**
       * Filters an array of HTML snippets.
       *
       * @param string[] $array_of_html Array of html snippets
       * @param HTMLPurifier_Config $config Optional config object for this operation.
       *        See HTMLPurifier::purify() for more details.
       *
       * @return string[] Array of purified HTML, keyed like the input
       */
      public function purifyArray($array_of_html, $config = null)
      {
          $contexts = array();
          foreach ($array_of_html as $key => $snippet) {
              $array_of_html[$key] = $this->purify($snippet, $config);
              // purify() leaves the context of its run in $this->context
              $contexts[$key] = $this->context;
          }
          $this->context = $contexts;
          return $array_of_html;
      }

      /**
       * Singleton for enforcing just one HTML Purifier in your system.
       *
       * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
       *        HTMLPurifier instance to overload singleton with,
       *        or HTMLPurifier_Config instance to configure the
       *        generated version with.
       *
       * @return HTMLPurifier
       */
      public static function instance($prototype = null)
      {
          // reuse the existing singleton unless the caller asks to replace it
          if (self::$instance && !$prototype) {
              return self::$instance;
          }

          if ($prototype instanceof HTMLPurifier) {
              self::$instance = $prototype;
          } elseif ($prototype) {
              self::$instance = new HTMLPurifier($prototype);
          } else {
              self::$instance = new HTMLPurifier();
          }
          return self::$instance;
      }

      /**
       * Singleton for enforcing just one HTML Purifier in your system.
       *
       * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
       *        HTMLPurifier instance to overload singleton with,
       *        or HTMLPurifier_Config instance to configure the
       *        generated version with.
       *
       * @return HTMLPurifier
       * @note Backwards compatibility, see instance()
       */
      public static function getInstance($prototype = null)
      {
          return self::instance($prototype);
      }
  }
  /**
   * Converts a stream of HTMLPurifier_Token into an HTMLPurifier_Node,
   * and back again.
   *
   * @note This transformation is not an equivalence. We mutate the input
   *       token stream to make it so; see all [MUT] markers in code.
   */
  class HTMLPurifier_Arborize
  {
      /**
       * Builds a node tree out of a flat token list.
       *
       * The root node is created from a start token named after the HTML
       * definition's info_parent; every token becomes a child of the
       * innermost element that is still open.
       *
       * @param HTMLPurifier_Token[] $tokens Tokens to convert (mutated, see [MUT])
       * @param HTMLPurifier_Config $config Supplies the HTML definition
       * @param HTMLPurifier_Context $context Not used by this method
       * @return HTMLPurifier_Node Root node of the tree
       */
      public static function arborize($tokens, $config, $context)
      {
          $definition = $config->getHTMLDefinition();
          $root_token = new HTMLPurifier_Token_Start($definition->info_parent);
          // elements that are currently open, innermost last
          $open = array($root_token->toNode());

          foreach ($tokens as $token) {
              $token->skip = null; // [MUT]
              $token->carryover = null; // [MUT]

              if (!($token instanceof HTMLPurifier_Token_End)) {
                  // attach to the innermost open element
                  $node = $token->toNode();
                  $open[count($open) - 1]->children[] = $node;
                  if ($token instanceof HTMLPurifier_Token_Start) {
                      $open[] = $node;
                  }
                  continue;
              }

              // an end token closes the innermost open element and
              // contributes its position/armor to that node
              $token->start = null; // [MUT]
              $closed = array_pop($open);
              assert($closed->name === $token->name);
              assert(empty($token->attr));
              $closed->endCol = $token->col;
              $closed->endLine = $token->line;
              $closed->endArmor = $token->armor;
          }

          // only the root may remain open
          assert(count($open) == 1);
          return $open[0];
      }

      /**
       * Turns a node tree back into a flat token list.
       *
       * The tokens of the node passed in are not emitted (nothing is emitted
       * for level 0); only its descendants are.
       *
       * @param HTMLPurifier_Node $node Root of the tree to flatten
       * @param HTMLPurifier_Config $config Not used by this method
       * @param HTMLPurifier_Context $context Not used by this method
       * @return HTMLPurifier_Token[]
       */
      public static function flatten($node, $config, $context)
      {
          $depth = 0;
          // one queue of not-yet-visited nodes per nesting level
          $queues = array($depth => new HTMLPurifier_Queue(array($node)));
          // end tokens waiting to be emitted, per nesting level
          $pending_ends = array();
          $out = array();

          do {
              while (!$queues[$depth]->isEmpty()) {
                  $current = $queues[$depth]->shift(); // FIFO
                  list($start, $end) = $current->toTokenPair();
                  if ($depth > 0) {
                      $out[] = $start;
                  }
                  if ($end !== null) {
                      $pending_ends[$depth][] = $end;
                  }
                  if ($current instanceof HTMLPurifier_Node_Element) {
                      // descend: the children become the queue of the next level
                      $depth++;
                      $queues[$depth] = new HTMLPurifier_Queue();
                      foreach ($current->children as $child) {
                          $queues[$depth]->push($child);
                      }
                  }
              }

              // this level is exhausted: go back up and emit the end tokens
              // that were waiting there, most recent first
              $depth--;
              if ($depth && isset($pending_ends[$depth])) {
                  while ($closing = array_pop($pending_ends[$depth])) {
                      $out[] = $closing;
                  }
              }
          } while ($depth > 0);

          return $out;
      }
  }
  /**
   * Defines common attribute collections that modules reference.
   */
  class HTMLPurifier_AttrCollections
  {
      /**
       * Associative array of attribute collections, indexed by name.
       * @type array
       */
      public $info = array();

      /**
       * Gathers the attribute collections (and collection extensions) of all
       * modules, then resolves inclusions and string identifiers so that the
       * result can be used by other code.
       *
       * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
       * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
       */
      public function __construct($attr_types, $modules)
      {
          // load extensions from the modules
          foreach ($modules as $module) {
              foreach ($module->attr_collections as $coll_name => $collection) {
                  if (!isset($this->info[$coll_name])) {
                      $this->info[$coll_name] = array();
                  }
                  foreach ($collection as $attr_name => $attr) {
                      // index 0 holds the list of included collections; lists
                      // coming from several modules are concatenated, every
                      // other entry simply overwrites the previous one
                      $append_includes = ($attr_name === 0 && isset($this->info[$coll_name][$attr_name]));
                      if ($append_includes) {
                          $attr = array_merge($this->info[$coll_name][$attr_name], $attr);
                      }
                      $this->info[$coll_name][$attr_name] = $attr;
                  }
              }
          }

          // perform internal expansions and inclusions
          foreach (array_keys($this->info) as $name) {
              // merge attribute collections that include others
              $this->performInclusions($this->info[$name]);
              // replace string identifiers with actual attribute objects
              $this->expandIdentifiers($this->info[$name], $attr_types);
          }
      }

      /**
       * Takes a reference to an attribute associative array and performs
       * all inclusions specified by the zero index.
       *
       * Attributes already present in $attr are never overwritten, and the
       * zero index is removed once all inclusions are done.
       *
       * @param array &$attr Reference to attribute array
       */
      public function performInclusions(&$attr)
      {
          if (!isset($attr[0])) {
              return;
          }

          $pending = $attr[0];
          $visited = array(); // recursion guard

          // $pending may grow while we walk it, hence the index loop
          for ($i = 0; isset($pending[$i]); $i++) {
              $name = $pending[$i];
              if (isset($visited[$name])) {
                  continue;
              }
              $visited[$name] = true;

              if (!isset($this->info[$name])) {
                  continue;
              }
              $included = $this->info[$name];

              // copy over every attribute of the inclusion that we lack
              foreach ($included as $key => $value) {
                  if (!isset($attr[$key])) { // also catches more inclusions
                      $attr[$key] = $value;
                  }
              }

              if (isset($included[0])) {
                  // recursion: queue the inclusions of the inclusion
                  $pending = array_merge($pending, $included[0]);
              }
          }

          unset($attr[0]);
      }

      /**
       * Expands all string identifiers in an attribute array by replacing
       * them with the appropriate values inside HTMLPurifier_AttrTypes.
       *
       * A '*' in an attribute name marks the attribute as required and is
       * stripped from the name. Entries whose definition is false, or whose
       * type cannot be resolved, are removed.
       *
       * @param array &$attr Reference to attribute array
       * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
       */
      public function expandIdentifiers(&$attr, $attr_types)
      {
          // because foreach will process new elements we add, make sure we
          // skip duplicates
          $done = array();

          foreach ($attr as $def_i => $def) {
              // skip inclusions and names we have already handled
              if ($def_i === 0 || isset($done[$def_i])) {
                  continue;
              }

              // determine whether or not attribute is required
              $required = (strpos($def_i, '*') !== false);
              if ($required) {
                  // rename the definition
                  unset($attr[$def_i]);
                  $def_i = trim($def_i, '*');
                  $attr[$def_i] = $def;
              }
              $done[$def_i] = true;

              // if we've already got a literal object, move on
              if (is_object($def)) {
                  // preserve previous required
                  $attr[$def_i]->required = ($required || $attr[$def_i]->required);
                  continue;
              }

              if ($def === false) {
                  unset($attr[$def_i]);
                  continue;
              }

              $resolved = $attr_types->get($def);
              if ($resolved) {
                  $attr[$def_i] = $resolved;
                  $attr[$def_i]->required = $required;
              } else {
                  unset($attr[$def_i]);
              }
          }
      }
  }
  /**
   * Base class for all validating attribute definitions.
   *
   * This family of classes forms the core for not only HTML attribute validation,
   * but also any sort of string that needs to be validated or cleaned (which
   * means CSS properties and composite definitions are defined here too).
   * Besides defining (through code) what precisely makes the string valid,
   * subclasses are also responsible for cleaning the code if possible.
   */
  abstract class HTMLPurifier_AttrDef
  {
      /**
       * Tells us whether or not an HTML attribute is minimized.
       * Has no meaning in other contexts.
       * @type bool
       */
      public $minimized = false;

      /**
       * Tells us whether or not an HTML attribute is required.
       * Has no meaning in other contexts.
       * @type bool
       */
      public $required = false;

      /**
       * Validates and cleans passed string according to a definition.
       *
       * @param string $string String to be validated and cleaned.
       * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
       * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
       */
      abstract public function validate($string, $config, $context);

      /**
       * Convenience method that parses a string as if it were CDATA.
       *
       * This method processes a string roughly in the manner specified at
       * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
       * whitespace is removed, then every line feed, tab and carriage return
       * is replaced with a space. While most useful for HTML attributes
       * specified as CDATA, it can also be applied to most CSS values.
       *
       * @note This method is not entirely standards compliant: line feeds
       *       are turned into spaces rather than ignored, and trim() removes
       *       more types of whitespace than specified in the spec. In
       *       practice, this is rarely a problem, as those extra characters
       *       usually have already been removed by HTMLPurifier_Encoder.
       *
       * @warning This processing is inconsistent with XML's whitespace handling
       *          as specified by section 3.3.3 and referenced XHTML 1.0 section
       *          4.7. However, note that we are NOT necessarily
       *          parsing XML, thus, this behavior may still be correct. We
       *          assume that newlines have been normalized.
       *
       * @param string $string String to normalize
       * @return string
       */
      public function parseCDATA($string)
      {
          $string = trim($string);
          $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
          return $string;
      }

      /**
       * Factory method for creating this class from a string.
       * @param string $string String construction info
       * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
       */
      public function make($string)
      {
          // default implementation, return a flyweight of this object.
          // If $string has an effect on the returned object (i.e. you
          // need to overload this method), it is best
          // to clone or instantiate new copies. (Instantiation is safer.)
          return $this;
      }

      /**
       * Removes the whitespace inside rgb(...) colour functions so that
       * shorthand CSS properties, which are split on spaces, work properly.
       * THIS IS A HACK!
       *
       * Each component may be an integer or a (possibly fractional)
       * percentage, and whitespace is tolerated around every component,
       * including right after "(" and right before ")". Anything that does
       * not look like a complete rgb() triple is left untouched; no
       * validation of the values happens here.
       *
       * @param string $string a CSS colour definition
       * @return string
       */
      protected function mungeRgb($string)
      {
          // one colour component with optional surrounding whitespace;
          // only the component itself is captured
          $component = '\s*(\d+(?:\.\d+)?%?)\s*';
          return preg_replace(
              '/rgb\(' . $component . ',' . $component . ',' . $component . '\)/',
              'rgb(\1,\2,\3)',
              $string
          );
      }

      /**
       * Parses a possibly escaped CSS string and returns the "pure"
       * version of it.
       *
       * Handled forms:
       *  - backslash + 1 to 6 hex digits: replaced by the character with that
       *    code point; one whitespace character directly after the digits is
       *    treated as the terminator of the escape and dropped
       *  - backslash + line feed: both are dropped
       *  - backslash + any other character: the character is kept, the
       *    backslash is dropped
       *  - a lone backslash at the very end of the string is kept
       *
       * @param string $string CSS string, possibly containing escapes
       * @return string
       */
      protected function expandCSSEscape($string)
      {
          // flexibly parse it
          $ret = '';
          for ($i = 0, $c = strlen($string); $i < $c; $i++) {
              if ($string[$i] === '\\') {
                  $i++;
                  if ($i >= $c) {
                      // trailing backslash: nothing to escape, keep it
                      $ret .= '\\';
                      break;
                  }
                  if (ctype_xdigit($string[$i])) {
                      // collect up to six hex digits in total
                      $code = $string[$i];
                      for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                          if (!ctype_xdigit($string[$i])) {
                              break;
                          }
                          $code .= $string[$i];
                      }
                      // We have to be extremely careful when adding
                      // new characters, to make sure we're not breaking
                      // the encoding.
                      $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                      if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
                          // NOTE(review): $i already points at the character
                          // after the escape, so the loop increment skips that
                          // character even when it is not whitespace. Left
                          // unchanged here; confirm whether this is intended.
                          continue;
                      }
                      $ret .= $char;
                      // $i points at the character after the digits. If it
                      // is not whitespace, step back so the loop increment
                      // lands on it again; otherwise let the increment
                      // swallow the terminating whitespace.
                      if ($i < $c && trim($string[$i]) !== '') {
                          $i--;
                      }
                      continue;
                  }
                  if ($string[$i] === "\n") {
                      // escaped line feed: drop it
                      continue;
                  }
              }
              $ret .= $string[$i];
          }
          return $ret;
      }
  }
  /**
   * Processes an entire attribute array for corrections needing multiple values.
   *
   * Occasionally, a certain attribute will need to be removed and popped onto
   * another value. Instead of creating a complex return syntax for
   * HTMLPurifier_AttrDef, we just pass the whole attribute array to a
   * specialized object and have that do the special work. That is the
   * family of HTMLPurifier_AttrTransform.
   *
   * An attribute transformation can be assigned to run before or after
   * HTMLPurifier_AttrDef validation. See HTMLPurifier_HTMLDefinition for
   * more details.
   */
  abstract class HTMLPurifier_AttrTransform
  {
      /**
       * Abstract: makes changes to the attributes dependent on multiple values.
       *
       * @param array $attr Assoc array of attributes, usually from
       *        HTMLPurifier_Token_Tag::$attr
       * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
       * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
       * @return array Processed attribute array.
       */
      abstract public function transform($attr, $config, $context);

      /**
       * Puts CSS in front of whatever the style attribute already holds;
       * the attribute is created when it is not set yet.
       *
       * @param array &$attr Attribute array to process (passed by reference)
       * @param string $css CSS to prepend
       */
      public function prependCSS(&$attr, $css)
      {
          $existing = isset($attr['style']) ? $attr['style'] : '';
          $attr['style'] = $css . $existing;
      }

      /**
       * Retrieves and removes an attribute.
       *
       * @param array &$attr Attribute array to process (passed by reference)
       * @param mixed $key Key of attribute to confiscate
       * @return mixed The attribute's value, or null when it is not set
       *         (in which case $attr is left untouched)
       */
      public function confiscateAttr(&$attr, $key)
      {
          $value = null;
          if (isset($attr[$key])) {
              $value = $attr[$key];
              unset($attr[$key]);
          }
          return $value;
      }
  }
  /**
   * Provides lookup array of attribute types to HTMLPurifier_AttrDef objects.
   */
  class HTMLPurifier_AttrTypes
  {
      /**
       * Lookup array of attribute string identifiers to concrete implementations.
       * @type HTMLPurifier_AttrDef[]
       */
      protected $info = array();

      /**
       * Constructs the info array, supplying default implementations for attribute
       * types.
       */
      public function __construct()
      {
          // XXX This is kind of poor, since we don't actually /clone/
          // instances; instead, we use the supplied make() attribute. So,
          // the underlying class must know how to deal with arguments.
          // With the old implementation of Enum, that ignored its
          // arguments when handling a make dispatch, the IAlign
          // definition wouldn't work.
          $this->info = array(
              // pseudo-types, must be instantiated via shorthand
              'Enum' => new HTMLPurifier_AttrDef_Enum(),
              'Bool' => new HTMLPurifier_AttrDef_HTML_Bool(),

              'CDATA' => new HTMLPurifier_AttrDef_Text(),
              'ID' => new HTMLPurifier_AttrDef_HTML_ID(),
              'Length' => new HTMLPurifier_AttrDef_HTML_Length(),
              'MultiLength' => new HTMLPurifier_AttrDef_HTML_MultiLength(),
              'NMTOKENS' => new HTMLPurifier_AttrDef_HTML_Nmtokens(),
              'Pixels' => new HTMLPurifier_AttrDef_HTML_Pixels(),
              'Text' => new HTMLPurifier_AttrDef_Text(),
              'URI' => new HTMLPurifier_AttrDef_URI(),
              'LanguageCode' => new HTMLPurifier_AttrDef_Lang(),
              'Color' => new HTMLPurifier_AttrDef_HTML_Color(),
              'IAlign' => self::makeEnum('top,middle,bottom,left,right'),
              'LAlign' => self::makeEnum('top,bottom,left,right'),
              'FrameTarget' => new HTMLPurifier_AttrDef_HTML_FrameTarget(),

              // unimplemented aliases
              'ContentType' => new HTMLPurifier_AttrDef_Text(),
              'ContentTypes' => new HTMLPurifier_AttrDef_Text(),
              'Charsets' => new HTMLPurifier_AttrDef_Text(),
              'Character' => new HTMLPurifier_AttrDef_Text(),

              // "proprietary" types
              'Class' => new HTMLPurifier_AttrDef_HTML_Class(),

              // number is really a positive integer (one or more digits)
              // FIXME: ^^ not always, see start and value of list items
              'Number' => new HTMLPurifier_AttrDef_Integer(false, false, true),
          );
      }

      /**
       * Wraps an Enum of the given values in a Clone definition.
       * @param string $in Comma-separated list of allowed values
       * @return HTMLPurifier_AttrDef_Clone
       */
      private static function makeEnum($in)
      {
          $values = explode(',', $in);
          return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum($values));
      }

      /**
       * Retrieves a type.
       *
       * Everything after the first '#' in $type is not part of the type name;
       * it is passed to the registered implementation's make() (an empty
       * string when there is no '#'). An unknown type name raises an
       * E_USER_ERROR.
       *
       * @param string $type String type name
       * @return HTMLPurifier_AttrDef Object AttrDef for type
       */
      public function get($type)
      {
          // determine if there is any extra info tacked on
          $parts = explode('#', $type, 2);
          $name = $parts[0];
          $extra = isset($parts[1]) ? $parts[1] : '';

          if (!isset($this->info[$name])) {
              trigger_error('Cannot retrieve undefined attribute type ' . $name, E_USER_ERROR);
              return;
          }
          return $this->info[$name]->make($extra);
      }

      /**
       * Sets a new implementation for a type.
       * @param string $type String type name
       * @param HTMLPurifier_AttrDef $impl Object AttrDef for type
       */
      public function set($type, $impl)
      {
          $this->info[$type] = $impl;
      }
  }
  731. /**
  732. * Validates the attributes of a token. Doesn't manage required attributes
  733. * very well. The only reason we factored this out was because RemoveForeignElements
  734. * also needed it besides ValidateAttributes.
  735. */
  736. class HTMLPurifier_AttrValidator
  737. {
  738. /**
  739. * Validates the attributes of a token, mutating it as necessary.
  740. * that has valid tokens
  741. * @param HTMLPurifier_Token $token Token to validate.
  742. * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
  743. * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
  744. */
  745. public function validateToken($token, $config, $context)
  746. {
  747. $definition = $config->getHTMLDefinition();
  748. $e =& $context->get('ErrorCollector', true);
  749. // initialize IDAccumulator if necessary
  750. $ok =& $context->get('IDAccumulator', true);
  751. if (!$ok) {
  752. $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
  753. $context->register('IDAccumulator', $id_accumulator);
  754. }
  755. // initialize CurrentToken if necessary
  756. $current_token =& $context->get('CurrentToken', true);
  757. if (!$current_token) {
  758. $context->register('CurrentToken', $token);
  759. }
  760. if (!$token instanceof HTMLPurifier_Token_Start &&
  761. !$token instanceof HTMLPurifier_Token_Empty
  762. ) {
  763. return;
  764. }
  765. // create alias to global definition array, see also $defs
  766. // DEFINITION CALL
  767. $d_defs = $definition->info_global_attr;
  768. // don't update token until the very end, to ensure an atomic update
  769. $attr = $token->attr;
  770. // do global transformations (pre)
  771. // nothing currently utilizes this
  772. foreach ($definition->info_attr_transform_pre as $transform) {
  773. $attr = $transform->transform($o = $attr, $config, $context);
  774. if ($e) {
  775. if ($attr != $o) {
  776. $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  777. }
  778. }
  779. }
  780. // do local transformations only applicable to this element (pre)
  781. // ex. <p align="right"> to <p style="text-align:right;">
  782. foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
  783. $attr = $transform->transform($o = $attr, $config, $context);
  784. if ($e) {
  785. if ($attr != $o) {
  786. $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  787. }
  788. }
  789. }
  790. // create alias to this element's attribute definition array, see
  791. // also $d_defs (global attribute definition array)
  792. // DEFINITION CALL
  793. $defs = $definition->info[$token->name]->attr;
  794. $attr_key = false;
  795. $context->register('CurrentAttr', $attr_key);
  796. // iterate through all the attribute keypairs
  797. // Watch out for name collisions: $key has previously been used
  798. foreach ($attr as $attr_key => $value) {
  799. // call the definition
  800. if (isset($defs[$attr_key])) {
  801. // there is a local definition defined
  802. if ($defs[$attr_key] === false) {
  803. // We've explicitly been told not to allow this element.
  804. // This is usually when there's a global definition
  805. // that must be overridden.
  806. // Theoretically speaking, we could have a
  807. // AttrDef_DenyAll, but this is faster!
  808. $result = false;
  809. } else {
  810. // validate according to the element's definition
  811. $result = $defs[$attr_key]->validate(
  812. $value,
  813. $config,
  814. $context
  815. );
  816. }
  817. } elseif (isset($d_defs[$attr_key])) {
  818. // there is a global definition defined, validate according
  819. // to the global definition
  820. $result = $d_defs[$attr_key]->validate(
  821. $value,
  822. $config,
  823. $context
  824. );
  825. } else {
  826. // system never heard of the attribute? DELETE!
  827. $result = false;
  828. }
  829. // put the results into effect
  830. if ($result === false || $result === null) {
  831. // this is a generic error message that should replaced
  832. // with more specific ones when possible
  833. if ($e) {
  834. $e->send(E_ERROR, 'AttrValidator: Attribute removed');
  835. }
  836. // remove the attribute
  837. unset($attr[$attr_key]);
  838. } elseif (is_string($result)) {
  839. // generally, if a substitution is happening, there
  840. // was some sort of implicit correction going on. We'll
  841. // delegate it to the attribute classes to say exactly what.
  842. // simple substitution
  843. $attr[$attr_key] = $result;
  844. } else {
  845. // nothing happens
  846. }
  847. // we'd also want slightly more complicated substitution
  848. // involving an array as the return value,
  849. // although we're not sure how colliding attributes would
  850. // resolve (certain ones would be completely overridden,
  851. // others would prepend themselves).
  852. }
  853. $context->destroy('CurrentAttr');
  854. // post transforms
  855. // global (error reporting untested)
  856. foreach ($definition->info_attr_transform_post as $transform) {
  857. $attr = $transform->transform($o = $attr, $config, $context);
  858. if ($e) {
  859. if ($attr != $o) {
  860. $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  861. }
  862. }
  863. }
  864. // local (error reporting untested)
  865. foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
  866. $attr = $transform->transform($o = $attr, $config, $context);
  867. if ($e) {
  868. if ($attr != $o) {
  869. $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  870. }
  871. }
  872. }
  873. $token->attr = $attr;
  874. // destroy CurrentToken if we made it ourselves
  875. if (!$current_token) {
  876. $context->destroy('CurrentToken');
  877. }
  878. }
  879. }
  880. // constants are slow, so we use as few as possible
  881. if (!defined('HTMLPURIFIER_PREFIX')) {
  882. define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
  883. set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());
  884. }
  885. // accomodations for versions earlier than 5.0.2
  886. // borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
  887. if (!defined('PHP_EOL')) {
  888. switch (strtoupper(substr(PHP_OS, 0, 3))) {
  889. case 'WIN':
  890. define('PHP_EOL', "\r\n");
  891. break;
  892. case 'DAR':
  893. define('PHP_EOL', "\r");
  894. break;
  895. default:
  896. define('PHP_EOL', "\n");
  897. }
  898. }
  899. /**
  900. * Bootstrap class that contains meta-functionality for HTML Purifier such as
  901. * the autoload function.
  902. *
  903. * @note
  904. * This class may be used without any other files from HTML Purifier.
  905. */
  906. class HTMLPurifier_Bootstrap
  907. {
  908. /**
  909. * Autoload function for HTML Purifier
  910. * @param string $class Class to load
  911. * @return bool
  912. */
  913. public static function autoload($class)
  914. {
  915. $file = HTMLPurifier_Bootstrap::getPath($class);
  916. if (!$file) {
  917. return false;
  918. }
  919. // Technically speaking, it should be ok and more efficient to
  920. // just do 'require', but Antonio Parraga reports that with
  921. // Zend extensions such as Zend debugger and APC, this invariant
  922. // may be broken. Since we have efficient alternatives, pay
  923. // the cost here and avoid the bug.
  924. require_once HTMLPURIFIER_PREFIX . '/' . $file;
  925. return true;
  926. }
  927. /**
  928. * Returns the path for a specific class.
  929. * @param string $class Class path to get
  930. * @return string
  931. */
  932. public static function getPath($class)
  933. {
  934. if (strncmp('HTMLPurifier', $class, 12) !== 0) {
  935. return false;
  936. }
  937. // Custom implementations
  938. if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
  939. $code = str_replace('_', '-', substr($class, 22));
  940. $file = 'HTMLPurifier/Language/classes/' . $code . '.php';
  941. } else {
  942. $file = str_replace('_', '/', $class) . '.php';
  943. }
  944. if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) {
  945. return false;
  946. }
  947. return $file;
  948. }
  949. /**
  950. * "Pre-registers" our autoloader on the SPL stack.
  951. */
  952. public static function registerAutoload()
  953. {
  954. $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
  955. if (($funcs = spl_autoload_functions()) === false) {
  956. spl_autoload_register($autoload);
  957. } elseif (function_exists('spl_autoload_unregister')) {
  958. if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
  959. // prepend flag exists, no need for shenanigans
  960. spl_autoload_register($autoload, true, true);
  961. } else {
  962. $buggy = version_compare(PHP_VERSION, '5.2.11', '<');
  963. $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
  964. version_compare(PHP_VERSION, '5.1.0', '>=');
  965. foreach ($funcs as $func) {
  966. if ($buggy && is_array($func)) {
  967. // :TRICKY: There are some compatibility issues and some
  968. // places where we need to error out
  969. $reflector = new ReflectionMethod($func[0], $func[1]);
  970. if (!$reflector->isStatic()) {
  971. throw new Exception(
  972. 'HTML Purifier autoloader registrar is not compatible
  973. with non-static object methods due to PHP Bug #44144;
  974. Please do not use HTMLPurifier.autoload.php (or any
  975. file that includes this file); instead, place the code:
  976. spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
  977. after your own autoloaders.'
  978. );
  979. }
  980. // Suprisingly, spl_autoload_register supports the
  981. // Class::staticMethod callback format, although call_user_func doesn't
  982. if ($compat) {
  983. $func = implode('::', $func);
  984. }
  985. }
  986. spl_autoload_unregister($func);
  987. }
  988. spl_autoload_register($autoload);
  989. foreach ($funcs as $func) {
  990. spl_autoload_register($func);
  991. }
  992. }
  993. }
  994. }
  995. }
  996. /**
  997. * Super-class for definition datatype objects, implements serialization
  998. * functions for the class.
  999. */
  1000. abstract class HTMLPurifier_Definition
  1001. {
  1002. /**
  1003. * Has setup() been called yet?
  1004. * @type bool
  1005. */
  1006. public $setup = false;
  1007. /**
  1008. * If true, write out the final definition object to the cache after
  1009. * setup. This will be true only if all invocations to get a raw
  1010. * definition object are also optimized. This does not cause file
  1011. * system thrashing because on subsequent calls the cached object
  1012. * is used and any writes to the raw definition object are short
  1013. * circuited. See enduser-customize.html for the high-level
  1014. * picture.
  1015. * @type bool
  1016. */
  1017. public $optimized = null;
  1018. /**
  1019. * What type of definition is it?
  1020. * @type string
  1021. */
  1022. public $type;
  1023. /**
  1024. * Sets up the definition object into the final form, something
  1025. * not done by the constructor
  1026. * @param HTMLPurifier_Config $config
  1027. */
  1028. abstract protected function doSetup($config);
  1029. /**
  1030. * Setup function that aborts if already setup
  1031. * @param HTMLPurifier_Config $config
  1032. */
  1033. public function setup($config)
  1034. {
  1035. if ($this->setup) {
  1036. return;
  1037. }
  1038. $this->setup = true;
  1039. $this->doSetup($config);
  1040. }
  1041. }
  1042. /**
  1043. * Defines allowed CSS attributes and what their values are.
  1044. * @see HTMLPurifier_HTMLDefinition
  1045. */
  1046. class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
  1047. {
  1048. public $type = 'CSS';
  1049. /**
  1050. * Assoc array of attribute name to definition object.
  1051. * @type HTMLPurifier_AttrDef[]
  1052. */
  1053. public $info = array();
  1054. /**
  1055. * Constructs the info array. The meat of this class.
  1056. * @param HTMLPurifier_Config $config
  1057. */
  1058. protected function doSetup($config)
  1059. {
  1060. $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
  1061. array('left', 'right', 'center', 'justify'),
  1062. false
  1063. );
  1064. $border_style =
  1065. $this->info['border-bottom-style'] =
  1066. $this->info['border-right-style'] =
  1067. $this->info['border-left-style'] =
  1068. $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
  1069. array(
  1070. 'none',
  1071. 'hidden',
  1072. 'dotted',
  1073. 'dashed',
  1074. 'solid',
  1075. 'double',
  1076. 'groove',
  1077. 'ridge',
  1078. 'inset',
  1079. 'outset'
  1080. ),
  1081. false
  1082. );
  1083. $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
  1084. $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
  1085. array('none', 'left', 'right', 'both'),
  1086. false
  1087. );
  1088. $this->info['float'] = new HTMLPurifier_AttrDef_Enum(
  1089. array('none', 'left', 'right'),
  1090. false
  1091. );
  1092. $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
  1093. array('normal', 'italic', 'oblique'),
  1094. false
  1095. );
  1096. $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
  1097. array('normal', 'small-caps'),
  1098. false
  1099. );
  1100. $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
  1101. array(
  1102. new HTMLPurifier_AttrDef_Enum(array('none')),
  1103. new HTMLPurifier_AttrDef_CSS_URI()
  1104. )
  1105. );
  1106. $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
  1107. array('inside', 'outside'),
  1108. false
  1109. );
  1110. $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
  1111. array(
  1112. 'disc',
  1113. 'circle',
  1114. 'square',
  1115. 'decimal',
  1116. 'lower-roman',
  1117. 'upper-roman',
  1118. 'lower-alpha',
  1119. 'upper-alpha',
  1120. 'none'
  1121. ),
  1122. false
  1123. );
  1124. $this->info['list-style-image'] = $uri_or_none;
  1125. $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
  1126. $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
  1127. array('capitalize', 'uppercase', 'lowercase', 'none'),
  1128. false
  1129. );
  1130. $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
  1131. $this->info['background-image'] = $uri_or_none;
  1132. $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
  1133. array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
  1134. );
  1135. $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
  1136. array('scroll', 'fixed')
  1137. );
  1138. $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
  1139. $border_color =
  1140. $this->info['border-top-color'] =
  1141. $this->info['border-bottom-color'] =
  1142. $this->info['border-left-color'] =
  1143. $this->info['border-right-color'] =
  1144. $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(
  1145. array(
  1146. new HTMLPurifier_AttrDef_Enum(array('transparent')),
  1147. new HTMLPurifier_AttrDef_CSS_Color()
  1148. )
  1149. );
  1150. $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
  1151. $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
  1152. $border_width =
  1153. $this->info['border-top-width'] =
  1154. $this->info['border-bottom-width'] =
  1155. $this->info['border-left-width'] =
  1156. $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(
  1157. array(
  1158. new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
  1159. new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
  1160. )
  1161. );
  1162. $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
  1163. $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
  1164. array(
  1165. new HTMLPurifier_AttrDef_Enum(array('normal')),
  1166. new HTMLPurifier_AttrDef_CSS_Length()
  1167. )
  1168. );
  1169. $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
  1170. array(
  1171. new HTMLPurifier_AttrDef_Enum(array('normal')),
  1172. new HTMLPurifier_AttrDef_CSS_Length()
  1173. )
  1174. );
  1175. $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(
  1176. array(
  1177. new HTMLPurifier_AttrDef_Enum(
  1178. array(
  1179. 'xx-small',
  1180. 'x-small',
  1181. 'small',
  1182. 'medium',
  1183. 'large',
  1184. 'x-large',
  1185. 'xx-large',
  1186. 'larger',
  1187. 'smaller'
  1188. )
  1189. ),
  1190. new HTMLPurifier_AttrDef_CSS_Percentage(),
  1191. new HTMLPurifier_AttrDef_CSS_Length()
  1192. )
  1193. );
  1194. $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(
  1195. array(
  1196. new HTMLPurifier_AttrDef_Enum(array('normal')),
  1197. new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
  1198. new HTMLPurifier_AttrDef_CSS_Length('0'),
  1199. new HTMLPurifier_AttrDef_CSS_Percentage(true)
  1200. )
  1201. );
  1202. $margin =
  1203. $this->info['margin-top'] =
  1204. $this->info['margin-bottom'] =
  1205. $this->info['margin-left'] =
  1206. $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
  1207. array(
  1208. new HTMLPurifier_AttrDef_CSS_Length(),
  1209. new HTMLPurifier_AttrDef_CSS_Percentage(),
  1210. new HTMLPurifier_AttrDef_Enum(array('auto'))
  1211. )
  1212. );
  1213. $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
  1214. // non-negative
  1215. $padding =
  1216. $this->info['padding-top'] =
  1217. $this->info['padding-bottom'] =
  1218. $this->info['padding-left'] =
  1219. $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
  1220. array(
  1221. new HTMLPurifier_AttrDef_CSS_Length('0'),
  1222. new HTMLPurifier_AttrDef_CSS_Percentage(true)
  1223. )
  1224. );
  1225. $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
  1226. $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(
  1227. array(
  1228. new HTMLPurifier_AttrDef_CSS_Length(),
  1229. new HTMLPurifier_AttrDef_CSS_Percentage()
  1230. )
  1231. );
  1232. $trusted_wh = new HTMLPurifier_AttrD

Large files are truncated, but you can click here to view the full file