/moodle/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php

https://bitbucket.org/geek745/moodle-db2 · PHP · 316 lines · 205 code · 48 blank · 63 comment · 52 complexity · 2e49daa80113f69e7d707726055f6e63 MD5 · raw file

  1. <?php
  2. require_once 'HTMLPurifier/Strategy.php';
  3. require_once 'HTMLPurifier/HTMLDefinition.php';
  4. require_once 'HTMLPurifier/Generator.php';
  5. require_once 'HTMLPurifier/Injector/AutoParagraph.php';
  6. require_once 'HTMLPurifier/Injector/Linkify.php';
  7. require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
  // Registers the 'Custom' directive in the 'AutoFormat' namespace with the
  // config schema, using an empty array() and 'list' as its default/type
  // arguments (presumably namespace, name, default, type, description --
  // confirm against HTMLPurifier_ConfigSchema::define()). The strategy class
  // in this file reads this value back through $config->getBatch('AutoFormat')
  // and treats each entry as either an injector name or an injector object.
  // The final argument is the HTML help text shown for the directive.
  8. HTMLPurifier_ConfigSchema::define(
  9. 'AutoFormat', 'Custom', array(), 'list', '
  10. <p>
  11. This directive can be used to add custom auto-format injectors.
  12. Specify an array of injector names (class name minus the prefix)
  13. or concrete implementations. Injector class must exist. This directive
  14. has been available since 2.0.1.
  15. </p>
  16. '
  17. );
  /**
   * Takes tokens makes them well-formed (balance end tags, etc.)
   *
   * The strategy walks the input token list once, maintaining a stack of
   * currently open start tags. While doing so it:
   *  - offers text tokens and accepted start/empty tokens to the configured
   *    injectors, which may replace the current token with an array of tokens;
   *  - converts start tags of 'empty' elements into empty tokens, and empty
   *    tokens of non-'empty' elements into a start/end pair;
   *  - auto-closes the innermost open element when it does not list the
   *    incoming start tag among its allowed children;
   *  - matches end tags against the open-element stack, closing any skipped
   *    elements, and drops (or escapes to text) end tags with no match;
   *  - closes every element still open when the input is exhausted.
   */
  class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
  {

      /**
       * Locally shared variable references
       *
       * These are only populated for the duration of execute(); they let
       * processToken() and the injectors see the same state as the main loop.
       * @private
       */
      var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
          $currentInjector, $injectors;

      /**
       * Rewrites a token list so that every element is properly opened and
       * closed.
       *
       * @param $tokens  Array of token objects to process. Injectors may
       *                 splice replacement tokens into this (local) array.
       * @param $config  Configuration object; supplies the HTML definition,
       *                 the Core.EscapeInvalidTags flag and the AutoFormat
       *                 injector settings.
       * @param $context Context object (by reference). CurrentNesting,
       *                 InputIndex, InputTokens and CurrentToken are registered
       *                 on it while the main loop runs and destroyed afterwards.
       *                 An optional ErrorCollector is read from it.
       * @return Array of tokens in well-formed order.
       */
      function execute($tokens, $config, &$context) {

          $definition = $config->getHTMLDefinition();

          // local variables
          $result = array();
          $generator = new HTMLPurifier_Generator();
          $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
          $e =& $context->get('ErrorCollector', true);

          // member variables
          $this->currentNesting = array();
          $this->inputIndex     = false;
          $this->inputTokens    =& $tokens;
          $this->outputTokens   =& $result;

          // context variables
          $context->register('CurrentNesting', $this->currentNesting);
          $context->register('InputIndex',     $this->inputIndex);
          $context->register('InputTokens',    $tokens);

          // -- begin INJECTOR --

          $this->injectors = array();

          // every AutoFormat directive other than Custom is an on/off switch
          // whose name doubles as the injector class suffix
          $injectors = $config->getBatch('AutoFormat');
          $custom_injectors = $injectors['Custom'];
          unset($injectors['Custom']); // special case
          foreach ($injectors as $injector => $b) {
              $injector = "HTMLPurifier_Injector_$injector";
              if (!$b) continue;
              $this->injectors[] = new $injector;
          }
          // custom entries are either class-name suffixes or ready-made objects
          foreach ($custom_injectors as $injector) {
              if (is_string($injector)) {
                  $injector = "HTMLPurifier_Injector_$injector";
                  $injector = new $injector;
              }
              $this->injectors[] = $injector;
          }

          // array index of the injector that resulted in an array
          // substitution. This enables processToken() to know which
          // injectors are affected by the added tokens and which are
          // not (namely, the ones after the current injector are not
          // affected)
          $this->currentInjector = false;

          // give the injectors references to the definition and context
          // variables for performance reasons. An injector whose prepare()
          // returns a non-false value is removed and a warning is raised.
          //
          // The list is walked with a manual index rather than foreach:
          // removing an entry shifts the following injectors down by one, so
          // the index must only advance when the current entry is kept.
          // Otherwise the injector that moves into the freed slot would never
          // be prepared, and the last original index would point past the end
          // of the shortened list.
          $i = 0;
          while ($i < count($this->injectors)) {
              $error = $this->injectors[$i]->prepare($config, $context);
              if (!$error) {
                  $i++;
                  continue;
              }
              list($injector) = array_splice($this->injectors, $i, 1);
              $name = $injector->name;
              trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
          }

          // warning: most foreach loops follow the convention $i => $x.
          // be sure, for PHP4 compatibility, to only perform write operations
          // directly referencing the object using $i: $x is only safe for reads

          // -- end INJECTOR --

          $token = false;
          $context->register('CurrentToken', $token);

          for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {

              // if all goes well, this token will be passed through unharmed
              $token = $tokens[$this->inputIndex];

              // count down the number of tokens each injector must still ignore
              foreach ($this->injectors as $i => $x) {
                  if ($x->skip > 0) $this->injectors[$i]->skip--;
              }

              // quick-check: if it's not a tag, no need to process
              if (empty( $token->is_tag )) {
                  if ($token->type === 'text') {
                       // injector handler code; duplicated for performance reasons
                       foreach ($this->injectors as $i => $x) {
                           if (!$x->skip) $this->injectors[$i]->handleText($token);
                           // an array means this injector substituted tokens;
                           // remember which one and stop offering the token
                           if (is_array($token)) {
                               $this->currentInjector = $i;
                               break;
                           }
                       }
                  }
                  $this->processToken($token, $config, $context);
                  continue;
              }

              $info = $definition->info[$token->name]->child;

              // quick tag checks: anything that's *not* an end tag
              $ok = false;
              if ($info->type == 'empty' && $token->type == 'start') {
                  // test if it claims to be a start tag but is empty
                  $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                  $ok = true;
              } elseif ($info->type != 'empty' && $token->type == 'empty' ) {
                  // claims to be empty but really is a start tag
                  $token = array(
                      new HTMLPurifier_Token_Start($token->name, $token->attr),
                      new HTMLPurifier_Token_End($token->name)
                  );
                  $ok = true;
              } elseif ($token->type == 'empty') {
                  // real empty token
                  $ok = true;
              } elseif ($token->type == 'start') {
                  // start tag

                  // ...unless they also have to close their parent
                  if (!empty($this->currentNesting)) {

                      $parent = array_pop($this->currentNesting);
                      $parent_info = $definition->info[$parent->name];

                      // this can be replaced with a more general algorithm:
                      // if the token is not allowed by the parent, auto-close
                      // the parent
                      if (!isset($parent_info->child->elements[$token->name])) {
                          if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                          // close the parent, then re-loop to reprocess token
                          $result[] = new HTMLPurifier_Token_End($parent->name);
                          $this->inputIndex--;
                          continue;
                      }

                      $this->currentNesting[] = $parent; // undo the pop
                  }
                  $ok = true;
              }

              // injector handler code; duplicated for performance reasons
              if ($ok) {
                  foreach ($this->injectors as $i => $x) {
                      if (!$x->skip) $this->injectors[$i]->handleElement($token);
                      if (is_array($token)) {
                          $this->currentInjector = $i;
                          break;
                      }
                  }
                  $this->processToken($token, $config, $context);
                  continue;
              }

              // sanity check: we should be dealing with a closing tag
              if ($token->type != 'end') continue;

              // make sure that we have something open
              if (empty($this->currentNesting)) {
                  if ($escape_invalid_tags) {
                      if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                      $result[] = new HTMLPurifier_Token_Text(
                          $generator->generateFromToken($token, $config, $context)
                      );
                  } elseif ($e) {
                      $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                  }
                  continue;
              }

              // first, check for the simplest case: everything closes neatly
              $current_parent = array_pop($this->currentNesting);
              if ($current_parent->name == $token->name) {
                  $result[] = $token;
                  foreach ($this->injectors as $i => $x) {
                      $this->injectors[$i]->notifyEnd($token);
                  }
                  continue;
              }

              // okay, so we're trying to close the wrong tag

              // undo the previous pop
              $this->currentNesting[] = $current_parent;

              // scroll back the entire nest, trying to find our tag.
              // (feature could be to specify how far you'd like to go)
              $size = count($this->currentNesting);
              $skipped_tags = false;
              // -2 because -1 is the last element, but we already checked that
              for ($i = $size - 2; $i >= 0; $i--) {
                  if ($this->currentNesting[$i]->name == $token->name) {
                      // current nesting is modified: everything from the
                      // matching element upwards is removed from the stack
                      $skipped_tags = array_splice($this->currentNesting, $i);
                      break;
                  }
              }

              // we still didn't find the tag, so remove
              if ($skipped_tags === false) {
                  if ($escape_invalid_tags) {
                      $result[] = new HTMLPurifier_Token_Text(
                          $generator->generateFromToken($token, $config, $context)
                      );
                      if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                  } elseif ($e) {
                      $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                  }
                  continue;
              }

              // okay, we found it, close all the skipped tags
              // note that skipped tags contains the element we need closed
              for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
                  // index 0 is the element the end tag legitimately closes,
                  // so only the elements above it are reported
                  if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                      $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
                  }
                  $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
                  foreach ($this->injectors as $j => $x) { // $j, not $i!!!
                      $this->injectors[$j]->notifyEnd($new_token);
                  }
              }

          }

          $context->destroy('CurrentNesting');
          $context->destroy('InputTokens');
          $context->destroy('InputIndex');
          $context->destroy('CurrentToken');

          // we're at the end now, fix all still unclosed tags (this is
          // duplicated from the end of the loop with some slight modifications)
          // not using $skipped_tags since it would invariably be all of them
          if (!empty($this->currentNesting)) {
              for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
                  if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                      $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
                  }
                  $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
                  foreach ($this->injectors as $j => $x) { // $j, not $i!!!
                      $this->injectors[$j]->notifyEnd($new_token);
                  }
              }
          }

          // drop the per-run shared state so nothing lingers on the strategy
          unset($this->outputTokens, $this->injectors, $this->currentInjector,
                $this->currentNesting, $this->inputTokens, $this->inputIndex);

          return $result;
      }

      /**
       * Commits the current token, or queues an injector's substitution for
       * re-processing.
       *
       * When $token is an array, it replaces the token at the current input
       * index and the index is stepped back so the main loop visits the
       * substituted tokens next. Injectors up to and including
       * $this->currentInjector get their skip counter raised so they ignore
       * the tokens that were just inserted. Otherwise a truthy $token is
       * appended to the output and the open-element stack is updated.
       *
       * @param $token   Token object, array of token objects, or a false
       *                 value (in which case nothing happens).
       * @param $config  Not used by this method.
       * @param $context Not used by this method.
       */
      function processToken($token, $config, &$context) {
          if (is_array($token)) {
              // the original token was overloaded by an injector, time
              // to do some fancy acrobatics

              // $this->inputIndex is decremented so that the entire set gets
              // re-processed
              array_splice($this->inputTokens, $this->inputIndex--, 1, $token);

              // adjust the injector skips based on the array substitution
              if ($this->injectors) {
                  $offset = count($token);
                  for ($i = 0; $i <= $this->currentInjector; $i++) {
                      // because of the skip back, we need to add one more
                      // for uninitialized injectors. I'm not exactly
                      // sure why this is the case, but I think it has to
                      // do with the fact that we're decrementing skips
                      // before re-checking text
                      if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
                      $this->injectors[$i]->skip += $offset;
                  }
              }
          } elseif ($token) {
              // regular case
              $this->outputTokens[] = $token;
              if ($token->type == 'start') {
                  $this->currentNesting[] = $token;
              } elseif ($token->type == 'end') {
                  // execute() emits end tags itself and never passes them here
                  array_pop($this->currentNesting); // not actually used
              }
          }
      }

  }