/moodle/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php
https://bitbucket.org/geek745/moodle-db2 · PHP · 316 lines · 205 code · 48 blank · 63 comment · 52 complexity · 2e49daa80113f69e7d707726055f6e63 MD5 · raw file
- <?php
- require_once 'HTMLPurifier/Strategy.php';
- require_once 'HTMLPurifier/HTMLDefinition.php';
- require_once 'HTMLPurifier/Generator.php';
- require_once 'HTMLPurifier/Injector/AutoParagraph.php';
- require_once 'HTMLPurifier/Injector/Linkify.php';
- require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
// Declares the AutoFormat.Custom configuration directive; its value is read
// back by the strategy below through $config->getBatch('AutoFormat').
// NOTE(review): arguments look like (namespace, name, default, type,
// description) -- confirm against HTMLPurifier_ConfigSchema::define().
HTMLPurifier_ConfigSchema::define(
    'AutoFormat',
    'Custom',
    array(),
    'list',
    '
<p>
This directive can be used to add custom auto-format injectors.
Specify an array of injector names (class name minus the prefix)
or concrete implementations. Injector class must exist. This directive
has been available since 2.0.1.
</p>
'
);
/**
 * Takes tokens and makes them well-formed (balances end tags, etc.)
 *
 * While walking the token list, the strategy also gives the enabled
 * AutoFormat injectors a chance to rewrite text and element tokens.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{

    /**
     * Locally shared variable references. They are set at the start of
     * execute(), used by processToken(), and unset again before execute()
     * returns.
     * @private
     */
    var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
        $currentInjector, $injectors;

    /**
     * Walks the token list and returns a well-formed version of it.
     *
     * @param $tokens  Array of tokens to process; it is spliced in place
     *                 when a token is replaced by several tokens.
     * @param $config  Configuration object; supplies the HTML definition,
     *                 Core.EscapeInvalidTags and the AutoFormat batch.
     * @param $context Context object; CurrentNesting, InputIndex,
     *                 InputTokens and CurrentToken are registered on it for
     *                 the duration of the main loop.
     * @return Array of tokens with every opened element closed.
     */
    function execute($tokens, $config, &$context) {

        $definition = $config->getHTMLDefinition();

        // local variables
        $result = array();
        $generator = new HTMLPurifier_Generator();
        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
        $e =& $context->get('ErrorCollector', true);

        // member variables
        $this->currentNesting = array();
        $this->inputIndex     = false;
        $this->inputTokens    =& $tokens;
        $this->outputTokens   =& $result;

        // context variables
        $context->register('CurrentNesting', $this->currentNesting);
        $context->register('InputIndex',     $this->inputIndex);
        $context->register('InputTokens',    $tokens);

        // -- begin INJECTOR --

        $this->injectors = array();

        $injectors = $config->getBatch('AutoFormat');
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // special case
        foreach ($injectors as $injector => $b) {
            if (!$b) continue;
            $injector = "HTMLPurifier_Injector_$injector";
            $this->injectors[] = new $injector;
        }
        foreach ($custom_injectors as $injector) {
            // custom entries are either a class-name suffix or a ready
            // made injector instance
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }

        // array index of the injector that resulted in an array
        // substitution. This enables processToken() to know which
        // injectors are affected by the added tokens and which are
        // not (namely, the ones after the current injector are not
        // affected)
        $this->currentInjector = false;

        // give the injectors references to the definition and context
        // variables for performance reasons. An injector whose prepare()
        // reports an error is dropped with a warning.
        //
        // The surviving injectors are collected into a fresh list rather
        // than spliced out of $this->injectors mid-loop: splicing while
        // iterating over the original keys shifts the remaining elements,
        // which skipped the injector following a rejected one and ended
        // with a method call on a missing element.
        $prepared = array();
        foreach (array_keys($this->injectors) as $i) {
            $error = $this->injectors[$i]->prepare($config, $context);
            if (!$error) {
                $prepared[] = $this->injectors[$i];
                continue;
            }
            $name = $this->injectors[$i]->name;
            trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
        }
        $this->injectors = $prepared;

        // warning: most foreach loops follow the convention $i => $x.
        // be sure, for PHP4 compatibility, to only perform write operations
        // directly referencing the object using $i: $x is only safe for reads

        // -- end INJECTOR --

        $token = false;
        $context->register('CurrentToken', $token);

        for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {

            // if all goes well, this token will be passed through unharmed
            $token = $tokens[$this->inputIndex];

            //printTokens($tokens, $this->inputIndex);

            // count down the number of tokens each injector still has to
            // ignore
            foreach ($this->injectors as $i => $x) {
                if ($x->skip > 0) $this->injectors[$i]->skip--;
            }

            // quick-check: if it's not a tag, no need to process
            if (empty( $token->is_tag )) {
                if ($token->type === 'text') {
                     // injector handler code; duplicated for performance reasons
                     foreach ($this->injectors as $i => $x) {
                         if (!$x->skip) $this->injectors[$i]->handleText($token);
                         if (is_array($token)) {
                             // the injector replaced the token with a list
                             $this->currentInjector = $i;
                             break;
                         }
                     }
                }
                $this->processToken($token, $config, $context);
                continue;
            }

            $info = $definition->info[$token->name]->child;

            // quick tag checks: anything that's *not* an end tag
            $ok = false;
            if ($info->type == 'empty' && $token->type == 'start') {
                // test if it claims to be a start tag but is empty
                $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                $ok = true;
            } elseif ($info->type != 'empty' && $token->type == 'empty' ) {
                // claims to be empty but really is a start tag
                $token = array(
                    new HTMLPurifier_Token_Start($token->name, $token->attr),
                    new HTMLPurifier_Token_End($token->name)
                );
                $ok = true;
            } elseif ($token->type == 'empty') {
                // real empty token
                $ok = true;
            } elseif ($token->type == 'start') {
                // start tag

                // ...unless they also have to close their parent
                if (!empty($this->currentNesting)) {

                    $parent = array_pop($this->currentNesting);
                    $parent_info = $definition->info[$parent->name];

                    // this can be replaced with a more general algorithm:
                    // if the token is not allowed by the parent, auto-close
                    // the parent
                    if (!isset($parent_info->child->elements[$token->name])) {
                        if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                        // close the parent, then re-loop to reprocess token
                        $result[] = new HTMLPurifier_Token_End($parent->name);
                        $this->inputIndex--;
                        continue;
                    }

                    $this->currentNesting[] = $parent; // undo the pop
                }
                $ok = true;
            }

            // injector handler code; duplicated for performance reasons
            if ($ok) {
                // NOTE(review): when the token was expanded into a
                // start/end pair above, the first non-skipped injector
                // receives that array in handleElement() -- confirm the
                // injectors tolerate this.
                foreach ($this->injectors as $i => $x) {
                    if (!$x->skip) $this->injectors[$i]->handleElement($token);
                    if (is_array($token)) {
                        $this->currentInjector = $i;
                        break;
                    }
                }
                $this->processToken($token, $config, $context);
                continue;
            }

            // sanity check: we should be dealing with a closing tag
            if ($token->type != 'end') continue;

            // make sure that we have something open
            if (empty($this->currentNesting)) {
                if ($escape_invalid_tags) {
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
                continue;
            }

            // first, check for the simplest case: everything closes neatly
            $current_parent = array_pop($this->currentNesting);
            if ($current_parent->name == $token->name) {
                $result[] = $token;
                foreach ($this->injectors as $i => $x) {
                    $this->injectors[$i]->notifyEnd($token);
                }
                continue;
            }

            // okay, so we're trying to close the wrong tag

            // undo the previous pop
            $this->currentNesting[] = $current_parent;

            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($this->currentNesting);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($i = $size - 2; $i >= 0; $i--) {
                if ($this->currentNesting[$i]->name == $token->name) {
                    // current nesting is modified
                    $skipped_tags = array_splice($this->currentNesting, $i);
                    break;
                }
            }

            // we still didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
                continue;
            }

            // okay, we found it, close all the skipped tags
            // note that skipped tags contains the element we need closed
            for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
                // index 0 is the element the end tag actually names, so
                // no notice is sent for it
                if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
                }
                $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
                    $this->injectors[$j]->notifyEnd($new_token);
                }
            }

        }

        $context->destroy('CurrentNesting');
        $context->destroy('InputTokens');
        $context->destroy('InputIndex');
        $context->destroy('CurrentToken');

        // we're at the end now, fix all still unclosed tags (this is
        // duplicated from the end of the loop with some slight modifications)
        // not using $skipped_tags since it would invariably be all of them
        if (!empty($this->currentNesting)) {
            for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
                if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
                }
                $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
                    $this->injectors[$j]->notifyEnd($new_token);
                }
            }
        }

        // drop the per-run state so nothing lingers on the strategy object
        unset($this->outputTokens, $this->injectors, $this->currentInjector,
              $this->currentNesting, $this->inputTokens, $this->inputIndex);

        return $result;
    }

    /**
     * Commits a token that passed the quick checks in execute().
     *
     * An array replaces the current input token and is scheduled for
     * re-processing; any other non-empty token is appended to the output
     * and the nesting stack is updated for start and end tokens.
     *
     * @param $token   Single token, array of replacement tokens, or a
     *                 false-like value (which is ignored).
     * @param $config  Configuration object (not used by this method).
     * @param $context Context object (not used by this method).
     */
    function processToken($token, $config, &$context) {
        if (is_array($token)) {
            // the original token was overloaded by an injector, time
            // to do some fancy acrobatics

            // $this->inputIndex is decremented so that the entire set gets
            // re-processed
            array_splice($this->inputTokens, $this->inputIndex--, 1, $token);

            // adjust the injector skips based on the array substitution
            if ($this->injectors) {
                $offset = count($token);
                for ($i = 0; $i <= $this->currentInjector; $i++) {
                    // because of the skip back, we need to add one more
                    // for uninitialized injectors. I'm not exactly
                    // sure why this is the case, but I think it has to
                    // do with the fact that we're decrementing skips
                    // before re-checking text
                    if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
                    $this->injectors[$i]->skip += $offset;
                }
            }
        } elseif ($token) {
            // regular case
            $this->outputTokens[] = $token;
            if ($token->type == 'start') {
                $this->currentNesting[] = $token;
            } elseif ($token->type == 'end') {
                array_pop($this->currentNesting); // not actually used
            }
        }
    }

}