Parser.php | searchcode

/includes/parser/Parser.php

https://github.com/spenser-roark/OOUG-Wiki · PHP · 5722 lines · 3490 code · 498 blank · 1734 comment · 688 complexity · c111458fb54de04aa4e0fb3d712b02fb MD5 · raw file
Large files are truncated click here to view the full file

<?php
/**
 * @defgroup Parser Parser
 *
 * @file
 * @ingroup Parser
 * File for Parser and related classes
 */


/**
 * PHP Parser - Processes wiki markup (which uses a more user-friendly
 * syntax, such as "[[link]]" for making links), and provides a one-way
 * transformation of that wiki markup into XHTML output / markup
 * (which in turn the browser understands, and can display).
 *
 * <pre>
 * These are the main entry points into the Parser class:
 * parse()
 *     produces HTML output
 * preSaveTransform()
 *     produces altered wiki markup.
 * preprocess()
 *     removes HTML comments and expands templates
 * cleanSig() / cleanSigInSig()
 *     Cleans a signature before saving it to preferences
 * getSection()
 *     Return the content of a section from an article for section editing
 * replaceSection()
 *     Replaces a section by number inside an article
 * getPreloadText()
 *     Removes <noinclude> sections, and <includeonly> tags.
 *
 * Globals used:
 *    object: $wgContLang
 *
 * NOT $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
 *
 * settings:
 *  $wgUseDynamicDates*, $wgInterwikiMagic*,
 *  $wgNamespacesWithSubpages, $wgAllowExternalImages*,
 *  $wgLocaltimezone, $wgAllowSpecialInclusion*,
 *  $wgMaxArticleSize*
 *
 *  * only within ParserOptions
 * </pre>
 *
 * @ingroup Parser
 */
class Parser {
	/**
	 * Update this version number when the ParserOutput format
	 * changes in an incompatible way, so the parser cache
	 * can automatically discard old data.
	 */
	const VERSION = '1.6.4';

	/**
	 * Update this version number when the output of serialiseHalfParsedText()
	 * changes in an incompatible way
	 */
	const HALF_PARSED_VERSION = 2;

	# Flags for Parser::setFunctionHook
	# Also available as global constants from Defines.php
	const SFH_NO_HASH = 1;
	const SFH_OBJECT_ARGS = 2;

	# Constants needed for external link processing
	# Everything except bracket, space, or control characters
	# \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
	# as well as U+3000 (IDEOGRAPHIC SPACE), see bug 19052
	const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
	const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
		\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';

	# State constants for the definition list colon extraction
	const COLON_STATE_TEXT = 0;
	const COLON_STATE_TAG = 1;
	const COLON_STATE_TAGSTART = 2;
	const COLON_STATE_CLOSETAG = 3;
	const COLON_STATE_TAGSLASH = 4;
	const COLON_STATE_COMMENT = 5;
	const COLON_STATE_COMMENTDASH = 6;
	const COLON_STATE_COMMENTDASHDASH = 7;

	# Flags for preprocessToDom
	const PTD_FOR_INCLUSION = 1;

	# Allowed values for $this->mOutputType
	# Parameter to startExternalParse().
	const OT_HTML = 1; # like parse()
	const OT_WIKI = 2; # like preSaveTransform()
	const OT_PREPROCESS = 3; # like preprocess()
	const OT_MSG = 3;
	const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.

	# Marker suffix needs to be accessible statically.
	const MARKER_SUFFIX = "-QINU\x7f";

	# Persistent:
	var $mTagHooks = array();
	var $mTransparentTagHooks = array();
	var $mFunctionHooks = array();
	var $mFunctionSynonyms = array( 0 => array(), 1 => array() );
	var $mFunctionTagHooks = array();
	var $mStripList  = array();
	var $mDefaultStripList  = array();
	var $mVarCache = array();
	var $mImageParams = array();
	var $mImageParamsMagicArray = array();
	var $mMarkerIndex = 0;
	var $mFirstCall = true;

	# Initialised by initialiseVariables()

	/**
	 * @var MagicWordArray
	 */
	var $mVariables;

	/**
	 * @var MagicWordArray
	 */
	var $mSubstWords;
	var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor

	# Cleared with clearState():
	/**
	 * @var ParserOutput
	 */
	var $mOutput;
	var $mAutonumber, $mDTopen;

	/**
	 * @var StripState
	 */
	var $mStripState;

	var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
	/**
	 * @var LinkHolderArray
	 */
	var $mLinkHolders;

	var $mLinkID;
	var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
	var $mTplExpandCache; # empty-frame expansion cache
	var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
	var $mExpensiveFunctionCount; # number of expensive parser function calls
	var $mShowToc, $mForceTocPosition;

	/**
	 * @var User
	 */
	var $mUser; # User object; only used when doing pre-save transform

	# Temporary
	# These are variables reset at least once per parse regardless of $clearState

	/**
	 * @var ParserOptions
	 */
	var $mOptions;

	/**
	 * @var Title
	 */
	var $mTitle;        # Title context, used for self-link rendering and similar things
	var $mOutputType;   # Output type, one of the OT_xxx constants
	var $ot;            # Shortcut alias, see setOutputType()
	var $mRevisionObject; # The revision object of the specified revision ID
	var $mRevisionId;   # ID to display in {{REVISIONID}} tags
	var $mRevisionTimestamp; # The timestamp of the specified revision ID
	var $mRevisionUser; # User to display in {{REVISIONUSER}} tag
	var $mRevIdForTs;   # The revision ID which was used to fetch the timestamp

	/**
	 * @var string
	 */
	var $mUniqPrefix;

	/**
	 * Set up a new parser: remember the configuration, build the regular
	 * expression for bracketed external links and pick the name of the
	 * preprocessor class that getPreprocessor() will instantiate later.
	 *
	 * @param $conf array Parser configuration. If the 'preprocessorClass'
	 *   key is set, its value is used as the preprocessor class name.
	 */
	public function __construct( $conf = array() ) {
		$this->mConf = $conf;
		$this->mUrlProtocols = wfUrlProtocols();
		$this->mExtLinkBracketedRegex = '/\[((' . wfUrlProtocols() . ')' .
			self::EXT_LINK_URL_CLASS .
			'+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

		# Preprocessor_Hash is the fallback for every case not handled below
		$preprocessorClass = 'Preprocessor_Hash';
		if ( isset( $conf['preprocessorClass'] ) ) {
			# An explicit choice in the configuration always wins
			$preprocessorClass = $conf['preprocessorClass'];
		} elseif ( !defined( 'MW_COMPILED' ) ) {
			# (In compiled mode Preprocessor_Hash is much faster than
			# Preprocessor_DOM, so the fallback is kept there.)
			if ( extension_loaded( 'domxml' ) ) {
				# PECL extension that conflicts with the core DOM extension (bug 13770)
				wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
			} elseif ( extension_loaded( 'dom' ) ) {
				$preprocessorClass = 'Preprocessor_DOM';
			}
		}
		$this->mPreprocessorClass = $preprocessorClass;

		wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
	}

	/**
	 * Unset every property of the parser on destruction, to reduce memory
	 * usage and the impact of circular references.
	 */
	function __destruct() {
		# The link holder array goes first
		if ( isset( $this->mLinkHolders ) ) {
			unset( $this->mLinkHolders );
		}
		# ... followed by whatever properties are left
		foreach ( $this as $property => $unused ) {
			unset( $this->$property );
		}
	}

	/**
	 * One-time initialisation, performed on the first call of the parser:
	 * registers the core parser functions and tag hooks, initialises the
	 * variables and runs the 'ParserFirstCallInit' hook. Later calls do
	 * nothing.
	 */
	function firstCallInit() {
		if ( $this->mFirstCall ) {
			# Clear the flag before doing the work, so the initialisation
			# below is not entered a second time
			$this->mFirstCall = false;

			wfProfileIn( __METHOD__ );

			CoreParserFunctions::register( $this );
			CoreTagHooks::register( $this );
			$this->initialiseVariables();

			wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
			wfProfileOut( __METHOD__ );
		}
	}

	/**
	 * Clear Parser state
	 *
	 * Runs firstCallInit() if it has not run yet, then creates a fresh
	 * ParserOutput, LinkHolderArray and StripState, generates a new unique
	 * marker prefix and resets the per-parse counters, caches and revision
	 * fields. The 'ParserClearState' hook is run last.
	 *
	 * $this->mOptions must already be set when this is called: the new
	 * output object is registered on it as an option watcher.
	 *
	 * @private
	 */
	function clearState() {
		wfProfileIn( __METHOD__ );
		if ( $this->mFirstCall ) {
			$this->firstCallInit();
		}
		$this->mOutput = new ParserOutput;
		# Let the output object record which options get used during the parse
		$this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
		$this->mAutonumber = 0;
		$this->mLastSection = '';
		$this->mDTopen = false;
		$this->mIncludeCount = array();
		$this->mArgStack = false;
		$this->mInPre = false;
		$this->mLinkHolders = new LinkHolderArray( $this );
		$this->mLinkID = 0;
		$this->mRevisionObject = $this->mRevisionTimestamp =
			$this->mRevisionId = $this->mRevisionUser = null;
		$this->mVarCache = array();
		$this->mUser = null;

		/**
		 * Prefix for temporary replacement strings for the multipass parser.
		 * \x07 should never appear in input as it's disallowed in XML.
		 * Using it at the front also gives us a little extra robustness
		 * since it shouldn't match when butted up against identifier-like
		 * string constructs.
		 *
		 * Must not consist of all title characters, or else it will change
		 * the behaviour of <nowiki> in a link.
		 */
		# $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
		# Changed to \x7f to allow XML double-parsing -- TS
		$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
		# The strip state is created with the prefix generated just above
		$this->mStripState = new StripState( $this->mUniqPrefix );


		# Clear these on every parse, bug 4549
		$this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array();

		$this->mShowToc = true;
		$this->mForceTocPosition = false;
		$this->mIncludeSizes = array(
			'post-expand' => 0,
			'arg' => 0,
		);
		$this->mPPNodeCount = 0;
		$this->mDefaultSort = false;
		$this->mHeadings = array();
		$this->mDoubleUnderscores = array();
		$this->mExpensiveFunctionCount = 0;

		# Fix cloning: a preprocessor that still points at another parser
		# is dropped here; getPreprocessor() creates a new one on demand
		if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
			$this->mPreprocessor = null;
		}

		wfRunHooks( 'ParserClearState', array( &$this ) );
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Convert wikitext to HTML
	 * Do not call this function recursively.
	 *
	 * Outline of the steps, in order: start the parse (OT_HTML), run
	 * internalParse(), fix up "french spaces", build block-level structure,
	 * replace link holders, optionally run language conversion on text and
	 * title, unstrip <nowiki> and general markers, normalise character
	 * references, tidy the HTML (MWTidy or a few fallback regexes), and
	 * optionally append the limit report as an HTML comment.
	 *
	 * The revision-related properties ($mRevisionId and friends) are saved
	 * on entry and restored before returning.
	 *
	 * @param $text String: text we want to parse
	 * @param $title Title object
	 * @param $options ParserOptions
	 * @param $linestart boolean: passed through to doBlockLevels()
	 * @param $clearState boolean: passed through to startParse()
	 * @param $revid Int: number to pass in {{REVISIONID}}; when not null the
	 *   cached revision object, timestamp and user are reset for this parse
	 * @return ParserOutput a ParserOutput
	 */
	public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) {
		/**
		 * First pass--just handle <nowiki> sections, pass the rest off
		 * to internalParse() which does all the real work.
		 */

		global $wgUseTidy, $wgAlwaysUseTidy, $wgDisableLangConversion, $wgDisableTitleConversion;
		# Second profiling section, named after the caller of parse()
		$fname = __METHOD__.'-' . wfGetCaller();
		wfProfileIn( __METHOD__ );
		wfProfileIn( $fname );

		$this->startParse( $title, $options, self::OT_HTML, $clearState );

		# Remember the revision context so it can be restored before returning
		$oldRevisionId = $this->mRevisionId;
		$oldRevisionObject = $this->mRevisionObject;
		$oldRevisionTimestamp = $this->mRevisionTimestamp;
		$oldRevisionUser = $this->mRevisionUser;
		if ( $revid !== null ) {
			$this->mRevisionId = $revid;
			$this->mRevisionObject = null;
			$this->mRevisionTimestamp = null;
			$this->mRevisionUser = null;
		}

		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		# No more strip!
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->internalParse( $text );

		$text = $this->mStripState->unstripGeneral( $text );

		# Clean up special characters, only run once, next-to-last before doBlockLevels
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&#160;',
			'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
		);
		$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

		$text = $this->doBlockLevels( $text, $linestart );

		$this->replaceLinkHolders( $text );

		/**
		 * The input doesn't get language converted if
		 * a) It's disabled
		 * b) Content isn't converted
		 * c) It's a conversion table
		 * d) it is an interface message (which is in the user language)
		 */
		if ( !( $wgDisableLangConversion
				|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
				|| $this->mTitle->isConversionTable() ) )
		{
			# Run convert unconditionally in 1.18-compatible mode
			global $wgBug34832TransitionalRollback;
			if ( $wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage() ) {
				# The position of the convert() call should not be changed. it
				# assumes that the links are all replaced and the only thing left
				# is the <nowiki> mark.
				$text = $this->getConverterLanguage()->convert( $text );
			}
		}

		/**
		 * A converted title will be provided in the output object if title and
		 * content conversion are enabled, the article text does not contain
		 * a conversion-suppressing double-underscore tag, and no
		 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
		 * automatic link conversion.
		 */
		if ( !( $wgDisableLangConversion
				|| $wgDisableTitleConversion
				|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
				|| isset( $this->mDoubleUnderscores['notitleconvert'] )
				|| $this->mOutput->getDisplayTitle() !== false ) )
		{
			# A title supplied by a conversion rule wins over the
			# automatically converted page title
			$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
			if ( $convruletitle ) {
				$this->mOutput->setTitleText( $convruletitle );
			} else {
				$titleText = $this->getConverterLanguage()->convertTitle( $title );
				$this->mOutput->setTitleText( $titleText );
			}
		}

		$text = $this->mStripState->unstripNoWiki( $text );

		wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );

		$text = $this->replaceTransparentTags( $text );
		$text = $this->mStripState->unstripGeneral( $text );

		$text = Sanitizer::normalizeCharReferences( $text );

		if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
			$text = MWTidy::tidy( $text );
		} else {
			# attempt to sanitize at least some nesting problems
			# (bug #2702 and quite a few others)
			$tidyregs = array(
				# ''Something [http://www.cool.com cool''] -->
				# <i>Something</i><a href="http://www.cool.com"..><i>cool</i></a>
				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
				# fix up an anchor inside another anchor, only
				# at least for a single nested link (bug 3695)
				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
				# fix div inside inline elements- doBlockLevels won't wrap a line which
				# contains a div, so fix it up here; replace
				# div with escaped text
				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
				# remove empty italic or bold tag pairs, some
				# introduced by rules above
				'/<([bi])><\/\\1>/' => '',
			);

			$text = preg_replace(
				array_keys( $tidyregs ),
				array_values( $tidyregs ),
				$text );
		}
		global $wgExpensiveParserFunctionLimit;
		if ( $this->mExpensiveFunctionCount > $wgExpensiveParserFunctionLimit ) {
			$this->limitationWarn( 'expensive-parserfunction', $this->mExpensiveFunctionCount, $wgExpensiveParserFunctionLimit );
		}

		wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );

		# Information on include size limits, for the benefit of users who try to skirt them
		if ( $this->mOptions->getEnableLimitReport() ) {
			$max = $this->mOptions->getMaxIncludeSize();
			$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
			$limitReport =
				"NewPP limit report\n" .
				"Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
				"Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
				"Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
				$PFreport;
			wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
			# The report is appended to the page as an HTML comment
			$text .= "\n<!-- \n$limitReport-->\n";
		}
		$this->mOutput->setText( $text );

		# Restore the revision context saved at the top
		$this->mRevisionId = $oldRevisionId;
		$this->mRevisionObject = $oldRevisionObject;
		$this->mRevisionTimestamp = $oldRevisionTimestamp;
		$this->mRevisionUser = $oldRevisionUser;
		wfProfileOut( $fname );
		wfProfileOut( __METHOD__ );

		return $this->mOutput;
	}

	/**
	 * Recursive parser entry point for extension tag hooks.
	 *
	 * Runs the 'ParserBeforeStrip' and 'ParserAfterStrip' hooks and then
	 * hands the text to internalParse() as a non-main parse. The current
	 * parser state is used as it is; nothing is reset.
	 *
	 * If $frame is not provided, then template variables (e.g., {{{1}}})
	 * within $text are not expanded.
	 *
	 * @param $text String: text extension wants to have parsed
	 * @param $frame PPFrame: The frame to use for expanding any template variables
	 *
	 * @return string
	 */
	function recursiveTagParse( $text, $frame=false ) {
		wfProfileIn( __METHOD__ );

		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );

		# $isMain = false: this is not the top-level parse
		$parsed = $this->internalParse( $text, false, $frame );

		wfProfileOut( __METHOD__ );
		return $parsed;
	}

	/**
	 * Expand templates and variables in the text, producing valid, static wikitext.
	 * Also removes comments.
	 *
	 * Always starts a fresh parse (state is cleared) in OT_PREPROCESS mode.
	 *
	 * @param $text String: wikitext to expand
	 * @param $title Title
	 * @param $options ParserOptions
	 * @param $revid Int|null: revision ID to use for this parse, if any
	 * @return String
	 */
	function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) {
		wfProfileIn( __METHOD__ );
		$this->startParse( $title, $options, self::OT_PREPROCESS, true );

		if ( !is_null( $revid ) ) {
			$this->mRevisionId = $revid;
		}

		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );

		$expanded = $this->replaceVariables( $text );
		$result = $this->mStripState->unstripBoth( $expanded );

		wfProfileOut( __METHOD__ );
		return $result;
	}

	/**
	 * Recursive counterpart of preprocess() that can be called from an
	 * extension tag hook: variables are replaced and strip markers are put
	 * back, without starting a new parse.
	 *
	 * @param $text String: text to be expanded
	 * @param $frame PPFrame: The frame to use for expanding any template variables
	 * @return String
	 * @since 1.19
	 */
	public function recursivePreprocess( $text, $frame = false ) {
		wfProfileIn( __METHOD__ );

		$expanded = $this->replaceVariables( $text, $frame );
		$result = $this->mStripState->unstripBoth( $expanded );

		wfProfileOut( __METHOD__ );
		return $result;
	}

	/**
	 * Process the wikitext for the ?preload= feature. (bug 5210)
	 *
	 * <noinclude>, <includeonly> etc. are parsed as for template transclusion,
	 * comments, templates, arguments, tags hooks and parser functions are untouched.
	 *
	 * @param $text String
	 * @param $title Title
	 * @param $options ParserOptions
	 * @return String
	 */
	public function getPreloadText( $text, Title $title, ParserOptions $options ) {
		# Parser (re)initialisation
		$this->startParse( $title, $options, self::OT_PLAIN, true );

		# Expand the DOM without arguments and without templates
		$expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
		$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
		$frame = $this->getPreprocessor()->newFrame();
		$expanded = $frame->expand( $dom, $expandFlags );

		return $this->mStripState->unstripBoth( $expanded );
	}

	/**
	 * Get a random string: two mt_rand() values in the range 0..0x7fffffff,
	 * each written in lower-case hexadecimal, concatenated.
	 *
	 * @return string
	 */
	static public function getRandomString() {
		$first = dechex( mt_rand( 0, 0x7fffffff ) );
		$second = dechex( mt_rand( 0, 0x7fffffff ) );
		return $first . $second;
	}

	/**
	 * Set the current user (stored in $this->mUser).
	 * Should only be used when doing pre-save transform.
	 *
	 * getUser() returns this value in preference to the user of the
	 * ParserOptions object; clearState() resets it to null.
	 *
	 * @param $user Mixed: User object or null (to reset)
	 */
	function setUser( $user ) {
		$this->mUser = $user;
	}

	/**
	 * Accessor for mUniqPrefix.
	 *
	 * @return String The current unique marker prefix, or an empty string
	 *   if no prefix has been generated yet
	 */
	public function uniqPrefix() {
		if ( isset( $this->mUniqPrefix ) ) {
			return $this->mUniqPrefix;
		}

		# @todo FIXME: This is probably *horribly wrong*
		# LanguageConverter seems to want $wgParser's uniqPrefix, however
		# if this is called for a parser cache hit, the parser may not
		# have ever been initialized in the first place.
		# Not really sure what the heck is supposed to be going on here.
		# throw new MWException( "Accessing uninitialized mUniqPrefix" );
		return '';
	}

	/**
	 * Set the context title
	 *
	 * A missing title or a FakeTitle is replaced by the title 'NO TITLE'.
	 * If the title carries a fragment, a clone without the fragment is
	 * stored, so the caller's object is left untouched.
	 *
	 * @param $t Title
	 */
	function setTitle( $t ) {
		if ( !$t || $t instanceof FakeTitle ) {
			$t = Title::newFromText( 'NO TITLE' );
		}

		if ( strval( $t->getFragment() ) === '' ) {
			# Nothing to strip, keep the object as given
			$this->mTitle = $t;
			return;
		}

		# Strip the fragment to avoid various odd effects
		$this->mTitle = clone $t;
		$this->mTitle->setFragment( '' );
	}

	/**
	 * Accessor for the Title object
	 *
	 * @return Title The context title stored in $this->mTitle (see setTitle())
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * Accessor/mutator for the Title object
	 *
	 * Delegates to wfSetVar() on $this->mTitle. Unlike setTitle(), the
	 * value is not checked and its fragment is not stripped here.
	 *
	 * @param $x New Title object or null to just get the current one
	 * @return Title object (whatever wfSetVar() returns; presumably the
	 *   value held before the call -- confirm against wfSetVar())
	 */
	function Title( $x = null ) {
		return wfSetVar( $this->mTitle, $x );
	}

	/**
	 * Set the output type
	 *
	 * Besides storing the value in $this->mOutputType, this rebuilds the
	 * $this->ot shortcut array, which has one boolean per output type
	 * ('html', 'wiki', 'pre', 'plain').
	 *
	 * @param $ot Integer: new value, one of the OT_xxx constants
	 */
	function setOutputType( $ot ) {
		$this->mOutputType = $ot;

		# Shortcut alias
		$shortcuts = array();
		$shortcuts['html'] = ( $ot == self::OT_HTML );
		$shortcuts['wiki'] = ( $ot == self::OT_WIKI );
		$shortcuts['pre'] = ( $ot == self::OT_PREPROCESS );
		$shortcuts['plain'] = ( $ot == self::OT_PLAIN );
		$this->ot = $shortcuts;
	}

	/**
	 * Accessor/mutator for the output type
	 *
	 * Delegates to wfSetVar() on $this->mOutputType. Note that, unlike
	 * setOutputType(), this does not rebuild the $this->ot shortcut array.
	 *
	 * @param $x New value or null to just get the current one
	 * @return Integer
	 */
	function OutputType( $x = null ) {
		return wfSetVar( $this->mOutputType, $x );
	}

	/**
	 * Get the ParserOutput object
	 *
	 * @return ParserOutput The output object of the current parse
	 *   ($this->mOutput; clearState() creates a new one)
	 */
	function getOutput() {
		return $this->mOutput;
	}

	/**
	 * Get the ParserOptions object
	 *
	 * @return ParserOptions The options stored in $this->mOptions
	 */
	function getOptions() {
		return $this->mOptions;
	}

	/**
	 * Accessor/mutator for the ParserOptions object
	 *
	 * Delegates to wfSetVar() on $this->mOptions.
	 *
	 * @param $x New value or null to just get the current one
	 * @return Current ParserOptions object
	 */
	function Options( $x = null ) {
		return wfSetVar( $this->mOptions, $x );
	}

	/**
	 * Hand out the current link ID and advance the counter.
	 *
	 * Post-increment: the value returned is the one held by
	 * $this->mLinkID before it is increased.
	 *
	 * @return int
	 */
	function nextLinkID() {
		return $this->mLinkID++;
	}

	/**
	 * Set the link ID counter; the next call to nextLinkID() returns
	 * this value.
	 *
	 * @param $id int
	 */
	function setLinkID( $id ) {
		$this->mLinkID = $id;
	}

	/**
	 * Get a language object for use in parser functions such as {{FORMATNUM:}}
	 *
	 * This is simply the target language, see getTargetLanguage().
	 *
	 * @return Language
	 */
	function getFunctionLang() {
		return $this->getTargetLanguage();
	}

	/**
	 * Get the target language for the content being parsed. This is usually the
	 * language that the content is in.
	 *
	 * Order of precedence:
	 *  1. the target language set on the ParserOptions, if not null
	 *  2. the user language, when parsing an interface message
	 *  3. the page language of the context title
	 *
	 * @throws MWException if the title is needed but $this->mTitle is null
	 */
	function getTargetLanguage() {
		$target = $this->mOptions->getTargetLanguage();
		if ( $target !== null ) {
			return $target;
		}

		if ( $this->mOptions->getInterfaceMessage() ) {
			return $this->mOptions->getUserLangObj();
		}

		if ( is_null( $this->mTitle ) ) {
			throw new MWException( __METHOD__.': $this->mTitle is null' );
		}

		return $this->mTitle->getPageLanguage();
	}

	/**
	 * Get the language object for language conversion: the content
	 * language ($wgContLang) when $wgBug34832TransitionalRollback is set,
	 * the target language otherwise.
	 */
	function getConverterLanguage() {
		global $wgBug34832TransitionalRollback, $wgContLang;

		return $wgBug34832TransitionalRollback
			? $wgContLang
			: $this->getTargetLanguage();
	}

	/**
	 * Get a User object either from $this->mUser, if set, or from the
	 * ParserOptions object otherwise
	 *
	 * @return User object
	 */
	function getUser() {
		if ( is_null( $this->mUser ) ) {
			# No user was set on the parser: ask the options
			return $this->mOptions->getUser();
		}
		return $this->mUser;
	}

	/**
	 * Get a preprocessor object
	 *
	 * The object is created on first use, from the class name chosen in
	 * the constructor, and reused afterwards.
	 *
	 * @return Preprocessor instance
	 */
	function getPreprocessor() {
		if ( isset( $this->mPreprocessor ) ) {
			return $this->mPreprocessor;
		}

		$preprocessorClass = $this->mPreprocessorClass;
		$this->mPreprocessor = new $preprocessorClass( $this );
		return $this->mPreprocessor;
	}

	/**
	 * Replaces all occurrences of HTML-style comments and the given tags
	 * in the text with a random marker and returns the new text. The output
	 * parameter $matches will be an associative array filled with data in
	 * the form:
	 *   'UNIQ-xxxxx' => array(
	 *     'element',
	 *     'tag content',
	 *     array( 'param' => 'x' ),
	 *     '<element param="x">tag content</element>' ) )
	 *
	 * For an empty element (<tag />) the content is null. An element or
	 * comment without an end tag runs to the end of the text.
	 *
	 * Markers are numbered by a counter that is shared by all calls
	 * within the request.
	 *
	 * @param $elements array list of element names. Comments are always extracted.
	 * @param $text string Source text string.
	 * @param $matches array Out parameter, Array: extracted tags
	 * @param $uniq_prefix string
	 * @return String: stripped text
	 */
	public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
		static $n = 1;
		$stripped = '';
		$matches = array();

		$taglist = implode( '|', $elements );
		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

		while ( $text != '' ) {
			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
			$stripped .= $p[0];
			if ( count( $p ) < 5 ) {
				# Neither a tag nor a comment is left in the text
				break;
			}
			if ( count( $p ) > 5 ) {
				# comment: the fourth capture group matched, so the split
				# result has one more entry than for a tag
				$element    = $p[4];
				$attributes = '';
				$close      = '';
				$inside     = $p[5];
			} else {
				# tag
				$element    = $p[1];
				$attributes = $p[2];
				$close      = $p[3];
				$inside     = $p[4];
			}

			$marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
			$stripped .= $marker;

			if ( $close === '/>' ) {
				# Empty element tag, <tag />
				$content = null;
				$text = $inside;
				$tail = null;
			} else {
				if ( $element === '!--' ) {
					$end = '/(-->)/';
				} else {
					# $element is the literal text matched in the source,
					# so it has to be escaped before it is used in a
					# pattern: otherwise a name containing regex
					# metacharacters yields a wrong or invalid end pattern.
					$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
				}
				$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
				$content = $q[0];
				if ( count( $q ) < 3 ) {
					# No end tag -- let it run out to the end of the text.
					$tail = '';
					$text = '';
				} else {
					$tail = $q[1];
					$text = $q[2];
				}
			}

			$matches[$marker] = array( $element,
				$content,
				Sanitizer::decodeTagAttributes( $attributes ),
				"<$element$attributes$close$content$tail" );
		}
		return $stripped;
	}

	/**
	 * Get a list of strippable XML-like elements
	 *
	 * @return array The current contents of $this->mStripList
	 */
	function getStripList() {
		return $this->mStripList;
	}

	/**
	 * Add an item to the strip state
	 * Returns the unique tag which must be inserted into the stripped text
	 * The tag will be replaced with the original text in unstrip()
	 *
	 * The marker is built from the unique prefix, the running marker index
	 * and the marker suffix; the index is advanced on every call.
	 *
	 * @param $text string
	 *
	 * @return string
	 */
	function insertStripItem( $text ) {
		$marker = $this->mUniqPrefix . '-item-' . $this->mMarkerIndex . '-' . self::MARKER_SUFFIX;
		$this->mMarkerIndex++;

		$this->mStripState->addGeneral( $marker, $text );
		return $marker;
	}

	/**
	 * Parse the wiki syntax used to render tables
	 *
	 * The text is processed line by line. A line starting with "{|"
	 * (optionally preceded by colons for indentation) opens a table, "|}"
	 * closes it, "|-" starts a row, "|+" a caption, "|" a data cell and "!"
	 * a header cell; "||" (or "!!" in a header line) separates several
	 * cells on one line. Lines outside of any table are passed through.
	 *
	 * Tables can be nested, so the open/closed state of cell, row and
	 * table is kept on stacks with one entry per currently open table.
	 * Tables still open at the end of the text are closed automatically;
	 * the open cell is closed with the tag that opened it.
	 *
	 * @private
	 *
	 * @param $text string
	 * @return string
	 */
	function doTableStuff( $text ) {
		wfProfileIn( __METHOD__ );

		$lines = StringUtils::explode( "\n", $text );
		$out = '';
		$td_history = array(); # Is currently a td tag open?
		$last_tag_history = array(); # Save history of last tag activated (td, th or caption)
		$tr_history = array(); # Is currently a tr tag open?
		$tr_attributes = array(); # history of tr attributes
		$has_opened_tr = array(); # Did this table open a <tr> element?
		$indent_level = 0; # indent level of the table

		foreach ( $lines as $outLine ) {
			$line = trim( $outLine );

			if ( $line === '' ) { # empty line, go to next line
				$out .= $outLine."\n";
				continue;
			}

			$first_character = $line[0];
			$matches = array();

			if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) {
				# First check if we are starting a new table
				$indent_level = strlen( $matches[1] );

				$attributes = $this->mStripState->unstripBoth( $matches[2] );
				$attributes = Sanitizer::fixTagAttributes( $attributes , 'table' );

				$outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
				# New table: push one fresh entry on every state stack
				array_push( $td_history , false );
				array_push( $last_tag_history , '' );
				array_push( $tr_history , false );
				array_push( $tr_attributes , '' );
				array_push( $has_opened_tr , false );
			} elseif ( count( $td_history ) == 0 ) {
				# Don't do any of the following
				$out .= $outLine."\n";
				continue;
			} elseif ( substr( $line , 0 , 2 ) === '|}' ) {
				# We are ending a table
				$line = '</table>' . substr( $line , 2 );
				$last_tag = array_pop( $last_tag_history );

				# A table without any row gets an empty one
				if ( !array_pop( $has_opened_tr ) ) {
					$line = "<tr><td></td></tr>{$line}";
				}

				if ( array_pop( $tr_history ) ) {
					$line = "</tr>{$line}";
				}

				if ( array_pop( $td_history ) ) {
					$line = "</{$last_tag}>{$line}";
				}
				array_pop( $tr_attributes );
				$outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
			} elseif ( substr( $line , 0 , 2 ) === '|-' ) {
				# Now we have a table row
				$line = preg_replace( '#^\|-+#', '', $line );

				# Whats after the tag is now only attributes
				$attributes = $this->mStripState->unstripBoth( $line );
				$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
				# Remember the attributes; the <tr> itself is only written
				# out when the first cell of the row shows up
				array_pop( $tr_attributes );
				array_push( $tr_attributes, $attributes );

				$line = '';
				$last_tag = array_pop( $last_tag_history );
				array_pop( $has_opened_tr );
				array_push( $has_opened_tr , true );

				if ( array_pop( $tr_history ) ) {
					$line = '</tr>';
				}

				if ( array_pop( $td_history ) ) {
					$line = "</{$last_tag}>{$line}";
				}

				$outLine = $line;
				array_push( $tr_history , false );
				array_push( $td_history , false );
				array_push( $last_tag_history , '' );
			} elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 )  === '|+' ) {
				# This might be cell elements, td, th or captions
				if ( substr( $line , 0 , 2 ) === '|+' ) {
					$first_character = '+';
					$line = substr( $line , 1 );
				}

				$line = substr( $line , 1 );

				if ( $first_character === '!' ) {
					$line = str_replace( '!!' , '||' , $line );
				}

				# Split up multiple cells on the same line.
				# FIXME : This can result in improper nesting of tags processed
				# by earlier parser steps, but should avoid splitting up eg
				# attribute values containing literal "||".
				$cells = StringUtils::explodeMarkup( '||' , $line );

				$outLine = '';

				# Loop through each table cell
				foreach ( $cells as $cell ) {
					$previous = '';
					if ( $first_character !== '+' ) {
						# Cells (but not captions) live in a row: open one
						# if none is open, using the stored row attributes
						$tr_after = array_pop( $tr_attributes );
						if ( !array_pop( $tr_history ) ) {
							$previous = "<tr{$tr_after}>\n";
						}
						array_push( $tr_history , true );
						array_push( $tr_attributes , '' );
						array_pop( $has_opened_tr );
						array_push( $has_opened_tr , true );
					}

					$last_tag = array_pop( $last_tag_history );

					# Close the previous cell, if one is still open
					if ( array_pop( $td_history ) ) {
						$previous = "</{$last_tag}>\n{$previous}";
					}

					if ( $first_character === '|' ) {
						$last_tag = 'td';
					} elseif ( $first_character === '!' ) {
						$last_tag = 'th';
					} elseif ( $first_character === '+' ) {
						$last_tag = 'caption';
					} else {
						$last_tag = '';
					}

					array_push( $last_tag_history , $last_tag );

					# A cell could contain both parameters and data
					$cell_data = explode( '|' , $cell , 2 );

					# Bug 553: Note that a '|' inside an invalid link should not
					# be mistaken as delimiting cell parameters
					if ( strpos( $cell_data[0], '[[' ) !== false ) {
						$cell = "{$previous}<{$last_tag}>{$cell}";
					} elseif ( count( $cell_data ) == 1 ) {
						$cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
					} else {
						$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
						$attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
						$cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
					}

					$outLine .= $cell;
					array_push( $td_history , true );
				}
			}
			$out .= $outLine . "\n";
		}

		# Closing open td, tr && table
		while ( count( $td_history ) > 0 ) {
			# The open cell may be a td, a th or a caption: close it with
			# the tag that opened it rather than always emitting </td>
			$last_tag = array_pop( $last_tag_history );
			if ( array_pop( $td_history ) ) {
				$out .= "</{$last_tag}>\n";
			}
			if ( array_pop( $tr_history ) ) {
				$out .= "</tr>\n";
			}
			if ( !array_pop( $has_opened_tr ) ) {
				$out .= "<tr><td></td></tr>\n" ;
			}

			$out .= "</table>\n";
		}

		# Remove trailing line-ending (b/c)
		if ( substr( $out, -1 ) === "\n" ) {
			$out = substr( $out, 0, -1 );
		}

		# special case: don't return empty table
		if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
			$out = '';
		}

		wfProfileOut( __METHOD__ );

		return $out;
	}

	/**
	 * Helper function for parse() that transforms wiki markup into
	 * HTML. Only called for $mOutputType == self::OT_HTML.
	 *
	 * Steps, in order: variable/template expansion, HTML sanitising,
	 * tables, horizontal rules, double-underscore words, headings,
	 * optional date reformatting, internal links, quotes, external links,
	 * magic links and finally heading formatting.
	 *
	 * @private
	 *
	 * @param $text string
	 * @param $isMain bool: passed on to formatHeadings()
	 * @param $frame PPFrame|bool: frame used to expand the text, or false
	 *   to fall back to replaceVariables() without a frame
	 *
	 * @return string
	 */
	function internalParse( $text, $isMain = true, $frame = false ) {
		wfProfileIn( __METHOD__ );

		# Kept unmodified, formatHeadings() below receives it
		$origText = $text;

		# Hook to suspend the parser in this state
		if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) {
			wfProfileOut( __METHOD__ );
			return $text ;
		}

		# if $frame is provided, then use $frame for replacing any variables
		if ( $frame ) {
			# use frame depth to infer how include/noinclude tags should be handled
			# depth=0 means this is the top-level document; otherwise it's an included document
			if ( !$frame->depth ) {
				$flag = 0;
			} else {
				$flag = Parser::PTD_FOR_INCLUSION;
			}
			$dom = $this->preprocessToDom( $text, $flag );
			$text = $frame->expand( $dom );
		} else {
			# if $frame is not provided, then use old-style replaceVariables
			$text = $this->replaceVariables( $text );
		}

		# The tag names of the transparent tag hooks are passed along as
		# extra tags for the sanitizer
		$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) );
		wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );

		# Tables need to come after variable replacement for things to work
		# properly; putting them before other transformations should keep
		# exciting things like link expansions from showing up in surprising
		# places.
		$text = $this->doTableStuff( $text );

		# Four or more dashes at the start of a line become a horizontal rule
		$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

		$text = $this->doDoubleUnderscore( $text );

		$text = $this->doHeadings( $text );
		if ( $this->mOptions->getUseDynamicDates() ) {
			$df = DateFormatter::getInstance();
			$text = $df->reformat( $this->mOptions->getDateFormat(), $text );
		}
		$text = $this->replaceInternalLinks( $text );
		$text = $this->doAllQuotes( $text );
		$text = $this->replaceExternalLinks( $text );

		# replaceInternalLinks may sometimes leave behind
		# absolute URLs, which have to be masked to hide them from replaceExternalLinks
		$text = str_replace( $this->mUniqPrefix.'NOPARSE', '', $text );

		$text = $this->doMagicLinks( $text );
		$text = $this->formatHeadings( $text, $origText, $isMain );

		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Replace special strings like "ISBN xxx" and "RFC xxx" with
	 * magic external links.
	 *
	 * A single regex pass finds existing anchors, other HTML elements, free
	 * external URLs, "RFC n" / "PMID n" and ISBNs; magicLinkCallback() then
	 * decides what each match becomes. Anchors and HTML elements are matched
	 * only so that their contents are skipped.
	 *
	 * DML
	 * @private
	 *
	 * @param $text string
	 *
	 * @return string the text with magic links replaced; the input is
	 *   returned unchanged if the regex engine reports an error
	 */
	function doMagicLinks( $text ) {
		wfProfileIn( __METHOD__ );
		$prots = wfUrlProtocolsWithoutProtRel();
		$urlChar = self::EXT_LINK_URL_CLASS;
		$linked = preg_replace_callback(
			'!(?:                           # Start cases
				(<a[ \t\r\n>].*?</a>) |     # m[1]: Skip link text
				(<.*?>) |                   # m[2]: Skip stuff inside HTML elements' . "
				(\\b(?:$prots)$urlChar+) |  # m[3]: Free external links" . '
				(?:RFC|PMID)\s+([0-9]+) |   # m[4]: RFC or PMID, capture number
				ISBN\s+(\b                  # m[5]: ISBN, capture number
					(?: 97[89] [\ \-]? )?   # optional 13-digit ISBN prefix
					(?: [0-9]  [\ \-]? ){9} # 9 digits with opt. delimiters
					[0-9Xx]                 # check digit
					\b)
			)!xu', array( $this, 'magicLinkCallback' ), $text );
		# preg_replace_callback() yields null on a PCRE error (for example
		# invalid UTF-8 with the "u" modifier, or an exhausted backtrack
		# limit). Keep the unlinked text rather than discarding it.
		if ( $linked !== null ) {
			$text = $linked;
		}
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Callback for doMagicLinks(): produce the replacement for one match.
	 *
	 * The capture groups are checked in order; the first non-empty one
	 * decides the result. Groups 1 and 2 (anchors, HTML elements) are
	 * returned untouched, group 3 becomes a free external link, group 4 an
	 * RFC or PMID link and group 5 an ISBN link to Special:Booksources.
	 *
	 * @throws MWException when group 4 matched but the match begins with
	 *   neither "RFC" nor "PMID"
	 * @param $m array match array supplied by preg_replace_callback()
	 * @return string replacement HTML, or the matched text unchanged
	 */
	function magicLinkCallback( $m ) {
		# Anchors and other HTML elements are matched only to be skipped
		$isAnchor = isset( $m[1] ) && $m[1] !== '';
		$isHtmlElement = isset( $m[2] ) && $m[2] !== '';
		if ( $isAnchor || $isHtmlElement ) {
			return $m[0];
		}

		# Free external link
		if ( isset( $m[3] ) && $m[3] !== '' ) {
			return $this->makeFreeExternalLink( $m[0] );
		}

		# RFC or PMID; the captured number may legitimately be "0", hence
		# the comparison against '' instead of a truthiness test
		if ( isset( $m[4] ) && $m[4] !== '' ) {
			$id = $m[4];
			if ( strncmp( $m[0], 'RFC', 3 ) === 0 ) {
				$keyword = 'RFC';
				$urlmsg = 'rfcurl';
				$CssClass = 'mw-magiclink-rfc';
			} elseif ( strncmp( $m[0], 'PMID', 4 ) === 0 ) {
				$keyword = 'PMID';
				$urlmsg = 'pubmedurl';
				$CssClass = 'mw-magiclink-pmid';
			} else {
				throw new MWException( __METHOD__.': unrecognised match type "' .
					substr( $m[0], 0, 20 ) . '"' );
			}
			$url = wfMsgForContent( $urlmsg, $id );
			return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $CssClass );
		}

		# ISBN
		if ( isset( $m[5] ) && $m[5] !== '' ) {
			$isbn = $m[5];
			# Normalised form for the page title: no separators, upper-case X
			$num = str_replace( array( '-', ' ', 'x' ), array( '', '', 'X' ), $isbn );
			$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
			$href = htmlspecialchars( $titleObj->getLocalUrl() );
			return '<a href="' . $href .
				'" class="internal mw-magiclink-isbn">ISBN ' . $isbn . '</a>';
		}

		# No group matched: leave the text as it was
		return $m[0];
	}

	/**
	 * Make a free external link, given a user-supplied URL
	 *
	 * Anything from the first "&lt;" or "&gt;" onwards, and any trailing
	 * punctuation, is split off and appended after the link instead of
	 * being part of it. If maybeMakeExternalImage() accepts the URL, its
	 * result is used in place of a link; otherwise the link is also
	 * registered with the output object.
	 *
	 * @param $url string
	 *
	 * @return string HTML
	 * @private
	 */
	function makeFreeExternalLink( $url ) {
		wfProfileIn( __METHOD__ );

		$trail = '';

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$m2 = array();
		if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
			$trail = substr( $url, $m2[0][1] ) . $trail;
			$url = substr( $url, 0, $m2[0][1] );
		}

		# Move trailing punctuation to $trail.
		# The literal is single-quoted, so the backslash is itself a member
		# of this character set.
		$sep = ',;\.:!?';
		# If there is no left bracket, then consider right brackets fair game too
		if ( strpos( $url, '(' ) === false ) {
			$sep .= ')';
		}

		$numSepChars = strspn( strrev( $url ), $sep );

		# Don't break a trailing character reference such as "&amp;" or
		# "&#39;": if the punctuation run starts with the ';' that terminates
		# one, that ';' stays part of the URL.
		if ( $numSepChars && substr( $url, -$numSepChars, 1 ) === ';' ) {
			$beforeRun = substr( $url, 0, -$numSepChars );
			if ( preg_match( '/&(?:[a-z]+|#x[\da-f]+|#\d+)$/i', $beforeRun ) ) {
				$numSepChars--;
			}
		}

		# Guarded because substr() with an offset of -0 would return the
		# whole string.
		if ( $numSepChars ) {
			$trail = substr( $url, -$numSepChars ) . $trail;
			$url = substr( $url, 0, -$numSepChars );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Is this an external image?
		$text = $this->maybeMakeExternalImage( $url );
		if ( $text === false ) {
			# Not an image, make a link
			$text = Linker::makeExternalLink( $url,
				$this->getConverterLanguage()->markNoConversion($url), true, 'free',
				$this->getExternalLinkAttribs( $url ) );
			# Register it in the output object...
			# Replace unnecessary URL escape codes with their equivalent characters
			$pasteurized = self::replaceUnusualEscapes( $url );
			$this->mOutput->addExternalLink( $pasteurized );
		}
		wfProfileOut( __METHOD__ );
		return $text . $trail;
	}


	/**
	 * Convert "== heading ==" style lines into <h1>..<h6> elements.
	 *
	 * A line qualifies when it starts with N equals signs and ends with N
	 * equals signs, optionally followed by whitespace.
	 *
	 * @private
	 *
	 * @param $text string
	 *
	 * @return string
	 */
	function doHeadings( $text ) {
		wfProfileIn( __METHOD__ );
		# Work from the longest marker down: the pattern for a shorter marker
		# also matches lines written with a longer one, so those lines must
		# already have been converted by the time it runs.
		$level = 6;
		while ( $level >= 1 ) {
			$marker = str_repeat( '=', $level );
			$pattern = '/^' . $marker . '(.+)' . $marker . '\\s*$/m';
			$replacement = '<h' . $level . '>\\1</h' . $level . '>';
			$text = preg_replace( $pattern, $replacement, $text );
			$level--;
		}
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Replace apostrophe mark-up with HTML, one line at a time.
	 *
	 * The text is split on "\n", each line goes through doQuotes(), and the
	 * results are joined with "\n" again.
	 * @private
	 *
	 * @param $text string
	 *
	 * @return string the altered text
	 */
	function doAllQuotes( $text ) {
		wfProfileIn( __METHOD__ );
		$outtext = '';
		# Empty before the first line, "\n" before every later one, so no
		# trailing separator has to be trimmed afterwards
		$glue = '';
		foreach ( StringUtils::explode( "\n", $text ) as $line ) {
			$outtext .= $glue . $this->doQuotes( $line );
			$glue = "\n";
		}
		wfProfileOut( __METHOD__ );
		return $outtext;
	}

	/**
	 * Helper function for doAllQuotes(): convert the apostrophe mark-up in
	 * one piece of text (doAllQuotes() passes single lines) to <i>/<b> tags.
	 *
	 * Runs of two, three and five apostrophes toggle italic, bold and
	 * bold-italic. A run of four, or of more than five, is cut down to three
	 * or five, with the surplus kept as literal apostrophes in the preceding
	 * text. Tags still open at the end of the text are closed.
	 *
	 * @param $text string
	 *
	 * @return string
	 */
	public function doQuotes( $text ) {
		$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		if ( count( $arr ) == 1 ) {
			# No run of two or more apostrophes: nothing to do
			return $text;
		}

		# Because of PREG_SPLIT_DELIM_CAPTURE, even indexes of $arr hold
		# plain text and odd indexes hold the apostrophe runs.
		$numParts = count( $arr );

		# First, do some preliminary work. This may shift some apostrophes from
		# being mark-up to being text. It also counts the number of occurrences
		# of bold and italics mark-ups.
		$numbold = 0;
		$numitalics = 0;
		for ( $i = 1; $i < $numParts; $i += 2 ) {
			# If there are ever four apostrophes, assume the first is supposed to
			# be text, and the remaining three constitute mark-up for bold text.
			if ( strlen( $arr[$i] ) == 4 ) {
				$arr[$i-1] .= "'";
				$arr[$i] = "'''";
			} elseif ( strlen( $arr[$i] ) > 5 ) {
				# If there are more than 5 apostrophes in a row, assume they're all
				# text except for the last 5.
				$arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
				$arr[$i] = "'''''";
			}
			# Count the number of occurrences of bold and italics mark-ups.
			# A run of five counts as one of each.
			if ( strlen( $arr[$i] ) == 2 ) {
				$numitalics++;
			} elseif ( strlen( $arr[$i] ) == 3 ) {
				$numbold++;
			} elseif ( strlen( $arr[$i] ) == 5 ) {
				$numitalics++;
				$numbold++;
			}
		}

		# If there is an odd number of both bold and italics, it is likely
		# that one of the bold ones was meant to be an apostrophe followed
		# by italics. Which one we cannot know for certain, but it is more
		# likely to be one that has a single-letter word before it.
		if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
			$firstsingleletterword = -1;
			$firstmultiletterword = -1;
			$firstspace = -1;
			for ( $i = 1; $i < $numParts; $i += 2 ) {
				if ( strlen( $arr[$i] ) != 3 ) {
					continue;
				}
				# Last and second-to-last character of the text before the run
				$x1 = substr( $arr[$i-1], -1 );
				$x2 = substr( $arr[$i-1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					if ( $firstsingleletterword == -1 ) {
						$firstsingleletterword = $i;
					}
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}

			# If there is a single-letter word, use it!
			if ( $firstsingleletterword > -1 ) {
				$arr[$firstsingleletterword] = "''";
				$arr[$firstsingleletterword-1] .= "'";
			} elseif ( $firstmultiletterword > -1 ) {
				# If not, but there's a multi-letter word, use that one.
				$arr[$firstmultiletterword] = "''";
				$arr[$firstmultiletterword-1] .= "'";
			} elseif ( $firstspace > -1 ) {
				# ... otherwise use the first one that has neither.
				# (notice that it is possible for all three to be -1 if, for example,
				# there is only one pentuple-apostrophe in the line)
				$arr[$firstspace] = "''";
				$arr[$firstspace-1] .= "'";
			}
		}

		# Now let's actually convert our apostrophic mush to HTML!
		# $state names the currently open tags in opening order ('', 'i',
		# 'b', 'ib', 'bi'). 'both' means a run of five was seen and the
		# opening order is not known yet; text is held in $buffer until the
		# next run settles it.
		$output = '';
		$buffer = '';
		$state = '';
		$i = 0;
		foreach ( $arr as $r ) {
			if ( ( $i % 2 ) == 0 ) {
				if ( $state === 'both' ) {
					$buffer .= $r;
				} else {
					$output .= $r;
				}
			} else {
				if ( strlen( $r ) == 2 ) {
					if ( $state === 'i' ) {
						$output .= '</i>'; $state = '';
					} elseif ( $state === 'bi' ) {
						$output .= '</i>'; $state = 'b';
					} elseif ( $state === 'ib' ) {
						$output .= '</b></i><b>'; $state = 'b';
					} elseif ( $state === 'both' ) {
						$output .= '<b><i>'.$buffer.'</i>'; $state = 'b';
					} else { # $state can be 'b' or ''
						$output .= '<i>'; $state .= 'i';
					}
				} elseif ( strlen( $r ) == 3 ) {
					if ( $state === 'b' ) {
						$output .= '</b>'; $state = '';
					} elseif ( $state === 'bi' ) {
						$output .= '</i></b><i>'; $state = 'i';
					} elseif ( $state === 'ib' ) {
						$output .= '</b>'; $state = 'i';
					} elseif ( $state === 'both' ) {
						$output .= '<i><b>'.$buffer.'</b>'; $state = 'i';
					} else { # $state can be 'i' or ''
						$output .= '<b>'; $state .= 'b';
					}
				} elseif ( strlen( $r ) == 5 ) {
					if ( $state === 'b' ) {
						$output .= '</b><i>'; $state = 'i';
					} elseif ( $state === 'i' ) {
						$output .= '</i><b>'; $state = 'b';
					} elseif ( $state === 'bi' ) {
						$output .= '</i></b>'; $state = '';
					} elseif ( $state === 'ib' ) {
						$output .= '</b></i>'; $state = '';
					} elseif ( $state === 'both' ) {
						$output .= '<i><b>'.$buffer.'</b></i>'; $state = '';
					} else { # ($state == '')
						$buffer = ''; $state = 'both';
					}
				}
			}
			$i++;
		}
		# Now close all remaining tags.  Notice that the order is important.
		if ( $state === 'b' || $state === 'ib' ) {
			$output .= '</b>';
		}
		if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
			$output .= '</i>';
		}
		if ( $state === 'bi' ) {
			$output .= '</b>';
		}
		# There might be lonely ''''', so make sure we have a buffer.
		# Compare against '' explicitly: a plain truthiness test would drop
		# buffered text that is exactly "0".
		if ( $state === 'both' && $buffer !== '' ) {
			$output .= '<b><i>'.$buffer.'</i></b>';
		}
		return $output;
	}

	/**
	 * Replace external links (REL)
	 *
	 * Splits the text on $this->mExtLinkBracketedRegex and rebuilds it with
	 * every bracketed link replaced by the output of
	 * Linker::makeExternalLink(). Each link is also registered with the
	 * output object.
	 *
	 * Note: this is all very hackish and the order of execution matters a lot.
	 * Make sure to run maintenance/parserTests.php if you change this code.
	 *
	 * @private
	 *
	 * @param $text string
	 *
	 * @return string
	 */
	function replaceExternalLinks( $text ) {
		wfProfileIn( __METHOD__ );

		$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		# Text before the first link
		$s = array_shift( $bits );

		# What remains is consumed four entries at a time:
		# URL, protocol (not needed here), link text, following plain text
		$numBits = count( $bits );
		for ( $pos = 0; $pos < $numBits; $pos += 4 ) {
			$url = $bits[$pos];
			$label = $bits[$pos + 2];
			$trail = $bits[$pos + 3];

			# The characters '<' and '>' (which were escaped by
			# removeHTMLtags()) should not be included in
			# URLs, per RFC 2396.
			$m2 = array();
			if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
				$cut = $m2[0][1];
				$label = substr( $url, $cut ) . ' ' . $label;
				$url = substr( $url, 0, $cut );
			}

			# If the link text is an image URL, replace it with an <img> tag
			# This happened by accident in the original parser, but some people used it extensively
			$img = $this->maybeMakeExternalImage( $label );
			if ( $img !== false ) {
				$label = $img;
			}

			$dtrail = '';

			if ( $label == '' ) {
				# No link text, e.g. [http://domain.tld/some.link]: autonumber
				$langObj = $this->getTargetLanguage();
				$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
				$linktype = 'autonumber';
			} else {
				# Set linktype for CSS - if URL==text, link is essentially free
				$linktype = ( $label === $url ) ? 'free' : 'text';
				# Have link text, e.g. [http://domain.tld/some.link text]s
				# Check for trail
				list( $dtrail, $trail ) = Linker::splitTrail( $trail );
			}

			$label = $this->getConverterLanguage()->markNoConversion( $label );

			$url = Sanitizer::cleanUrl( $url );

			# Use the encoded URL
			# This means that users can paste URLs directly into the text
			# Funny characters like ö aren't valid in URLs anyway
			# This was changed in August 2004
			$link = Linker::makeExternalLink( $url, $label, false, $linktype,
				$this->getExternalLinkAttribs( $url ) );
			$s .= $link . $dtrail . $trail;

			# Register link in the output object.
			# Replace unnecessary URL escape codes with the referenced character
			# This prevents spammers from hiding links from the filters
			$this->mOutput->addExternalLink( self::replaceUnusualEscapes( $url ) );
		}

		wfProfileOut( __METHOD__ );
		return $s;
	}

	/**
	 * Build the extra HTML attributes for an external link.
	 *
	 * The result may contain rel => nofollow (when $wgNoFollowLinks is on,
	 * the current title's namespace is not in $wgNoFollowNsExceptions and
	 * the URL does not match $wgNoFollowDomainExceptions) and/or a target
	 * attribute (when the parser options supply one).
	 *
	 * @param $url String|bool optional URL, to extract the domain from for rel =>
	 *   nofollow if appropriate
	 * @return Array associative array of HTML attributes
	 */
	function getExternalLinkAttribs( $url = false ) {
		global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

		$attribs = array();

		$ns = $this->mTitle->getNamespace();
		# Evaluated left to right, so the domain check only runs when the
		# two cheaper conditions already hold
		$wantNofollow = $wgNoFollowLinks
			&& !in_array( $ns, $wgNoFollowNsExceptions )
			&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions );
		if ( $wantNofollow ) {
			$attribs['rel'] = 'nofollow';
		}

		$target = $this->mOptions->getExternalLinkTarget();
		if ( $target ) {
			$attribs['target'] = $target;
		}

		return $attribs;
	}

	/**
	 * Replace unusual URL escape codes with their equivalent characters
	 *
	 * Every %XX sequence in the URL is handed to
	 * replaceUnusualEscapesCallback(), which decides whether to decode it.
	 *
	 * @param $url String
	 * @return String
	 *
	 * @todo  This can merge genuinely required bits in the path or query string,
	 *        breaking legit URLs. A proper fix would treat the various parts of
	 *        the URL differently; as a workaround, just use the output for
	 *        statistical records, not for actual linking/output.
	 */
	static function replaceUnusualEscapes( $url ) {
		$decoder = array( __CLASS__, 'replaceUnusualEscapesCallback' );
		return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $decoder, $url );
	}

	/**
	 * Callback function used in replaceUnusualEscapes().
	 * Decodes one %XX escape when the character it stands for is printable
	 * ASCII (codes 33-126) and not in the list of characters that must
	 * stay escaped; otherwise the escape is returned unchanged.
	 *
	 * @param $matches array match array; $matches[0] is the %XX sequence
	 *
	 * @return string the decoded character or the original escape
	 */
	private static function replaceUnusualEscapesCallback( $matches ) {
		$escape = $matches[0];
		$char = urldecode( $escape );
		$ord = ord( $char );

		# Is it an unsafe or HTTP reserved character according to RFC 1738?
		$isPrintable = ( $ord > 32 && $ord < 127 );
		if ( $isPrintable && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
			# No, shouldn't be escaped
			return $char;
		}

		# Yes, leave it escaped
		return $escape;
	}

	/**
	 * Make an image if it's allowed, either through the global
	 * option, through the exception, or through the on-wiki whitelist.
	 *
	 * In every case the URL must also match self::EXT_IMAGE_REGEX.
	 * @private
	 *
	 * @param $url string
	 *
	 * @return string|bool the result of Linker::makeExternalImage(), or
	 *   false when the URL is not rendered as an image
	 */
	function maybeMakeExternalImage( $url ) {
		$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
		$imagesexception = !empty( $imagesfrom );

		# Does the URL start with an allowed prefix?
		# $imagesfrom could be either a single string or an array of strings
		$imagematch = false;
		if ( $imagesexception ) {
			if ( is_array( $imagesfrom ) ) {
				foreach ( $imagesfrom as $prefix ) {
					if ( strpos( $url, $prefix ) === 0 ) {
						$imagematch = true;
						break;
					}
				}
			} else {
				$imagematch = ( strpos( $url, $imagesfrom ) === 0 );
			}
		}

		$text = false;
		# $imagematch can only be true when a prefix list is configured
		if ( $this->mOptions->getAllowExternalImages() || $imagematch ) {
			if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
				# Image found
				$text = Linker::makeExternalImage( $url );
			}
		}
		if ( $text ) {
			return $text;
		}

		# Last resort: the on-wiki whitelist, one regex fragment per line
		if ( $this->mOptions->getEnableImageWhitelist()
			 && preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
			$whitelist = explode( "\n", wfMsgForContent( 'external_image_whitelist' ) );
			foreach ( $whitelist as $entry ) {
				# Ignore blank entries and comment lines
				if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
					continue;
				}
				# Escape the delimiter in the fragment and match case-insensitively
				$pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
				if ( preg_match( $pattern, $url ) ) {
					# Image matches a whitelist entry
					$text = Linker::makeExternalImage( $url );
					break;
				}
			}
		}
		return $text;
	}

	/**
	 * Process [[ ]] wikilinks
	 *
	 * @param $s string
	 *
	 * @return String: processed text
	 *
	 * @private
	 */
	function re…
Alerts (44)

'var' Legacy var keyword detected; use public/private/protected for class properties
102 103 104 105 106 107 108 109 110 111 112 113 120 125 126 132 133 138 140 144 146 147 148 149 151 156 164 169 170 171 172 173 174 175 176 181
'global $' Use of global variables; prefer dependency injection or function parameters
324 376 1560
Complexity hotspot; lines 392 to 396 (total complexity: 5)
392 393 394 395 396