/includes/parser/Parser.php
PHP | 5824 lines | 3544 code | 502 blank | 1778 comment | 693 complexity | 9d59990fe5fcb7ecb49bf426312bf069 MD5 | raw file
Possible License(s): GPL-2.0, Apache-2.0, LGPL-3.0
Large files are truncated, but you can click here to view the full file
- <?php
- /**
- * PHP parser that converts wiki markup to HTML.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Parser
- */
- /**
- * @defgroup Parser Parser
- */
- /**
- * PHP Parser - Processes wiki markup (which uses a more user-friendly
- * syntax, such as "[[link]]" for making links), and provides a one-way
- * transformation of that wiki markup into XHTML output / markup
- * (which in turn the browser understands, and can display).
- *
- * There are seven main entry points into the Parser class:
- *
- * - Parser::parse()
- * produces HTML output
- * - Parser::preSaveTransform().
- * produces altered wiki markup.
- * - Parser::preprocess()
- * removes HTML comments and expands templates
- * - Parser::cleanSig() and Parser::cleanSigInSig()
- * Cleans a signature before saving it to preferences
- * - Parser::getSection()
- * Return the content of a section from an article for section editing
- * - Parser::replaceSection()
- * Replaces a section by number inside an article
- * - Parser::getPreloadText()
- * Removes <noinclude> sections, and <includeonly> tags.
- *
- * Globals used:
- * object: $wgContLang
- *
- * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
- *
- * @par Settings:
- * $wgLocaltimezone
- * $wgNamespacesWithSubpages
- *
- * @par Settings only within ParserOptions:
- * $wgAllowExternalImages
- * $wgAllowSpecialInclusion
- * $wgInterwikiMagic
- * $wgMaxArticleSize
- * $wgUseDynamicDates
- *
- * @ingroup Parser
- */
- class Parser {
- /**
- * Update this version number when the ParserOutput format
- * changes in an incompatible way, so the parser cache
- * can automatically discard old data.
- */
- const VERSION = '1.6.4';
- /**
- * Update this version number when the output of serialiseHalfParsedText()
- * changes in an incompatible way
- */
- const HALF_PARSED_VERSION = 2;
- # Flags for Parser::setFunctionHook
- # Also available as global constants from Defines.php
- const SFH_NO_HASH = 1;
- const SFH_OBJECT_ARGS = 2;
- # Constants needed for external link processing
- # Everything except bracket, space, or control characters
- # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
- # as well as U+3000 (IDEOGRAPHIC SPACE), see bug 19052
- const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
- const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
- \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; # the literal spans two lines; the pattern carries the x modifier
- # State constants for the definition list colon extraction
- const COLON_STATE_TEXT = 0;
- const COLON_STATE_TAG = 1;
- const COLON_STATE_TAGSTART = 2;
- const COLON_STATE_CLOSETAG = 3;
- const COLON_STATE_TAGSLASH = 4;
- const COLON_STATE_COMMENT = 5;
- const COLON_STATE_COMMENTDASH = 6;
- const COLON_STATE_COMMENTDASHDASH = 7;
- # Flags for preprocessToDom
- const PTD_FOR_INCLUSION = 1;
- # Allowed values for $this->mOutputType
- # Parameter to startExternalParse().
- const OT_HTML = 1; # like parse()
- const OT_WIKI = 2; # like preSaveTransform()
- const OT_PREPROCESS = 3; # like preprocess()
- const OT_MSG = 3; # NOTE(review): same value as OT_PREPROCESS - confirm this aliasing is intentional
- const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
- # Marker suffix needs to be accessible statically.
- const MARKER_SUFFIX = "-QINU\x7f";
- # Persistent:
- var $mTagHooks = array();
- var $mTransparentTagHooks = array();
- var $mFunctionHooks = array();
- var $mFunctionSynonyms = array( 0 => array(), 1 => array() );
- var $mFunctionTagHooks = array();
- var $mStripList = array();
- var $mDefaultStripList = array();
- var $mVarCache = array();
- var $mImageParams = array();
- var $mImageParamsMagicArray = array();
- var $mMarkerIndex = 0;
- var $mFirstCall = true;
- # Initialised by initialiseVariables()
- /**
-  * @var MagicWordArray
-  */
- var $mVariables;
- /**
-  * @var MagicWordArray
-  */
- var $mSubstWords;
- var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
- /**
-  * Name of the preprocessor class to instantiate in getPreprocessor().
-  * Assigned in the constructor; declared here so that it is a regular
-  * property rather than one created dynamically on first assignment.
-  *
-  * @var string
-  */
- var $mPreprocessorClass;
- # Cleared with clearState():
- /**
-  * @var ParserOutput
-  */
- var $mOutput;
- var $mAutonumber, $mDTopen;
- /**
-  * @var StripState
-  */
- var $mStripState;
- var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
- /**
-  * @var LinkHolderArray
-  */
- var $mLinkHolders;
- var $mLinkID;
- var $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
- var $mDefaultSort;
- var $mTplExpandCache; # empty-frame expansion cache
- var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
- var $mExpensiveFunctionCount; # number of expensive parser function calls
- var $mShowToc, $mForceTocPosition;
- /**
-  * @var User
-  */
- var $mUser; # User object; only used when doing pre-save transform
- # Temporary
- # These are variables reset at least once per parse regardless of $clearState
- /**
-  * @var ParserOptions
-  */
- var $mOptions;
- /**
-  * @var Title
-  */
- var $mTitle; # Title context, used for self-link rendering and similar things
- var $mOutputType; # Output type, one of the OT_xxx constants
- var $ot; # Shortcut alias, see setOutputType()
- var $mRevisionObject; # The revision object of the specified revision ID
- var $mRevisionId; # ID to display in {{REVISIONID}} tags
- var $mRevisionTimestamp; # The timestamp of the specified revision ID
- var $mRevisionUser; # User to display in {{REVISIONUSER}} tag
- var $mRevIdForTs; # The revision ID which was used to fetch the timestamp
- /**
-  * Prefix of the temporary strip markers; regenerated by clearState().
-  *
-  * @var string
-  */
- var $mUniqPrefix;
- /**
-  * Build a parser: store the configuration, precompute the bracketed
-  * external-link regex and decide which preprocessor class to use.
-  *
-  * The preprocessor class is taken from $conf['preprocessorClass'] when set.
-  * Otherwise Preprocessor_DOM is used only when the 'dom' extension is
-  * loaded, MW_COMPILED is not defined and the obsolete 'domxml' extension
-  * is absent; in every other case Preprocessor_Hash is used.
-  *
-  * @param $conf array
-  */
- public function __construct( $conf = array() ) {
-     $this->mConf = $conf;
-     $this->mUrlProtocols = wfUrlProtocols();
-     $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
-         self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
-     if ( isset( $conf['preprocessorClass'] ) ) {
-         $chosenClass = $conf['preprocessorClass'];
-     } else {
-         $chosenClass = 'Preprocessor_Hash';
-         # Preprocessor_Hash is much faster than Preprocessor_DOM in compiled mode,
-         # so the extension checks only matter when MW_COMPILED is not defined.
-         if ( !defined( 'MW_COMPILED' ) ) {
-             if ( extension_loaded( 'domxml' ) ) {
-                 # PECL extension that conflicts with the core DOM extension (bug 13770)
-                 wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
-             } elseif ( extension_loaded( 'dom' ) ) {
-                 $chosenClass = 'Preprocessor_DOM';
-             }
-         }
-     }
-     $this->mPreprocessorClass = $chosenClass;
-     wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
- }
- /**
-  * Drop every property of the object on destruction, to reduce memory
-  * usage and the impact of circular references.
-  */
- public function __destruct() {
-     if ( isset( $this->mLinkHolders ) ) {
-         unset( $this->mLinkHolders );
-     }
-     # Snapshot the remaining property names first, then remove each one.
-     foreach ( array_keys( get_object_vars( $this ) ) as $property ) {
-         unset( $this->$property );
-     }
- }
- /**
-  * One-time initialisation performed on the first use of the parser:
-  * registers the core parser functions and tag hooks, initialises the
-  * variables and fires the ParserFirstCallInit hook. Later calls are no-ops.
-  */
- public function firstCallInit() {
-     if ( $this->mFirstCall ) {
-         # Flip the flag before doing any work so the body runs only once.
-         $this->mFirstCall = false;
-         wfProfileIn( __METHOD__ );
-         CoreParserFunctions::register( $this );
-         CoreTagHooks::register( $this );
-         $this->initialiseVariables();
-         wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
-         wfProfileOut( __METHOD__ );
-     }
- }
- /**
- * Clear Parser state
- *
- * Runs firstCallInit() if it has not run yet, then resets the per-parse
- * members: a new ParserOutput (registered as option watcher on mOptions),
- * a new LinkHolderArray and StripState, a freshly generated marker prefix,
- * and zeroed counters, caches, revision fields and the user override.
- * Finally fires the ParserClearState hook.
- *
- * Requires $this->mOptions to be set already, since it is dereferenced here.
- *
- * @private
- */
- function clearState() {
- wfProfileIn( __METHOD__ );
- if ( $this->mFirstCall ) {
- $this->firstCallInit();
- }
- $this->mOutput = new ParserOutput;
- $this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
- $this->mAutonumber = 0;
- $this->mLastSection = '';
- $this->mDTopen = false;
- $this->mIncludeCount = array();
- $this->mArgStack = false;
- $this->mInPre = false;
- $this->mLinkHolders = new LinkHolderArray( $this );
- $this->mLinkID = 0;
- $this->mRevisionObject = $this->mRevisionTimestamp =
- $this->mRevisionId = $this->mRevisionUser = null;
- $this->mVarCache = array();
- $this->mUser = null;
- /**
- * Prefix for temporary replacement strings for the multipass parser.
- * \x07 should never appear in input as it's disallowed in XML.
- * Using it at the front also gives us a little extra robustness
- * since it shouldn't match when butted up against identifier-like
- * string constructs.
- *
- * NOTE(review): the text above mentions \x07, but the prefix built
- * below actually starts with \x7f - confirm which one is intended.
- *
- * Must not consist of all title characters, or else it will change
- * the behaviour of <nowiki> in a link.
- */
- $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
- $this->mStripState = new StripState( $this->mUniqPrefix );
- # Clear these on every parse, bug 4549
- $this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array();
- $this->mShowToc = true;
- $this->mForceTocPosition = false;
- $this->mIncludeSizes = array(
- 'post-expand' => 0,
- 'arg' => 0,
- );
- $this->mPPNodeCount = 0;
- $this->mGeneratedPPNodeCount = 0;
- $this->mHighestExpansionDepth = 0;
- $this->mDefaultSort = false;
- $this->mHeadings = array();
- $this->mDoubleUnderscores = array();
- $this->mExpensiveFunctionCount = 0;
- # Fix cloning: a preprocessor that still points at another parser is discarded
- if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
- $this->mPreprocessor = null;
- }
- wfRunHooks( 'ParserClearState', array( &$this ) );
- wfProfileOut( __METHOD__ );
- }
- /**
-  * Convert wikitext to HTML.
-  * Do not call this function recursively.
-  *
-  * The revision fields (mRevisionId, mRevisionObject, mRevisionTimestamp,
-  * mRevisionUser) are saved on entry and restored before returning; when
-  * $revid is given they are overridden for the duration of the parse.
-  *
-  * @param $text String: text we want to parse
-  * @param $title Title object
-  * @param $options ParserOptions
-  * @param $linestart boolean
-  * @param $clearState boolean
-  * @param $revid Int: number to pass in {{REVISIONID}}
-  * @return ParserOutput a ParserOutput
-  */
- public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) {
-     /**
-      * First pass--just handle <nowiki> sections, pass the rest off
-      * to internalParse() which does all the real work.
-      */
-     global $wgUseTidy, $wgAlwaysUseTidy;
-     $fname = __METHOD__.'-' . wfGetCaller();
-     wfProfileIn( __METHOD__ );
-     wfProfileIn( $fname );
-     $this->startParse( $title, $options, self::OT_HTML, $clearState );
-     # Remove the strip marker tag prefix from the input, if present.
-     if ( $clearState ) {
-         $text = str_replace( $this->mUniqPrefix, '', $text );
-     }
-     # Remember the revision context so it can be restored at the end.
-     $oldRevisionId = $this->mRevisionId;
-     $oldRevisionObject = $this->mRevisionObject;
-     $oldRevisionTimestamp = $this->mRevisionTimestamp;
-     $oldRevisionUser = $this->mRevisionUser;
-     if ( $revid !== null ) {
-         $this->mRevisionId = $revid;
-         $this->mRevisionObject = null;
-         $this->mRevisionTimestamp = null;
-         $this->mRevisionUser = null;
-     }
-     wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
-     # No more strip!
-     wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
-     $text = $this->internalParse( $text );
-     wfRunHooks( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) );
-     $text = $this->mStripState->unstripGeneral( $text );
-     # Clean up special characters, only run once, next-to-last before doBlockLevels.
-     # The non-breaking space is written as the numeric entity &#160; so that it is
-     # explicit in the markup and so the last rule can find and undo it.
-     $fixtags = array(
-         # french spaces, last one Guillemet-left
-         # only if there is something before the space
-         '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
-         # french spaces, Guillemet-right
-         '/(\\302\\253) /' => '\\1&#160;',
-         '/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
-     );
-     $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
-     $text = $this->doBlockLevels( $text, $linestart );
-     $this->replaceLinkHolders( $text );
-     /**
-      * The input doesn't get language converted if
-      * a) It's disabled
-      * b) Content isn't converted
-      * c) It's a conversion table
-      * d) it is an interface message (which is in the user language)
-      */
-     if ( !( $options->getDisableContentConversion()
-         || isset( $this->mDoubleUnderscores['nocontentconvert'] ) ) )
-     {
-         # Run convert unconditionally in 1.18-compatible mode
-         global $wgBug34832TransitionalRollback;
-         if ( $wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage() ) {
-             # The position of the convert() call should not be changed. it
-             # assumes that the links are all replaced and the only thing left
-             # is the <nowiki> mark.
-             $text = $this->getConverterLanguage()->convert( $text );
-         }
-     }
-     /**
-      * A converted title will be provided in the output object if title and
-      * content conversion are enabled, the article text does not contain
-      * a conversion-suppressing double-underscore tag, and no
-      * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
-      * automatic link conversion.
-      */
-     if ( !( $options->getDisableTitleConversion()
-         || isset( $this->mDoubleUnderscores['nocontentconvert'] )
-         || isset( $this->mDoubleUnderscores['notitleconvert'] )
-         || $this->mOutput->getDisplayTitle() !== false ) )
-     {
-         $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
-         if ( $convruletitle ) {
-             $this->mOutput->setTitleText( $convruletitle );
-         } else {
-             $titleText = $this->getConverterLanguage()->convertTitle( $title );
-             $this->mOutput->setTitleText( $titleText );
-         }
-     }
-     $text = $this->mStripState->unstripNoWiki( $text );
-     wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
-     $text = $this->replaceTransparentTags( $text );
-     $text = $this->mStripState->unstripGeneral( $text );
-     $text = Sanitizer::normalizeCharReferences( $text );
-     if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
-         $text = MWTidy::tidy( $text );
-     } else {
-         # attempt to sanitize at least some nesting problems
-         # (bug #2702 and quite a few others)
-         $tidyregs = array(
-             # ''Something [http://www.cool.com cool''] -->
-             # <i>Something</i><a href="http://www.cool.com"..><i>cool</i></a>
-             '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
-             '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
-             # fix up an anchor inside another anchor, only
-             # at least for a single single nested link (bug 3695)
-             '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
-             '\\1\\2</a>\\3</a>\\1\\4</a>',
-             # fix div inside inline elements- doBlockLevels won't wrap a line which
-             # contains a div, so fix it up here; replace
-             # div with escaped text (the tag delimiters are emitted as &lt; / &gt;)
-             '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
-             '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
-             # remove empty italic or bold tag pairs, some
-             # introduced by rules above
-             '/<([bi])><\/\\1>/' => '',
-         );
-         $text = preg_replace(
-             array_keys( $tidyregs ),
-             array_values( $tidyregs ),
-             $text );
-     }
-     if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
-         $this->limitationWarn( 'expensive-parserfunction',
-             $this->mExpensiveFunctionCount,
-             $this->mOptions->getExpensiveParserFunctionLimit()
-         );
-     }
-     wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
-     # Information on include size limits, for the benefit of users who try to skirt them
-     if ( $this->mOptions->getEnableLimitReport() ) {
-         $max = $this->mOptions->getMaxIncludeSize();
-         $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
-         $limitReport =
-             "NewPP limit report\n" .
-             "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
-             "Preprocessor generated node count: " .
-             "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" .
-             "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
-             "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
-             "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n".
-             $PFreport;
-         wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
-         # The report is appended as an HTML comment after the rendered text.
-         $text .= "\n<!-- \n$limitReport-->\n";
-     }
-     $this->mOutput->setText( $text );
-     # Restore the revision context saved on entry.
-     $this->mRevisionId = $oldRevisionId;
-     $this->mRevisionObject = $oldRevisionObject;
-     $this->mRevisionTimestamp = $oldRevisionTimestamp;
-     $this->mRevisionUser = $oldRevisionUser;
-     wfProfileOut( $fname );
-     wfProfileOut( __METHOD__ );
-     return $this->mOutput;
- }
- /**
-  * Recursive parser entry point, meant to be called from an extension
-  * tag hook. Fires the ParserBeforeStrip and ParserAfterStrip hooks and
-  * then hands the text to internalParse() as a non-main parse.
-  *
-  * If $frame is not provided, then template variables (e.g., {{{1}}}) within $text are not expanded
-  *
-  * @param $text String: text extension wants to have parsed
-  * @param $frame PPFrame: The frame to use for expanding any template variables
-  *
-  * @return string
-  */
- public function recursiveTagParse( $text, $frame=false ) {
-     wfProfileIn( __METHOD__ );
-     # Both hooks receive the same by-reference arguments, in this order.
-     foreach ( array( 'ParserBeforeStrip', 'ParserAfterStrip' ) as $hookName ) {
-         wfRunHooks( $hookName, array( &$this, &$text, &$this->mStripState ) );
-     }
-     $parsed = $this->internalParse( $text, false, $frame );
-     wfProfileOut( __METHOD__ );
-     return $parsed;
- }
- /**
-  * Expand templates and variables in the text, producing valid, static wikitext.
-  * Also removes comments.
-  *
-  * Starts a fresh parse in OT_PREPROCESS mode, fires the ParserBeforeStrip
-  * and ParserAfterStrip hooks, replaces variables and finally unstrips the
-  * result.
-  *
-  * @param $text String: wikitext to expand
-  * @param $title Title
-  * @param $options ParserOptions
-  * @param $revid Int|null: revision ID to use for the parse, if any
-  * @return mixed|string
-  */
- public function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) {
-     wfProfileIn( __METHOD__ );
-     $this->startParse( $title, $options, self::OT_PREPROCESS, true );
-     if ( $revid !== null ) {
-         $this->mRevisionId = $revid;
-     }
-     wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
-     wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
-     $expanded = $this->replaceVariables( $text );
-     $result = $this->mStripState->unstripBoth( $expanded );
-     wfProfileOut( __METHOD__ );
-     return $result;
- }
- /**
-  * Recursive parser entry point that can be called from an extension tag
-  * hook. Replaces variables in $text using the given frame and unstrips
-  * the result; unlike preprocess() it does not start a new parse.
-  *
-  * @param $text String: text to be expanded
-  * @param $frame PPFrame: The frame to use for expanding any template variables
-  * @return String
-  * @since 1.19
-  */
- public function recursivePreprocess( $text, $frame = false ) {
-     wfProfileIn( __METHOD__ );
-     $expanded = $this->replaceVariables( $text, $frame );
-     $result = $this->mStripState->unstripBoth( $expanded );
-     wfProfileOut( __METHOD__ );
-     return $result;
- }
- /**
-  * Process the wikitext for the "?preload=" feature. (bug 5210)
-  *
-  * "<noinclude>", "<includeonly>" etc. are parsed as for template
-  * transclusion, comments, templates, arguments, tags hooks and parser
-  * functions are untouched.
-  *
-  * @param $text String
-  * @param $title Title
-  * @param $options ParserOptions
-  * @return String
-  */
- public function getPreloadText( $text, Title $title, ParserOptions $options ) {
-     # Parser (re)initialisation
-     $this->startParse( $title, $options, self::OT_PLAIN, true );
-     # Build the DOM as for inclusion, then expand it without arguments or templates.
-     $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
-     $frame = $this->getPreprocessor()->newFrame();
-     $expanded = $frame->expand( $dom, PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES );
-     return $this->mStripState->unstripBoth( $expanded );
- }
- /**
-  * Get a random string, as produced by wfRandomString( 16 ).
-  *
-  * @return string
-  */
- public static function getRandomString() {
-     $random = wfRandomString( 16 );
-     return $random;
- }
- /**
-  * Set the current user (stored in mUser).
-  * Should only be used when doing pre-save transform.
-  *
-  * @param $user Mixed: User object or null (to reset)
-  */
- public function setUser( $user ) {
-     $this->mUser = $user;
- }
- /**
-  * Accessor for mUniqPrefix.
-  *
-  * @return String the current marker prefix, or '' when none has been set yet
-  */
- public function uniqPrefix() {
-     if ( isset( $this->mUniqPrefix ) ) {
-         return $this->mUniqPrefix;
-     }
-     # @todo FIXME: This is probably *horribly wrong*
-     # LanguageConverter seems to want $wgParser's uniqPrefix, however
-     # if this is called for a parser cache hit, the parser may not
-     # have ever been initialized in the first place.
-     # Not really sure what the heck is supposed to be going on here.
-     # (Returning an empty string instead of throwing
-     # MWException( "Accessing uninitialized mUniqPrefix" ).)
-     return '';
- }
- /**
-  * Set the context title.
-  *
-  * A falsy value or a FakeTitle is replaced by the title 'NO TITLE'.
-  * A title carrying a fragment is cloned and the clone's fragment is
-  * cleared, so the caller's object is left untouched.
-  *
-  * @param $t Title
-  */
- public function setTitle( $t ) {
-     if ( !$t || $t instanceof FakeTitle ) {
-         $t = Title::newFromText( 'NO TITLE' );
-     }
-     if ( strval( $t->getFragment() ) === '' ) {
-         $this->mTitle = $t;
-         return;
-     }
-     # Strip the fragment to avoid various odd effects
-     $withoutFragment = clone $t;
-     $withoutFragment->setFragment( '' );
-     $this->mTitle = $withoutFragment;
- }
- /**
-  * Accessor for the context Title object (mTitle).
-  *
-  * @return Title object
-  */
- public function getTitle() {
-     return $this->mTitle;
- }
- /**
-  * Combined accessor/mutator for the Title object, implemented via wfSetVar().
-  *
-  * @param $x Title object or null to just get the current one
-  * @return Title object
-  */
- public function Title( $x = null ) {
-     $result = wfSetVar( $this->mTitle, $x );
-     return $result;
- }
- /**
-  * Set the output type and rebuild the $this->ot shortcut array, which
-  * holds one boolean per output type ('html', 'wiki', 'pre', 'plain').
-  *
-  * @param $ot Integer: new value
-  */
- public function setOutputType( $ot ) {
-     $this->mOutputType = $ot;
-     # Shortcut alias
-     $flags = array();
-     $flags['html'] = ( $ot == self::OT_HTML );
-     $flags['wiki'] = ( $ot == self::OT_WIKI );
-     $flags['pre'] = ( $ot == self::OT_PREPROCESS );
-     $flags['plain'] = ( $ot == self::OT_PLAIN );
-     $this->ot = $flags;
- }
- /**
-  * Combined accessor/mutator for the output type, implemented via wfSetVar().
-  * Only mOutputType is touched; the $this->ot shortcut array is not rebuilt here.
-  *
-  * @param $x int|null New value or null to just get the current one
-  * @return Integer
-  */
- public function OutputType( $x = null ) {
-     $result = wfSetVar( $this->mOutputType, $x );
-     return $result;
- }
- /**
-  * Accessor for the ParserOutput object (mOutput).
-  *
-  * @return ParserOutput object
-  */
- public function getOutput() {
-     return $this->mOutput;
- }
- /**
-  * Accessor for the ParserOptions object (mOptions).
-  *
-  * @return ParserOptions object
-  */
- public function getOptions() {
-     return $this->mOptions;
- }
- /**
-  * Combined accessor/mutator for the ParserOptions object, implemented via wfSetVar().
-  *
-  * @param $x ParserOptions New value or null to just get the current one
-  * @return ParserOptions Current ParserOptions object
-  */
- public function Options( $x = null ) {
-     $result = wfSetVar( $this->mOptions, $x );
-     return $result;
- }
- /**
-  * Hand out the current link ID and advance the counter by one.
-  *
-  * @return int the ID before the increment
-  */
- public function nextLinkID() {
-     $current = $this->mLinkID;
-     $this->mLinkID++;
-     return $current;
- }
- /**
-  * Overwrite the link ID counter.
-  *
-  * @param $id int
-  */
- public function setLinkID( $id ) {
-     $this->mLinkID = $id;
- }
- /**
-  * Get a language object for use in parser functions such as {{FORMATNUM:}}.
-  * This is simply the target language, see getTargetLanguage().
-  *
-  * @return Language
-  */
- public function getFunctionLang() {
-     $language = $this->getTargetLanguage();
-     return $language;
- }
- /**
-  * Get the target language for the content being parsed. This is usually the
-  * language that the content is in.
-  *
-  * Resolution order: the target language set in the parser options, then
-  * the user language for interface messages, then the page language of
-  * the context title.
-  *
-  * @since 1.19
-  *
-  * @throws MWException when none of the above applies and no title is set
-  * @return Language|null
-  */
- public function getTargetLanguage() {
-     $fromOptions = $this->mOptions->getTargetLanguage();
-     if ( $fromOptions !== null ) {
-         return $fromOptions;
-     }
-     if ( $this->mOptions->getInterfaceMessage() ) {
-         return $this->mOptions->getUserLangObj();
-     }
-     if ( is_null( $this->mTitle ) ) {
-         throw new MWException( __METHOD__ . ': $this->mTitle is null' );
-     }
-     return $this->mTitle->getPageLanguage();
- }
- /**
-  * Get the language object for language conversion: $wgContLang when
-  * $wgBug34832TransitionalRollback is enabled, the target language otherwise.
-  */
- public function getConverterLanguage() {
-     global $wgBug34832TransitionalRollback, $wgContLang;
-     if ( !$wgBug34832TransitionalRollback ) {
-         return $this->getTargetLanguage();
-     }
-     return $wgContLang;
- }
- /**
-  * Get a User object either from $this->mUser, if set, or from the
-  * ParserOptions object otherwise
-  *
-  * @return User object
-  */
- public function getUser() {
-     if ( is_null( $this->mUser ) ) {
-         # No explicit user override: fall back to the options' user.
-         return $this->mOptions->getUser();
-     }
-     return $this->mUser;
- }
- /**
-  * Get the preprocessor object, creating it on first use from the class
-  * name stored in mPreprocessorClass.
-  *
-  * @return Preprocessor instance
-  */
- public function getPreprocessor() {
-     if ( isset( $this->mPreprocessor ) ) {
-         return $this->mPreprocessor;
-     }
-     $preprocessorClass = $this->mPreprocessorClass;
-     $this->mPreprocessor = new $preprocessorClass( $this );
-     return $this->mPreprocessor;
- }
- /**
-  * Replaces all occurrences of HTML-style comments and the given tags
-  * in the text with a random marker and returns the next text. The output
-  * parameter $matches will be an associative array filled with data in
-  * the form:
-  *
-  * @code
-  * 'UNIQ-xxxxx' => array(
-  * 'element',
-  * 'tag content',
-  * array( 'param' => 'x' ),
-  * '<element param="x">tag content</element>' ) )
-  * @endcode
-  *
-  * Element names are matched literally and case-insensitively. When
-  * $elements is empty, only comments are extracted. An element without a
-  * closing tag runs to the end of the text.
-  *
-  * @param $elements array list of element names. Comments are always extracted.
-  * @param $text string Source text string.
-  * @param $matches array Out parameter, Array: extracted tags
-  * @param $uniq_prefix string
-  * @return String: stripped text
-  */
- public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
-     # Marker counter shared by all calls within the request.
-     static $n = 1;
-     $stripped = '';
-     $matches = array();
-     # Escape the names so that regex metacharacters in a tag name are
-     # matched literally instead of being interpreted as pattern syntax.
-     $quotedNames = array();
-     foreach ( $elements as $name ) {
-         $quotedNames[] = preg_quote( $name, '/' );
-     }
-     # With no element names, use a group that can never match; an empty
-     # alternation would otherwise accept "<>" as a tag with an empty name.
-     $taglist = $quotedNames ? implode( '|', $quotedNames ) : '(?!)';
-     $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";
-     while ( $text != '' ) {
-         $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
-         $stripped .= $p[0];
-         if ( count( $p ) < 5 ) {
-             # No further tag or comment in the remaining text.
-             break;
-         }
-         if ( count( $p ) > 5 ) {
-             # comment
-             $element = $p[4];
-             $attributes = '';
-             $close = '';
-             $inside = $p[5];
-         } else {
-             # tag
-             $element = $p[1];
-             $attributes = $p[2];
-             $close = $p[3];
-             $inside = $p[4];
-         }
-         $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
-         $stripped .= $marker;
-         if ( $close === '/>' ) {
-             # Empty element tag, <tag />
-             $content = null;
-             $text = $inside;
-             $tail = null;
-         } else {
-             if ( $element === '!--' ) {
-                 $end = '/(-->)/';
-             } else {
-                 # $element is the text actually matched, so quote it as well.
-                 $end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
-             }
-             $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
-             $content = $q[0];
-             if ( count( $q ) < 3 ) {
-                 # No end tag -- let it run out to the end of the text.
-                 $tail = '';
-                 $text = '';
-             } else {
-                 $tail = $q[1];
-                 $text = $q[2];
-             }
-         }
-         $matches[$marker] = array( $element,
-             $content,
-             Sanitizer::decodeTagAttributes( $attributes ),
-             "<$element$attributes$close$content$tail" );
-     }
-     return $stripped;
- }
- /**
-  * Get a list of strippable XML-like elements (the mStripList member).
-  *
-  * @return array
-  */
- public function getStripList() {
-     return $this->mStripList;
- }
- /**
-  * Add an item to the strip state
-  * Returns the unique tag which must be inserted into the stripped text
-  * The tag will be replaced with the original text in unstrip()
-  *
-  * The marker is built from the unique prefix, the running marker index
-  * (incremented on every call) and MARKER_SUFFIX.
-  *
-  * @param $text string
-  *
-  * @return string
-  */
- public function insertStripItem( $text ) {
-     $marker = $this->mUniqPrefix . '-item-' . $this->mMarkerIndex . '-' . self::MARKER_SUFFIX;
-     $this->mMarkerIndex++;
-     $this->mStripState->addGeneral( $marker, $text );
-     return $marker;
- }
- /**
-  * Parse the wiki syntax used to render tables.
-  *
-  * The text is processed line by line. Per open table, parallel stacks
-  * record whether a cell and a row are currently open, which tag opened
-  * the last cell (td, th or caption), the pending row attributes, and
-  * whether the table has opened any row at all. Tables still open at the
-  * end of the text are closed automatically.
-  *
-  * @private
-  * @param $text string
-  * @return string
-  */
- function doTableStuff( $text ) {
-     wfProfileIn( __METHOD__ );
-     $lines = StringUtils::explode( "\n", $text );
-     $out = '';
-     $td_history = array(); # Is currently a td tag open?
-     $last_tag_history = array(); # Save history of last tag activated (td, th or caption)
-     $tr_history = array(); # Is currently a tr tag open?
-     $tr_attributes = array(); # history of tr attributes
-     $has_opened_tr = array(); # Did this table open a <tr> element?
-     $indent_level = 0; # indent level of the table
-     foreach ( $lines as $outLine ) {
-         $line = trim( $outLine );
-         if ( $line === '' ) { # empty line, go to next line
-             $out .= $outLine."\n";
-             continue;
-         }
-         $first_character = $line[0];
-         $matches = array();
-         if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) {
-             # First check if we are starting a new table
-             $indent_level = strlen( $matches[1] );
-             $attributes = $this->mStripState->unstripBoth( $matches[2] );
-             $attributes = Sanitizer::fixTagAttributes( $attributes , 'table' );
-             $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
-             array_push( $td_history , false );
-             array_push( $last_tag_history , '' );
-             array_push( $tr_history , false );
-             array_push( $tr_attributes , '' );
-             array_push( $has_opened_tr , false );
-         } elseif ( count( $td_history ) == 0 ) {
-             # Don't do any of the following
-             $out .= $outLine."\n";
-             continue;
-         } elseif ( substr( $line , 0 , 2 ) === '|}' ) {
-             # We are ending a table
-             $line = '</table>' . substr( $line , 2 );
-             $last_tag = array_pop( $last_tag_history );
-             if ( !array_pop( $has_opened_tr ) ) {
-                 $line = "<tr><td></td></tr>{$line}";
-             }
-             if ( array_pop( $tr_history ) ) {
-                 $line = "</tr>{$line}";
-             }
-             if ( array_pop( $td_history ) ) {
-                 $line = "</{$last_tag}>{$line}";
-             }
-             array_pop( $tr_attributes );
-             $outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
-         } elseif ( substr( $line , 0 , 2 ) === '|-' ) {
-             # Now we have a table row
-             $line = preg_replace( '#^\|-+#', '', $line );
-             # Whats after the tag is now only attributes
-             $attributes = $this->mStripState->unstripBoth( $line );
-             $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
-             array_pop( $tr_attributes );
-             array_push( $tr_attributes, $attributes );
-             $line = '';
-             $last_tag = array_pop( $last_tag_history );
-             array_pop( $has_opened_tr );
-             array_push( $has_opened_tr , true );
-             if ( array_pop( $tr_history ) ) {
-                 $line = '</tr>';
-             }
-             if ( array_pop( $td_history ) ) {
-                 $line = "</{$last_tag}>{$line}";
-             }
-             $outLine = $line;
-             array_push( $tr_history , false );
-             array_push( $td_history , false );
-             array_push( $last_tag_history , '' );
-         } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 ) === '|+' ) {
-             # This might be cell elements, td, th or captions
-             if ( substr( $line , 0 , 2 ) === '|+' ) {
-                 $first_character = '+';
-                 $line = substr( $line , 1 );
-             }
-             $line = substr( $line , 1 );
-             if ( $first_character === '!' ) {
-                 $line = str_replace( '!!' , '||' , $line );
-             }
-             # Split up multiple cells on the same line.
-             # FIXME : This can result in improper nesting of tags processed
-             # by earlier parser steps, but should avoid splitting up eg
-             # attribute values containing literal "||".
-             $cells = StringUtils::explodeMarkup( '||' , $line );
-             $outLine = '';
-             # Loop through each table cell
-             foreach ( $cells as $cell ) {
-                 $previous = '';
-                 if ( $first_character !== '+' ) {
-                     $tr_after = array_pop( $tr_attributes );
-                     if ( !array_pop( $tr_history ) ) {
-                         $previous = "<tr{$tr_after}>\n";
-                     }
-                     array_push( $tr_history , true );
-                     array_push( $tr_attributes , '' );
-                     array_pop( $has_opened_tr );
-                     array_push( $has_opened_tr , true );
-                 }
-                 $last_tag = array_pop( $last_tag_history );
-                 if ( array_pop( $td_history ) ) {
-                     $previous = "</{$last_tag}>\n{$previous}";
-                 }
-                 if ( $first_character === '|' ) {
-                     $last_tag = 'td';
-                 } elseif ( $first_character === '!' ) {
-                     $last_tag = 'th';
-                 } elseif ( $first_character === '+' ) {
-                     $last_tag = 'caption';
-                 } else {
-                     $last_tag = '';
-                 }
-                 array_push( $last_tag_history , $last_tag );
-                 # A cell could contain both parameters and data
-                 $cell_data = explode( '|' , $cell , 2 );
-                 # Bug 553: Note that a '|' inside an invalid link should not
-                 # be mistaken as delimiting cell parameters
-                 if ( strpos( $cell_data[0], '[[' ) !== false ) {
-                     $cell = "{$previous}<{$last_tag}>{$cell}";
-                 } elseif ( count( $cell_data ) == 1 ) {
-                     $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
-                 } else {
-                     $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
-                     $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
-                     $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
-                 }
-                 $outLine .= $cell;
-                 array_push( $td_history , true );
-             }
-         }
-         $out .= $outLine . "\n";
-     }
-     # Closing open td, tr && table
-     while ( count( $td_history ) > 0 ) {
-         # Close the cell with the tag that actually opened it (td, th or
-         # caption); $last_tag_history is pushed and popped in step with
-         # $td_history, so it always has an entry here.
-         $last_tag = array_pop( $last_tag_history );
-         if ( array_pop( $td_history ) ) {
-             $out .= "</{$last_tag}>\n";
-         }
-         if ( array_pop( $tr_history ) ) {
-             $out .= "</tr>\n";
-         }
-         if ( !array_pop( $has_opened_tr ) ) {
-             $out .= "<tr><td></td></tr>\n" ;
-         }
-         $out .= "</table>\n";
-     }
-     # Remove trailing line-ending (b/c)
-     if ( substr( $out, -1 ) === "\n" ) {
-         $out = substr( $out, 0, -1 );
-     }
-     # special case: don't return empty table
-     if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
-         $out = '';
-     }
-     wfProfileOut( __METHOD__ );
-     return $out;
- }
/**
 * Helper function for parse() that transforms wiki markup into
 * HTML. Only called for $mOutputType == self::OT_HTML.
 *
 * The individual transformation passes below run in a fixed order;
 * several of them depend on the output of the preceding ones.
 *
 * @private
 *
 * @param $text string wikitext to transform
 * @param $isMain bool passed through unchanged to formatHeadings()
 * @param $frame object|bool frame object providing ->depth and ->expand()
 *   (presumably a PPFrame -- confirm); when false, the old-style
 *   replaceVariables() path is used instead
 *
 * @return string
 */
function internalParse( $text, $isMain = true, $frame = false ) {
	wfProfileIn( __METHOD__ );

	$origText = $text;

	# Hook handlers take the parser by reference. Since PHP 7.1, $this can
	# no longer be bound by reference (the handler would get a "expected to
	# be a reference" warning and not run), so hand out a local alias.
	$parser = $this;

	# Hook to suspend the parser in this state
	if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$parser, &$text, &$this->mStripState ) ) ) {
		wfProfileOut( __METHOD__ );
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		if ( !$frame->depth ) {
			$flag = 0;
		} else {
			$flag = Parser::PTD_FOR_INCLUSION;
		}
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	wfRunHooks( 'InternalParseBeforeSanitize', array( &$parser, &$text, &$this->mStripState ) );
	$text = Sanitizer::removeHTMLtags(
		$text,
		array( $this, 'attributeStripCallback' ),
		false,
		array_keys( $this->mTransparentTagHooks )
	);
	wfRunHooks( 'InternalParseBeforeLinks', array( &$parser, &$text, &$this->mStripState ) );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		$df = DateFormatter::getInstance();
		$text = $df->reformat( $this->mOptions->getDateFormat(), $text );
	}
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind absolute URLs that
	# were masked with a NOPARSE marker to hide them from
	# replaceExternalLinks. That pass is done, so drop the marker now.
	$text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );
	$text = $this->formatHeadings( $text, $origText, $isMain );

	wfProfileOut( __METHOD__ );
	return $text;
}
/**
 * Turn free external URLs and the special strings "RFC n", "PMID n"
 * and "ISBN n" into links.
 *
 * Existing anchors and anything inside an HTML tag are matched as well,
 * so that magicLinkCallback() can pass them through untouched.
 *
 * DML
 * @private
 *
 * @param $text string
 *
 * @return string
 */
function doMagicLinks( $text ) {
	wfProfileIn( __METHOD__ );

	$protocols = wfUrlProtocolsWithoutProtRel();
	$urlChars = self::EXT_LINK_URL_CLASS;

	# Extended-mode (/x) pattern: layout whitespace and "#" comments
	# inside the pattern are ignored by PCRE.
	$pattern = '!(?:                           # Start cases
		(<a[ \t\r\n>].*?</a>) |        # m[1]: Skip link text
		(<.*?>) |                      # m[2]: Skip stuff inside HTML elements' . "
		(\\b(?i:{$protocols}){$urlChars}+) |    # m[3]: Free external links" . '
		(?:RFC|PMID)\s+([0-9]+) |      # m[4]: RFC or PMID, capture number
		ISBN\s+(\b                     # m[5]: ISBN, capture number
			(?: 97[89] [\ \-]? )?      # optional 13-digit ISBN prefix
			(?: [0-9] [\ \-]? ){9}     # 9 digits with opt. delimiters
			[0-9Xx]                    # check digit
			\b)
		)!xu';

	$linked = preg_replace_callback( $pattern, array( $this, 'magicLinkCallback' ), $text );

	wfProfileOut( __METHOD__ );
	return $linked;
}
/**
 * Callback for doMagicLinks(): builds the replacement for one match.
 *
 * Which capture group is non-empty decides the outcome:
 *  - m[1] (anchor) or m[2] (HTML tag): the match is returned unchanged
 *  - m[3]: free external link, handed to makeFreeExternalLink()
 *  - m[4]: RFC or PMID number, linked via the 'rfcurl'/'pubmedurl' message
 *  - m[5]: ISBN, linked to Special:Booksources
 *
 * @throws MWException if m[4] is set but the match starts with neither
 *   "RFC" nor "PMID"
 * @param $m array regex match array
 * @return string
 */
function magicLinkCallback( $m ) {
	# Anchors and raw HTML elements are passed through as-is
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		return $m[0];
	}
	if ( isset( $m[2] ) && $m[2] !== '' ) {
		return $m[0];
	}

	# Free external link
	if ( isset( $m[3] ) && $m[3] !== '' ) {
		return $this->makeFreeExternalLink( $m[0] );
	}

	# RFC or PMID
	if ( isset( $m[4] ) && $m[4] !== '' ) {
		if ( strncmp( $m[0], 'RFC', 3 ) === 0 ) {
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$CssClass = 'mw-magiclink-rfc';
		} elseif ( strncmp( $m[0], 'PMID', 4 ) === 0 ) {
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$CssClass = 'mw-magiclink-pmid';
		} else {
			throw new MWException( __METHOD__.': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$id = $m[4];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $CssClass );
	}

	# ISBN
	if ( isset( $m[5] ) && $m[5] !== '' ) {
		$isbn = $m[5];
		# Normalise for the Booksources target: drop separators, upper-case the check digit
		$num = str_replace( array( '-', ' ', 'x' ), array( '', '', 'X' ), $isbn );
		$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
		$href = htmlspecialchars( $titleObj->getLocalUrl() );
		return '<a href="' . $href . '" class="internal mw-magiclink-isbn">ISBN ' . $isbn . '</a>';
	}

	# No recognised group matched
	return $m[0];
}
/**
 * Make a free external link, given a user-supplied URL
 *
 * Anything from the first escaped angle bracket onwards, and any trailing
 * punctuation, is split off the URL and appended after the generated
 * link (or external image) as plain text.
 *
 * @param $url string
 *
 * @return string HTML
 * @private
 */
function makeFreeExternalLink( $url ) {
	wfProfileIn( __METHOD__ );

	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	$entityMatch = array();
	if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
		$cutAt = $entityMatch[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Move trailing punctuation to $trail
	$sep = ',;\.:!?';
	# If there is no left bracket, then consider right brackets fair game too
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$withoutPunctuation = rtrim( $url, $sep );
	$numSepChars = strlen( $url ) - strlen( $withoutPunctuation );
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = $withoutPunctuation;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$linkText = $this->getConverterLanguage()->markNoConversion( $url );
		$html = Linker::makeExternalLink( $url, $linkText, true, 'free',
			$this->getExternalLinkAttribs( $url ) );
		# Register it in the output object...
		# Replace unnecessary URL escape codes with their equivalent characters
		$this->mOutput->addExternalLink( self::replaceUnusualEscapes( $url ) );
	}

	wfProfileOut( __METHOD__ );
	return $html . $trail;
}
/**
 * Parse headers and return html
 *
 * Lines wrapped in matching runs of "=" become <h1>..<h6> elements.
 * Level 6 is handled first so that longer runs are consumed before
 * shorter ones get a chance to match them.
 *
 * @private
 *
 * @param $text string
 *
 * @return string
 */
function doHeadings( $text ) {
	wfProfileIn( __METHOD__ );

	$level = 6;
	while ( $level >= 1 ) {
		$marker = str_repeat( '=', $level );
		$text = preg_replace(
			"/^{$marker}(.+){$marker}\\s*$/m",
			"<h{$level}>\\1</h{$level}>",
			$text
		);
		$level--;
	}

	wfProfileOut( __METHOD__ );
	return $text;
}
/**
 * Replace single quotes with HTML markup
 *
 * The text is split on newlines and each line is run through
 * doQuotes() on its own; the results are joined with newlines again.
 *
 * @private
 *
 * @param $text string
 *
 * @return string the altered text
 */
function doAllQuotes( $text ) {
	wfProfileIn( __METHOD__ );

	$converted = array();
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	$joined = implode( "\n", $converted );

	wfProfileOut( __METHOD__ );
	return $joined;
}
/**
 * Helper function for doAllQuotes()
 *
 * Converts the apostrophe mark-up of a single line into <i>/<b> tags:
 * two apostrophes toggle italics, three toggle bold, five toggle both.
 * Runs of four, or of more than five, apostrophes are reduced to three
 * resp. five, with the surplus kept as literal text. All tags still open
 * at the end of the line are closed.
 *
 * @param $text string a single line of text
 *
 * @return string
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the
	# text in between.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$numParts = count( $arr );
	if ( $numParts == 1 ) {
		# No run of two or more apostrophes, nothing to convert
		return $text;
	}

	# First, do some preliminary work. This may shift some apostrophes from
	# being mark-up to being text. It also counts the number of occurrences
	# of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $numParts; $i += 2 ) {
		$len = strlen( $arr[$i] );
		if ( $len == 4 ) {
			# If there are ever four apostrophes, assume the first is supposed to
			# be text, and the remaining three constitute mark-up for bold text.
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$len = 3;
		} elseif ( $len > 5 ) {
			# If there are more than 5 apostrophes in a row, assume they're all
			# text except for the last 5.
			$arr[$i - 1] .= str_repeat( "'", $len - 5 );
			$arr[$i] = "'''''";
			$len = 5;
		}
		# Count the number of occurrences of bold and italics mark-ups.
		# A run of five counts towards both.
		if ( $len == 2 ) {
			$numitalics++;
		} elseif ( $len == 3 ) {
			$numbold++;
		} elseif ( $len == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	# If there is an odd number of both bold and italics, it is likely
	# that one of the bold ones was meant to be an apostrophe followed
	# by italics. Which one we cannot know for certain, but it is more
	# likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $numParts; $i += 2 ) {
			if ( strlen( $arr[$i] ) != 3 ) {
				continue;
			}
			# Last and second-to-last character of the preceding text
			$x1 = substr( $arr[$i - 1], -1 );
			$x2 = substr( $arr[$i - 1], -2, 1 );
			if ( $x1 === ' ' ) {
				if ( $firstspace == -1 ) {
					$firstspace = $i;
				}
			} elseif ( $x2 === ' ' ) {
				if ( $firstsingleletterword == -1 ) {
					$firstsingleletterword = $i;
				}
			} else {
				if ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		# Prefer a single-letter word, then a multi-letter word, then a
		# run preceded by a space. All three can be -1 if, for example,
		# there is only one pentuple-apostrophe in the line.
		$demote = -1;
		if ( $firstsingleletterword > -1 ) {
			$demote = $firstsingleletterword;
		} elseif ( $firstmultiletterword > -1 ) {
			$demote = $firstmultiletterword;
		} elseif ( $firstspace > -1 ) {
			$demote = $firstspace;
		}
		if ( $demote > -1 ) {
			# Turn the bold into a literal apostrophe followed by italics
			$arr[$demote] = "''";
			$arr[$demote - 1] .= "'";
		}
	}

	# Now let's actually convert our apostrophic mush to HTML!
	# $state is one of '', 'i', 'b', 'ib', 'bi' (letters in opening order)
	# or 'both'. In state 'both' a run of five has been seen and the text
	# is buffered, because the nesting order is not yet known.
	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		$len = strlen( $r );
		if ( $len == 2 ) {
			if ( $state === 'i' ) {
				$output .= '</i>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i>';
				$state = 'b';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i><b>';
				$state = 'b';
			} elseif ( $state === 'both' ) {
				$output .= '<b><i>' . $buffer . '</i>';
				$state = 'b';
			} else { # $state can be 'b' or ''
				$output .= '<i>';
				$state .= 'i';
			}
		} elseif ( $len == 3 ) {
			if ( $state === 'b' ) {
				$output .= '</b>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b><i>';
				$state = 'i';
			} elseif ( $state === 'ib' ) {
				$output .= '</b>';
				$state = 'i';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b>';
				$state = 'i';
			} else { # $state can be 'i' or ''
				$output .= '<b>';
				$state .= 'b';
			}
		} elseif ( $len == 5 ) {
			if ( $state === 'b' ) {
				$output .= '</b><i>';
				$state = 'i';
			} elseif ( $state === 'i' ) {
				$output .= '</i><b>';
				$state = 'b';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b>';
				$state = '';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i>';
				$state = '';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b></i>';
				$state = '';
			} else { # ($state == '')
				$buffer = '';
				$state = 'both';
			}
		}
	}

	# Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	# There might be lonely ''''', so make sure we have a buffer.
	# Compare against '' explicitly: a buffer of "0" is falsy in PHP and
	# would otherwise be dropped from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
/**
 * Replace external links (REL)
 *
 * Splits the text on bracketed external links and rebuilds it with
 * each link rendered as HTML. Every link URL is also registered in
 * the parser output.
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run maintenance/parserTests.php if you change this code.
 *
 * @private
 *
 * @param $text string
 *
 * @throws MWException if the split with the bracketed-link regex fails
 * @return string
 */
function replaceExternalLinks( $text ) {
	wfProfileIn( __METHOD__ );

	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with --enable-unicode-properties in order for MediaWiki to function" );
	}

	# Plain text in front of the first link
	$html = array_shift( $pieces );

	# The rest comes in groups of four: URL, protocol, link text, trailing text.
	# The protocol entry (offset 1) is not needed here.
	$total = count( $pieces );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $pieces[$pos];
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entityMatch = array();
		if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $entityMatch[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]
			# Autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Set linktype for CSS - if URL==text, link is essentially free
			$linktype = ( $label === $url ) ? 'free' : 'text';
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$link = Linker::makeExternalLink( $url, $label, false, $linktype,
			$this->getExternalLinkAttribs( $url ) );
		$html .= $link . $dtrail . $trail;

		# Register link in the output object.
		# Replace unnecessary URL escape codes with the referenced character
		# This prevents spammers from hiding links from the filters
		$this->mOutput->addExternalLink( self::replaceUnusualEscapes( $url ) );
	}

	wfProfileOut( __METHOD__ );
	return $html;
}
/**
 * Build the extra HTML attributes for an external link.
 *
 * The result may contain rel => nofollow (depending on
 * $wgNoFollowLinks, the namespace of the current title and the URL's
 * domain) and/or a target attribute (when the parser options define an
 * external link target).
 *
 * @param $url String|bool optional URL, to extract the domain from for rel =>
 *   nofollow if appropriate
 * @return Array associative array of HTML attributes
 */
function getExternalLinkAttribs( $url = false ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	$attribs = array();

	$ns = $this->mTitle->getNamespace();
	$useNoFollow = $wgNoFollowLinks
		&& !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions );
	if ( $useNoFollow ) {
		$attribs['rel'] = 'nofollow';
	}

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
	}

	return $attribs;
}
/**
 * Replace unusual URL escape codes with their equivalent characters
 *
 * Every %XX sequence in the URL is passed to
 * replaceUnusualEscapesCallback(), which decides whether it is decoded
 * or left escaped.
 *
 * @param $url String
 * @return String
 *
 * @todo This can merge genuinely required bits in the path or query string,
 *       breaking legit URLs. A proper fix would treat the various parts of
 *       the URL differently; as a workaround, just use the output for
 *       statistical records, not for actual linking/output.
 */
static function replaceUnusualEscapes( $url ) {
	$percentEscape = '/%[0-9A-Fa-f]{2}/';
	$decoder = array( __CLASS__, 'replaceUnusualEscapesCallback' );
	return preg_replace_callback( $percentEscape, $decoder, $url );
}
/**
 * Callback function used in replaceUnusualEscapes().
 * Decodes a single %XX escape unless it stands for a character that
 * has to stay escaped.
 *
 * @param $matches array regex match; $matches[0] is the %XX sequence
 *
 * @return string the decoded character, or the original escape sequence
 */
private static function replaceUnusualEscapesCallback( $matches ) {
	$escape = $matches[0];
	$decoded = urldecode( $escape );
	$code = ord( $decoded );

	# Keep the escape for anything outside the range 33..126
	if ( $code <= 32 || $code >= 127 ) {
		return $escape;
	}
	# Keep the escape for unsafe or HTTP reserved characters according to RFC 1738
	if ( strpos( '<>"#{}|\^~[]`;/?', $decoded ) !== false ) {
		return $escape;
	}
	# Anything else shouldn't be escaped
	return $decoded;
}
- /**
- * make an image if it's allowed, either through the global
- * option, through th…
Large files are truncated, but you can click here to view the full file