PageRenderTime 69ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 1ms

/includes/parser/Parser.php

https://github.com/spenser-roark/OOUG-Wiki
PHP | 5722 lines | 3490 code | 498 blank | 1734 comment | 688 complexity | c111458fb54de04aa4e0fb3d712b02fb MD5 | raw file
Possible License(s): GPL-2.0, Apache-2.0, LGPL-3.0

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. * @defgroup Parser Parser
  4. *
  5. * @file
  6. * @ingroup Parser
  7. * File for Parser and related classes
  8. */
  9. /**
  10. * PHP Parser - Processes wiki markup (which uses a more user-friendly
  11. * syntax, such as "[[link]]" for making links), and provides a one-way
  12. * transformation of that wiki markup into XHTML output / markup
  13. * (which in turn the browser understands, and can display).
  14. *
  15. * <pre>
  16. * There are seven main entry points into the Parser class:
  17. * parse()
  18. * produces HTML output
  19. * preSaveTransform().
  20. * produces altered wiki markup.
  21. * preprocess()
  22. * removes HTML comments and expands templates
  23. * cleanSig() / cleanSigInSig()
  24. * Cleans a signature before saving it to preferences
  25. * getSection()
  26. * Return the content of a section from an article for section editing
  27. * replaceSection()
  28. * Replaces a section by number inside an article
  29. * getPreloadText()
  30. * Removes <noinclude> sections, and <includeonly> tags.
  31. *
  32. * Globals used:
  33. * object: $wgContLang
  34. *
  35. * NOT $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
  36. *
  37. * settings:
  38. * $wgUseDynamicDates*, $wgInterwikiMagic*,
  39. * $wgNamespacesWithSubpages, $wgAllowExternalImages*,
  40. * $wgLocaltimezone, $wgAllowSpecialInclusion*,
  41. * $wgMaxArticleSize*
  42. *
  43. * * only within ParserOptions
  44. * </pre>
  45. *
  46. * @ingroup Parser
  47. */
  48. class Parser {
  49. /**
  50. * Update this version number when the ParserOutput format
  51. * changes in an incompatible way, so the parser cache
  52. * can automatically discard old data.
  53. */
  54. const VERSION = '1.6.4';
  55. /**
  56. * Update this version number when the output of serialiseHalfParsedText()
  57. * changes in an incompatible way
  58. */
  59. const HALF_PARSED_VERSION = 2;
  60. # Flags for Parser::setFunctionHook
  61. # Also available as global constants from Defines.php
  62. const SFH_NO_HASH = 1;
  63. const SFH_OBJECT_ARGS = 2;
  64. # Constants needed for external link processing
  65. # Everything except bracket, space, or control characters
  66. # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
  67. # as well as U+3000 (IDEOGRAPHIC SPACE), for bug 19052
  68. const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
  69. const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
  70. \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
  71. # State constants for the definition list colon extraction
  72. const COLON_STATE_TEXT = 0;
  73. const COLON_STATE_TAG = 1;
  74. const COLON_STATE_TAGSTART = 2;
  75. const COLON_STATE_CLOSETAG = 3;
  76. const COLON_STATE_TAGSLASH = 4;
  77. const COLON_STATE_COMMENT = 5;
  78. const COLON_STATE_COMMENTDASH = 6;
  79. const COLON_STATE_COMMENTDASHDASH = 7;
  80. # Flags for preprocessToDom
  81. const PTD_FOR_INCLUSION = 1;
  82. # Allowed values for $this->mOutputType
  83. # Parameter to startExternalParse().
  84. const OT_HTML = 1; # like parse()
  85. const OT_WIKI = 2; # like preSaveTransform()
  86. const OT_PREPROCESS = 3; # like preprocess()
  87. const OT_MSG = 3;
  88. const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
  89. # Marker suffix needs to be accessible statically.
  90. const MARKER_SUFFIX = "-QINU\x7f";
  91. # Persistent:
  92. var $mTagHooks = array();
  93. var $mTransparentTagHooks = array();
  94. var $mFunctionHooks = array();
  95. var $mFunctionSynonyms = array( 0 => array(), 1 => array() );
  96. var $mFunctionTagHooks = array();
  97. var $mStripList = array();
  98. var $mDefaultStripList = array();
  99. var $mVarCache = array();
  100. var $mImageParams = array();
  101. var $mImageParamsMagicArray = array();
  102. var $mMarkerIndex = 0;
  103. var $mFirstCall = true;
  104. # Initialised by initialiseVariables()
  105. /**
  106. * @var MagicWordArray
  107. */
  108. var $mVariables;
  109. /**
  110. * @var MagicWordArray
  111. */
  112. var $mSubstWords;
  113. var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
  114. # Cleared with clearState():
  115. /**
  116. * @var ParserOutput
  117. */
  118. var $mOutput;
  119. var $mAutonumber, $mDTopen;
  120. /**
  121. * @var StripState
  122. */
  123. var $mStripState;
  124. var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  125. /**
  126. * @var LinkHolderArray
  127. */
  128. var $mLinkHolders;
  129. var $mLinkID;
  130. var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
  131. var $mTplExpandCache; # empty-frame expansion cache
  132. var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
  133. var $mExpensiveFunctionCount; # number of expensive parser function calls
  134. var $mShowToc, $mForceTocPosition;
  135. /**
  136. * @var User
  137. */
  138. var $mUser; # User object; only used when doing pre-save transform
  139. # Temporary
  140. # These are variables reset at least once per parse regardless of $clearState
  141. /**
  142. * @var ParserOptions
  143. */
  144. var $mOptions;
  145. /**
  146. * @var Title
  147. */
  148. var $mTitle; # Title context, used for self-link rendering and similar things
  149. var $mOutputType; # Output type, one of the OT_xxx constants
  150. var $ot; # Shortcut alias, see setOutputType()
  151. var $mRevisionObject; # The revision object of the specified revision ID
  152. var $mRevisionId; # ID to display in {{REVISIONID}} tags
  153. var $mRevisionTimestamp; # The timestamp of the specified revision ID
  154. var $mRevisionUser; # User to display in {{REVISIONUSER}} tag
  155. var $mRevIdForTs; # The revision ID which was used to fetch the timestamp
  156. /**
  157. * @var string
  158. */
  159. var $mUniqPrefix;
  /**
   * Constructor
   *
   * Stores the configuration, builds the regular expression used for
   * bracketed external links, and selects the preprocessor class.
   *
   * The preprocessor class is $conf['preprocessorClass'] when that key is
   * set; otherwise it is chosen from the runtime environment.
   *
   * @param $conf array
   */
  public function __construct( $conf = array() ) {
      $this->mConf = $conf;
      $this->mUrlProtocols = wfUrlProtocols();
      $this->mExtLinkBracketedRegex =
          '/\[((' . wfUrlProtocols() . ')'
          . self::EXT_LINK_URL_CLASS
          . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

      if ( isset( $conf['preprocessorClass'] ) ) {
          $preprocessorClass = $conf['preprocessorClass'];
      } else {
          # Preprocessor_Hash is used in every case but one. In compiled mode
          # it is much faster than Preprocessor_DOM, so the DOM variant is
          # only considered when MW_COMPILED is not defined.
          $preprocessorClass = 'Preprocessor_Hash';
          if ( !defined( 'MW_COMPILED' ) ) {
              if ( extension_loaded( 'domxml' ) ) {
                  # PECL extension that conflicts with the core DOM extension (bug 13770)
                  wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
              } elseif ( extension_loaded( 'dom' ) ) {
                  $preprocessorClass = 'Preprocessor_DOM';
              }
          }
      }
      $this->mPreprocessorClass = $preprocessorClass;

      wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
  }
  /**
   * Reduce memory usage to reduce the impact of circular references:
   * every property of the object is unset when it is destroyed.
   */
  function __destruct() {
      # The link holder array is constructed with this parser as its
      # argument (see clearState()), so it is released explicitly first.
      if ( isset( $this->mLinkHolders ) ) {
          unset( $this->mLinkHolders );
      }
      foreach ( $this as $property => $unused ) {
          unset( $this->$property );
      }
  }
  /**
   * Do various kinds of initialisation on the first call of the parser.
   *
   * Registers the core parser functions and core tag hooks, initialises
   * the variables and runs the ParserFirstCallInit hook. Does nothing once
   * $this->mFirstCall has been cleared.
   */
  function firstCallInit() {
      if ( !$this->mFirstCall ) {
          return;
      }
      $this->mFirstCall = false;

      wfProfileIn( __METHOD__ );

      CoreParserFunctions::register( $this );
      CoreTagHooks::register( $this );
      $this->initialiseVariables();

      # Hook handlers take the parser by reference. Taking a reference to
      # $this itself stops working in PHP 7.1, so pass a local copy of the
      # object handle instead.
      $parser = $this;
      wfRunHooks( 'ParserFirstCallInit', array( &$parser ) );
      wfProfileOut( __METHOD__ );
  }
  /**
   * Clear Parser state
   *
   * Resets every per-parse member to its initial value, creates a fresh
   * ParserOutput, LinkHolderArray and StripState, generates a new unique
   * marker prefix and runs the ParserClearState hook. Triggers
   * firstCallInit() if it has not run yet.
   *
   * @private
   */
  function clearState() {
      wfProfileIn( __METHOD__ );
      if ( $this->mFirstCall ) {
          $this->firstCallInit();
      }
      $this->mOutput = new ParserOutput;
      $this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
      $this->mAutonumber = 0;
      $this->mLastSection = '';
      $this->mDTopen = false;
      $this->mIncludeCount = array();
      $this->mArgStack = false;
      $this->mInPre = false;
      $this->mLinkHolders = new LinkHolderArray( $this );
      $this->mLinkID = 0;
      $this->mRevisionObject = $this->mRevisionTimestamp =
          $this->mRevisionId = $this->mRevisionUser = null;
      $this->mVarCache = array();
      $this->mUser = null;

      /**
       * Prefix for temporary replacement strings for the multipass parser.
       * \x07 should never appear in input as it's disallowed in XML.
       * Using it at the front also gives us a little extra robustness
       * since it shouldn't match when butted up against identifier-like
       * string constructs.
       *
       * Must not consist of all title characters, or else it will change
       * the behaviour of <nowiki> in a link.
       */
      # $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
      # Changed to \x7f to allow XML double-parsing -- TS
      $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
      $this->mStripState = new StripState( $this->mUniqPrefix );

      # Clear these on every parse, bug 4549
      $this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array();

      $this->mShowToc = true;
      $this->mForceTocPosition = false;
      $this->mIncludeSizes = array(
          'post-expand' => 0,
          'arg' => 0,
      );
      $this->mPPNodeCount = 0;
      $this->mDefaultSort = false;
      $this->mHeadings = array();
      $this->mDoubleUnderscores = array();
      $this->mExpensiveFunctionCount = 0;

      # Fix cloning: discard a preprocessor that still points at another parser
      if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
          $this->mPreprocessor = null;
      }

      # Hook handlers take the parser by reference. Taking a reference to
      # $this itself stops working in PHP 7.1, so pass a local copy of the
      # object handle instead.
      $parser = $this;
      wfRunHooks( 'ParserClearState', array( &$parser ) );
      wfProfileOut( __METHOD__ );
  }
  /**
   * Convert wikitext to HTML
   * Do not call this function recursively.
   *
   * Processing order: start the parse, run the strip hooks, internalParse(),
   * special-character clean-up, doBlockLevels(), link holder replacement,
   * optional language conversion of text and title, unstripping, tidy (or a
   * regex-based fallback) and finally the optional limit report comment.
   * The revision members are restored to their previous values before
   * returning.
   *
   * @param $text String: text we want to parse
   * @param $title Title object
   * @param $options ParserOptions
   * @param $linestart boolean
   * @param $clearState boolean
   * @param $revid Int: number to pass in {{REVISIONID}}
   * @return ParserOutput a ParserOutput
   */
  public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) {
      /**
       * First pass--just handle <nowiki> sections, pass the rest off
       * to internalParse() which does all the real work.
       */

      global $wgUseTidy, $wgAlwaysUseTidy, $wgDisableLangConversion, $wgDisableTitleConversion;
      $fname = __METHOD__.'-' . wfGetCaller();
      wfProfileIn( __METHOD__ );
      wfProfileIn( $fname );

      $this->startParse( $title, $options, self::OT_HTML, $clearState );

      # Remember the revision context so it can be restored at the end
      $oldRevisionId = $this->mRevisionId;
      $oldRevisionObject = $this->mRevisionObject;
      $oldRevisionTimestamp = $this->mRevisionTimestamp;
      $oldRevisionUser = $this->mRevisionUser;
      if ( $revid !== null ) {
          $this->mRevisionId = $revid;
          $this->mRevisionObject = null;
          $this->mRevisionTimestamp = null;
          $this->mRevisionUser = null;
      }

      # Hook handlers take the parser by reference. Taking a reference to
      # $this itself stops working in PHP 7.1, so pass a local copy of the
      # object handle instead.
      $parser = $this;

      wfRunHooks( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
      # No more strip!
      wfRunHooks( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
      $text = $this->internalParse( $text );

      $text = $this->mStripState->unstripGeneral( $text );

      # Clean up special characters, only run once, next-to-last before doBlockLevels
      $fixtags = array(
          # french spaces, last one Guillemet-left
          # only if there is something before the space
          '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
          # french spaces, Guillemet-right
          '/(\\302\\253) /' => '\\1&#160;',
          '/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
      );
      $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

      $text = $this->doBlockLevels( $text, $linestart );

      $this->replaceLinkHolders( $text );

      /**
       * The input doesn't get language converted if
       * a) It's disabled
       * b) Content isn't converted
       * c) It's a conversion table
       * d) it is an interface message (which is in the user language)
       */
      if ( !( $wgDisableLangConversion
              || isset( $this->mDoubleUnderscores['nocontentconvert'] )
              || $this->mTitle->isConversionTable() ) )
      {
          # Run convert unconditionally in 1.18-compatible mode
          global $wgBug34832TransitionalRollback;
          if ( $wgBug34832TransitionalRollback || !$this->mOptions->getInterfaceMessage() ) {
              # The position of the convert() call should not be changed. it
              # assumes that the links are all replaced and the only thing left
              # is the <nowiki> mark.
              $text = $this->getConverterLanguage()->convert( $text );
          }
      }

      /**
       * A converted title will be provided in the output object if title and
       * content conversion are enabled, the article text does not contain
       * a conversion-suppressing double-underscore tag, and no
       * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
       * automatic link conversion.
       */
      if ( !( $wgDisableLangConversion
              || $wgDisableTitleConversion
              || isset( $this->mDoubleUnderscores['nocontentconvert'] )
              || isset( $this->mDoubleUnderscores['notitleconvert'] )
              || $this->mOutput->getDisplayTitle() !== false ) )
      {
          $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
          if ( $convruletitle ) {
              $this->mOutput->setTitleText( $convruletitle );
          } else {
              $titleText = $this->getConverterLanguage()->convertTitle( $title );
              $this->mOutput->setTitleText( $titleText );
          }
      }

      $text = $this->mStripState->unstripNoWiki( $text );

      wfRunHooks( 'ParserBeforeTidy', array( &$parser, &$text ) );

      $text = $this->replaceTransparentTags( $text );
      $text = $this->mStripState->unstripGeneral( $text );

      $text = Sanitizer::normalizeCharReferences( $text );

      if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
          $text = MWTidy::tidy( $text );
      } else {
          # attempt to sanitize at least some nesting problems
          # (bug #2702 and quite a few others)
          $tidyregs = array(
              # ''Something [http://www.cool.com cool''] -->
              # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
              '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
              '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
              # fix up an anchor inside another anchor, only
              # at least for a single single nested link (bug 3695)
              '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
              '\\1\\2</a>\\3</a>\\1\\4</a>',
              # fix div inside inline elements- doBlockLevels won't wrap a line which
              # contains a div, so fix it up here; replace
              # div with escaped text
              '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
              '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
              # remove empty italic or bold tag pairs, some
              # introduced by rules above
              '/<([bi])><\/\\1>/' => '',
          );

          $text = preg_replace(
              array_keys( $tidyregs ),
              array_values( $tidyregs ),
              $text );
      }

      global $wgExpensiveParserFunctionLimit;
      if ( $this->mExpensiveFunctionCount > $wgExpensiveParserFunctionLimit ) {
          $this->limitationWarn( 'expensive-parserfunction', $this->mExpensiveFunctionCount, $wgExpensiveParserFunctionLimit );
      }

      wfRunHooks( 'ParserAfterTidy', array( &$parser, &$text ) );

      # Information on include size limits, for the benefit of users who try to skirt them
      if ( $this->mOptions->getEnableLimitReport() ) {
          $max = $this->mOptions->getMaxIncludeSize();
          $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
          $limitReport =
              "NewPP limit report\n" .
              "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
              "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
              "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
              $PFreport;
          wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
          # The report is appended to the output as an HTML comment
          $text .= "\n<!-- \n$limitReport-->\n";
      }
      $this->mOutput->setText( $text );

      # Restore the revision context saved at the start
      $this->mRevisionId = $oldRevisionId;
      $this->mRevisionObject = $oldRevisionObject;
      $this->mRevisionTimestamp = $oldRevisionTimestamp;
      $this->mRevisionUser = $oldRevisionUser;
      wfProfileOut( $fname );
      wfProfileOut( __METHOD__ );

      return $this->mOutput;
  }
  /**
   * Recursive parser entry point that can be called from an extension tag
   * hook.
   *
   * Runs the ParserBeforeStrip and ParserAfterStrip hooks and then hands
   * the text to internalParse().
   *
   * If $frame is not provided, then template variables (e.g., {{{1}}}) within $text are not expanded
   *
   * @param $text String: text extension wants to have parsed
   * @param $frame PPFrame: The frame to use for expanding any template variables
   *
   * @return string
   */
  function recursiveTagParse( $text, $frame=false ) {
      wfProfileIn( __METHOD__ );

      # Hook handlers take the parser by reference. Taking a reference to
      # $this itself stops working in PHP 7.1, so pass a local copy of the
      # object handle instead.
      $parser = $this;
      wfRunHooks( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
      wfRunHooks( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );

      $text = $this->internalParse( $text, false, $frame );
      wfProfileOut( __METHOD__ );
      return $text;
  }
  /**
   * Expand templates and variables in the text, producing valid, static wikitext.
   * Also removes comments.
   *
   * @param $text String: wikitext to expand
   * @param $title Title
   * @param $options ParserOptions
   * @param $revid Int|null: stored as the revision ID when not null
   * @return String
   */
  function preprocess( $text, Title $title, ParserOptions $options, $revid = null ) {
      wfProfileIn( __METHOD__ );
      $this->startParse( $title, $options, self::OT_PREPROCESS, true );
      if ( $revid !== null ) {
          $this->mRevisionId = $revid;
      }

      # Hook handlers take the parser by reference. Taking a reference to
      # $this itself stops working in PHP 7.1, so pass a local copy of the
      # object handle instead.
      $parser = $this;
      wfRunHooks( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
      wfRunHooks( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );

      $text = $this->replaceVariables( $text );
      $text = $this->mStripState->unstripBoth( $text );
      wfProfileOut( __METHOD__ );
      return $text;
  }
  /**
   * Recursive parser entry point that can be called from an extension tag
   * hook. Replaces variables in the text using the given frame, then
   * unstrips the result.
   *
   * @param $text String: text to be expanded
   * @param $frame PPFrame: The frame to use for expanding any template variables
   * @return String
   * @since 1.19
   */
  public function recursivePreprocess( $text, $frame = false ) {
      wfProfileIn( __METHOD__ );

      $expanded = $this->replaceVariables( $text, $frame );
      $unstripped = $this->mStripState->unstripBoth( $expanded );

      wfProfileOut( __METHOD__ );
      return $unstripped;
  }
  /**
   * Process the wikitext for the ?preload= feature. (bug 5210)
   *
   * <noinclude>, <includeonly> etc. are parsed as for template transclusion,
   * comments, templates, arguments, tags hooks and parser functions are untouched.
   *
   * @param $text String
   * @param $title Title
   * @param $options ParserOptions
   * @return String
   */
  public function getPreloadText( $text, Title $title, ParserOptions $options ) {
      # Parser (re)initialisation
      $this->startParse( $title, $options, self::OT_PLAIN, true );

      # Expand with both argument and template expansion switched off
      $expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
      $root = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
      $frame = $this->getPreprocessor()->newFrame();
      $expanded = $frame->expand( $root, $expandFlags );

      return $this->mStripState->unstripBoth( $expanded );
  }
  /**
   * Get a random string
   *
   * The result is two mt_rand() values in the range 0..0x7fffffff, each
   * written in lowercase hexadecimal and joined together.
   *
   * @return string
   */
  static public function getRandomString() {
      $chunks = array();
      for ( $i = 0; $i < 2; $i++ ) {
          $chunks[] = dechex( mt_rand( 0, 0x7fffffff ) );
      }
      return implode( '', $chunks );
  }
  /**
   * Set the current user.
   * Should only be used when doing pre-save transform.
   *
   * The value is stored in $this->mUser as given.
   *
   * @param $user Mixed: User object or null (to reset)
   */
  function setUser( $user ) {
      $this->mUser = $user;
  }
  /**
   * Accessor for mUniqPrefix.
   *
   * @return String The prefix, or an empty string when it has not been set
   */
  public function uniqPrefix() {
      if ( isset( $this->mUniqPrefix ) ) {
          return $this->mUniqPrefix;
      }

      # @todo FIXME: This is probably *horribly wrong*
      # LanguageConverter seems to want $wgParser's uniqPrefix, however
      # if this is called for a parser cache hit, the parser may not
      # have ever been initialized in the first place.
      # Not really sure what the heck is supposed to be going on here.
      # throw new MWException( "Accessing uninitialized mUniqPrefix" );
      return '';
  }
  /**
   * Set the context title
   *
   * A false-like value or a FakeTitle is replaced by the title 'NO TITLE'.
   * A title carrying a fragment is cloned and the clone's fragment is
   * cleared, so the caller's object is not modified.
   *
   * @param $t Title
   */
  function setTitle( $t ) {
      if ( !$t || $t instanceof FakeTitle ) {
          $t = Title::newFromText( 'NO TITLE' );
      }

      if ( strval( $t->getFragment() ) === '' ) {
          $this->mTitle = $t;
          return;
      }

      # Strip the fragment to avoid various odd effects
      $this->mTitle = clone $t;
      $this->mTitle->setFragment( '' );
  }
  /**
   * Accessor for the Title object
   *
   * @return Title object held in $this->mTitle
   */
  function getTitle() {
      $title = $this->mTitle;
      return $title;
  }
  /**
   * Accessor/mutator for the Title object; delegates to wfSetVar() on
   * $this->mTitle.
   *
   * @param $x New Title object or null to just get the current one
   * @return Title object
   */
  function Title( $x = null ) {
      $result = wfSetVar( $this->mTitle, $x );
      return $result;
  }
  /**
   * Set the output type
   *
   * Also rebuilds the $this->ot shortcut array, which holds one boolean per
   * alias ('html', 'wiki', 'pre', 'plain') telling whether the new value
   * equals the corresponding OT_* constant.
   *
   * @param $ot Integer: new value
   */
  function setOutputType( $ot ) {
      $this->mOutputType = $ot;

      # Shortcut alias
      $typeByAlias = array(
          'html' => self::OT_HTML,
          'wiki' => self::OT_WIKI,
          'pre' => self::OT_PREPROCESS,
          'plain' => self::OT_PLAIN,
      );
      $flags = array();
      foreach ( $typeByAlias as $alias => $type ) {
          $flags[$alias] = ( $ot == $type );
      }
      $this->ot = $flags;
  }
  /**
   * Accessor/mutator for the output type; delegates to wfSetVar() on
   * $this->mOutputType.
   *
   * @param $x New value or null to just get the current one
   * @return Integer
   */
  function OutputType( $x = null ) {
      $result = wfSetVar( $this->mOutputType, $x );
      return $result;
  }
  /**
   * Get the ParserOutput object
   *
   * @return ParserOutput object held in $this->mOutput
   */
  function getOutput() {
      $output = $this->mOutput;
      return $output;
  }
  /**
   * Get the ParserOptions object
   *
   * @return ParserOptions object held in $this->mOptions
   */
  function getOptions() {
      $options = $this->mOptions;
      return $options;
  }
  /**
   * Accessor/mutator for the ParserOptions object; delegates to
   * wfSetVar() on $this->mOptions.
   *
   * @param $x New value or null to just get the current one
   * @return Current ParserOptions object
   */
  function Options( $x = null ) {
      $result = wfSetVar( $this->mOptions, $x );
      return $result;
  }
  /**
   * Return the current link ID and advance the counter by one.
   *
   * @return int
   */
  function nextLinkID() {
      $current = $this->mLinkID;
      $this->mLinkID++;
      return $current;
  }
  /**
   * Overwrite the link ID counter.
   *
   * @param $id int
   */
  function setLinkID( $id ) {
      $this->mLinkID = $id;
  }
  /**
   * Get a language object for use in parser functions such as {{FORMATNUM:}}
   *
   * This is whatever getTargetLanguage() returns.
   *
   * @return Language
   */
  function getFunctionLang() {
      $language = $this->getTargetLanguage();
      return $language;
  }
  /**
   * Get the target language for the content being parsed. This is usually the
   * language that the content is in.
   *
   * Checked in order: the target language set on the parser options, the
   * user language object for interface messages, and finally the page
   * language of the current title.
   *
   * An MWException is thrown when the title is needed but is null.
   */
  function getTargetLanguage() {
      $explicit = $this->mOptions->getTargetLanguage();
      if ( $explicit !== null ) {
          return $explicit;
      }

      if ( $this->mOptions->getInterfaceMessage() ) {
          return $this->mOptions->getUserLangObj();
      }

      if ( is_null( $this->mTitle ) ) {
          throw new MWException( __METHOD__.': $this->mTitle is null' );
      }

      return $this->mTitle->getPageLanguage();
  }
  /**
   * Get the language object for language conversion
   *
   * This is $wgContLang when $wgBug34832TransitionalRollback is set, and
   * the target language otherwise.
   */
  function getConverterLanguage() {
      global $wgBug34832TransitionalRollback, $wgContLang;
      return $wgBug34832TransitionalRollback
          ? $wgContLang
          : $this->getTargetLanguage();
  }
  /**
   * Get a User object either from $this->mUser, if set, or from the
   * ParserOptions object otherwise
   *
   * @return User object
   */
  function getUser() {
      return is_null( $this->mUser )
          ? $this->mOptions->getUser()
          : $this->mUser;
  }
  /**
   * Get a preprocessor object
   *
   * The instance is created on first use from the class name stored in
   * $this->mPreprocessorClass, with this parser as constructor argument,
   * and reused afterwards.
   *
   * @return Preprocessor instance
   */
  function getPreprocessor() {
      if ( isset( $this->mPreprocessor ) ) {
          return $this->mPreprocessor;
      }

      $preprocessorClass = $this->mPreprocessorClass;
      $this->mPreprocessor = new $preprocessorClass( $this );
      return $this->mPreprocessor;
  }
  /**
   * Replaces all occurrences of HTML-style comments and the given tags
   * in the text with a random marker and returns the next text. The output
   * parameter $matches will be an associative array filled with data in
   * the form:
   *   'UNIQ-xxxxx' => array(
   *     'element',
   *     'tag content',
   *     array( 'param' => 'x' ),
   *     '<element param="x">tag content</element>' ) )
   *
   * Element names are matched literally and case-insensitively. An element
   * or comment without an end tag runs to the end of the text.
   *
   * @param $elements array list of element names. Comments are always extracted.
   * @param $text string Source text string.
   * @param $matches array Out parameter, Array: extracted tags
   * @param $uniq_prefix string
   * @return String: stripped text
   */
  public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
      static $n = 1;
      $stripped = '';
      $matches = array();

      # The names end up inside a regular expression, so escape them: a name
      # containing a metacharacter must match itself and must not be able to
      # change or break the pattern.
      $quotedNames = array();
      foreach ( $elements as $name ) {
          $quotedNames[] = preg_quote( $name, '/' );
      }
      # An empty alternation would match "<>" as a tag with an empty name.
      # With no names, use an assertion that never matches so that only
      # comments are extracted.
      $taglist = $quotedNames ? implode( '|', $quotedNames ) : '(?!)';
      $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

      while ( $text != '' ) {
          $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
          $stripped .= $p[0];
          if ( count( $p ) < 5 ) {
              # No further tag or comment
              break;
          }
          if ( count( $p ) > 5 ) {
              # comment: the fourth capture group took part in the match
              $element = $p[4];
              $attributes = '';
              $close = '';
              $inside = $p[5];
          } else {
              # tag
              $element = $p[1];
              $attributes = $p[2];
              $close = $p[3];
              $inside = $p[4];
          }

          $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
          $stripped .= $marker;

          if ( $close === '/>' ) {
              # Empty element tag, <tag />
              $content = null;
              $text = $inside;
              $tail = null;
          } else {
              if ( $element === '!--' ) {
                  $end = '/(-->)/';
              } else {
                  # $element is text taken from the input; escape it as well
                  $end = "/(<\\/" . preg_quote( $element, '/' ) . "\\s*>)/i";
              }
              $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
              $content = $q[0];
              if ( count( $q ) < 3 ) {
                  # No end tag -- let it run out to the end of the text.
                  $tail = '';
                  $text = '';
              } else {
                  $tail = $q[1];
                  $text = $q[2];
              }
          }

          $matches[$marker] = array( $element,
              $content,
              Sanitizer::decodeTagAttributes( $attributes ),
              "<$element$attributes$close$content$tail" );
      }
      return $stripped;
  }
  /**
   * Get a list of strippable XML-like elements
   *
   * @return array The contents of $this->mStripList
   */
  function getStripList() {
      $stripList = $this->mStripList;
      return $stripList;
  }
  /**
   * Add an item to the strip state
   * Returns the unique tag which must be inserted into the stripped text
   * The tag will be replaced with the original text in unstrip()
   *
   * The tag is built from the unique prefix, the current marker index
   * (which is then incremented) and the marker suffix.
   *
   * @param $text string
   *
   * @return string
   */
  function insertStripItem( $text ) {
      $marker = $this->mUniqPrefix . '-item-' . $this->mMarkerIndex++ . '-' . self::MARKER_SUFFIX;
      $this->mStripState->addGeneral( $marker, $text );
      return $marker;
  }
  774. /**
  775. * parse the wiki syntax used to render tables
  776. *
  777. * @private
  778. */
  779. function doTableStuff( $text ) {
  780. wfProfileIn( __METHOD__ );
  781. $lines = StringUtils::explode( "\n", $text );
  782. $out = '';
  783. $td_history = array(); # Is currently a td tag open?
  784. $last_tag_history = array(); # Save history of last lag activated (td, th or caption)
  785. $tr_history = array(); # Is currently a tr tag open?
  786. $tr_attributes = array(); # history of tr attributes
  787. $has_opened_tr = array(); # Did this table open a <tr> element?
  788. $indent_level = 0; # indent level of the table
  789. foreach ( $lines as $outLine ) {
  790. $line = trim( $outLine );
  791. if ( $line === '' ) { # empty line, go to next line
  792. $out .= $outLine."\n";
  793. continue;
  794. }
  795. $first_character = $line[0];
  796. $matches = array();
  797. if ( preg_match( '/^(:*)\{\|(.*)$/', $line , $matches ) ) {
  798. # First check if we are starting a new table
  799. $indent_level = strlen( $matches[1] );
  800. $attributes = $this->mStripState->unstripBoth( $matches[2] );
  801. $attributes = Sanitizer::fixTagAttributes( $attributes , 'table' );
  802. $outLine = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
  803. array_push( $td_history , false );
  804. array_push( $last_tag_history , '' );
  805. array_push( $tr_history , false );
  806. array_push( $tr_attributes , '' );
  807. array_push( $has_opened_tr , false );
  808. } elseif ( count( $td_history ) == 0 ) {
  809. # Don't do any of the following
  810. $out .= $outLine."\n";
  811. continue;
  812. } elseif ( substr( $line , 0 , 2 ) === '|}' ) {
  813. # We are ending a table
  814. $line = '</table>' . substr( $line , 2 );
  815. $last_tag = array_pop( $last_tag_history );
  816. if ( !array_pop( $has_opened_tr ) ) {
  817. $line = "<tr><td></td></tr>{$line}";
  818. }
  819. if ( array_pop( $tr_history ) ) {
  820. $line = "</tr>{$line}";
  821. }
  822. if ( array_pop( $td_history ) ) {
  823. $line = "</{$last_tag}>{$line}";
  824. }
  825. array_pop( $tr_attributes );
  826. $outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
  827. } elseif ( substr( $line , 0 , 2 ) === '|-' ) {
  828. # Now we have a table row
  829. $line = preg_replace( '#^\|-+#', '', $line );
  830. # Whats after the tag is now only attributes
  831. $attributes = $this->mStripState->unstripBoth( $line );
  832. $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
  833. array_pop( $tr_attributes );
  834. array_push( $tr_attributes, $attributes );
  835. $line = '';
  836. $last_tag = array_pop( $last_tag_history );
  837. array_pop( $has_opened_tr );
  838. array_push( $has_opened_tr , true );
  839. if ( array_pop( $tr_history ) ) {
  840. $line = '</tr>';
  841. }
  842. if ( array_pop( $td_history ) ) {
  843. $line = "</{$last_tag}>{$line}";
  844. }
  845. $outLine = $line;
  846. array_push( $tr_history , false );
  847. array_push( $td_history , false );
  848. array_push( $last_tag_history , '' );
  849. } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 ) === '|+' ) {
  850. # This might be cell elements, td, th or captions
  851. if ( substr( $line , 0 , 2 ) === '|+' ) {
  852. $first_character = '+';
  853. $line = substr( $line , 1 );
  854. }
  855. $line = substr( $line , 1 );
  856. if ( $first_character === '!' ) {
  857. $line = str_replace( '!!' , '||' , $line );
  858. }
  859. # Split up multiple cells on the same line.
  860. # FIXME : This can result in improper nesting of tags processed
  861. # by earlier parser steps, but should avoid splitting up eg
  862. # attribute values containing literal "||".
  863. $cells = StringUtils::explodeMarkup( '||' , $line );
  864. $outLine = '';
  865. # Loop through each table cell
  866. foreach ( $cells as $cell ) {
  867. $previous = '';
  868. if ( $first_character !== '+' ) {
  869. $tr_after = array_pop( $tr_attributes );
  870. if ( !array_pop( $tr_history ) ) {
  871. $previous = "<tr{$tr_after}>\n";
  872. }
  873. array_push( $tr_history , true );
  874. array_push( $tr_attributes , '' );
  875. array_pop( $has_opened_tr );
  876. array_push( $has_opened_tr , true );
  877. }
  878. $last_tag = array_pop( $last_tag_history );
  879. if ( array_pop( $td_history ) ) {
  880. $previous = "</{$last_tag}>\n{$previous}";
  881. }
  882. if ( $first_character === '|' ) {
  883. $last_tag = 'td';
  884. } elseif ( $first_character === '!' ) {
  885. $last_tag = 'th';
  886. } elseif ( $first_character === '+' ) {
  887. $last_tag = 'caption';
  888. } else {
  889. $last_tag = '';
  890. }
  891. array_push( $last_tag_history , $last_tag );
  892. # A cell could contain both parameters and data
  893. $cell_data = explode( '|' , $cell , 2 );
  894. # Bug 553: Note that a '|' inside an invalid link should not
  895. # be mistaken as delimiting cell parameters
  896. if ( strpos( $cell_data[0], '[[' ) !== false ) {
  897. $cell = "{$previous}<{$last_tag}>{$cell}";
  898. } elseif ( count( $cell_data ) == 1 ) {
  899. $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
  900. } else {
  901. $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
  902. $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
  903. $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
  904. }
  905. $outLine .= $cell;
  906. array_push( $td_history , true );
  907. }
  908. }
  909. $out .= $outLine . "\n";
  910. }
  911. # Closing open td, tr && table
  912. while ( count( $td_history ) > 0 ) {
  913. if ( array_pop( $td_history ) ) {
  914. $out .= "</td>\n";
  915. }
  916. if ( array_pop( $tr_history ) ) {
  917. $out .= "</tr>\n";
  918. }
  919. if ( !array_pop( $has_opened_tr ) ) {
  920. $out .= "<tr><td></td></tr>\n" ;
  921. }
  922. $out .= "</table>\n";
  923. }
  924. # Remove trailing line-ending (b/c)
  925. if ( substr( $out, -1 ) === "\n" ) {
  926. $out = substr( $out, 0, -1 );
  927. }
  928. # special case: don't return empty table
  929. if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
  930. $out = '';
  931. }
  932. wfProfileOut( __METHOD__ );
  933. return $out;
  934. }
  /**
   * Worker for parse(): turns wiki markup into HTML.
   * Only called for $mOutputType == self::OT_HTML.
   *
   * The stages run in a fixed order: variable/template expansion, HTML tag
   * sanitising, tables, horizontal rules, double-underscore switches,
   * headings, optional dynamic dates, internal links, quotes, external
   * links, magic links and finally heading formatting.
   *
   * @private
   *
   * @param $text string Wiki markup to transform
   * @param $isMain bool Handed through unchanged to formatHeadings()
   * @param $frame object|bool Frame whose ->depth and ->expand() are used to
   *   expand the preprocessed DOM; false selects replaceVariables() instead
   *
   * @return string
   */
  function internalParse( $text, $isMain = true, $frame = false ) {
      wfProfileIn( __METHOD__ );

      # formatHeadings() needs the untouched input at the very end
      $originalText = $text;

      # Hook to suspend the parser in this state
      $proceed = wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) );
      if ( !$proceed ) {
          wfProfileOut( __METHOD__ );
          return $text;
      }

      if ( !$frame ) {
          # No frame supplied: fall back to old-style replaceVariables
          $text = $this->replaceVariables( $text );
      } else {
          # Frame depth tells us how include/noinclude tags should be handled:
          # depth 0 is the top-level document, anything else is an inclusion
          $ptdFlags = $frame->depth ? Parser::PTD_FOR_INCLUSION : 0;
          $text = $frame->expand( $this->preprocessToDom( $text, $ptdFlags ) );
      }

      $transparentTags = array_keys( $this->mTransparentTagHooks );
      $text = Sanitizer::removeHTMLtags(
          $text,
          array( &$this, 'attributeStripCallback' ),
          false,
          $transparentTags
      );
      wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );

      # Tables must follow variable replacement to work properly; running
      # them ahead of the other transformations keeps things like link
      # expansions from turning up in surprising places.
      $text = $this->doTableStuff( $text );

      # Four or more dashes at the start of a line become a horizontal rule
      $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

      $text = $this->doDoubleUnderscore( $text );
      $text = $this->doHeadings( $text );

      if ( $this->mOptions->getUseDynamicDates() ) {
          $dateFormatter = DateFormatter::getInstance();
          $text = $dateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
      }

      $text = $this->replaceInternalLinks( $text );
      $text = $this->doAllQuotes( $text );
      $text = $this->replaceExternalLinks( $text );

      # replaceInternalLinks may leave absolute URLs behind, masked with a
      # NOPARSE marker to hide them from replaceExternalLinks; drop the marker
      $text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text );

      $text = $this->doMagicLinks( $text );
      $text = $this->formatHeadings( $text, $originalText, $isMain );

      wfProfileOut( __METHOD__ );
      return $text;
  }
  /**
   * Turn free URLs and the magic strings "RFC n", "PMID n" and "ISBN n"
   * into links. Existing anchors and other HTML elements are matched too,
   * so that magicLinkCallback() can pass them through untouched.
   *
   * DML
   * @private
   *
   * @param $text string
   *
   * @return string
   */
  function doMagicLinks( $text ) {
      wfProfileIn( __METHOD__ );

      $protocols = wfUrlProtocolsWithoutProtRel();
      $urlClass = self::EXT_LINK_URL_CLASS;

      # Extended-mode (x) pattern: whitespace and #-comments inside it are
      # ignored by PCRE. The middle piece is double-quoted so the protocol
      # list and URL character class can be interpolated.
      $pattern = '!(?: # Start cases
          (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
          (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
          (\\b(?:{$protocols}){$urlClass}+) | # m[3]: Free external links" . '
          (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
          ISBN\s+(\b # m[5]: ISBN, capture number
          (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
          (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
          [0-9Xx] # check digit
          \b)
          )!xu';

      $text = preg_replace_callback( $pattern, array( &$this, 'magicLinkCallback' ), $text );

      wfProfileOut( __METHOD__ );
      return $text;
  }
  /**
   * preg_replace_callback() handler for doMagicLinks().
   *
   * Capture groups: m[1] existing anchor, m[2] other HTML element (both
   * returned unchanged), m[3] free external URL, m[4] RFC/PMID number,
   * m[5] ISBN.
   *
   * @throws MWException if m[4] matched but the text starts with neither
   *   "RFC" nor "PMID"
   * @param $m array Match array from preg_replace_callback()
   * @return string HTML, or the original matched text
   */
  function magicLinkCallback( $m ) {
      # Anchors and HTML elements are only matched so they can be skipped
      $isAnchor = isset( $m[1] ) && $m[1] !== '';
      $isElement = isset( $m[2] ) && $m[2] !== '';
      if ( $isAnchor || $isElement ) {
          return $m[0];
      }

      # Free external link
      if ( isset( $m[3] ) && $m[3] !== '' ) {
          return $this->makeFreeExternalLink( $m[0] );
      }

      # RFC or PMID
      if ( isset( $m[4] ) && $m[4] !== '' ) {
          $id = $m[4];
          if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
              $keyword = 'RFC';
              $urlmsg = 'rfcurl';
              $cssClass = 'mw-magiclink-rfc';
          } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
              $keyword = 'PMID';
              $urlmsg = 'pubmedurl';
              $cssClass = 'mw-magiclink-pmid';
          } else {
              throw new MWException( __METHOD__.': unrecognised match type "' .
                  substr( $m[0], 0, 20 ) . '"' );
          }
          $url = wfMsgForContent( $urlmsg, $id );
          return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
      }

      # ISBN
      if ( isset( $m[5] ) && $m[5] !== '' ) {
          $isbn = $m[5];
          # Normalise for the Booksources title: no separators, upper-case check digit
          $num = str_replace( array( '-', ' ', 'x' ), array( '', '', 'X' ), $isbn );
          $titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
          $href = htmlspecialchars( $titleObj->getLocalUrl() );
          return '<a href="' . $href . "\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>";
      }

      # Nothing captured: leave the text as it was
      return $m[0];
  }
  /**
   * Build the HTML for a free (unbracketed) external link from a
   * user-supplied URL.
   *
   * Anything from the first "&lt;" or "&gt;" onwards, and any trailing
   * punctuation, is split off the URL and appended after the link. The URL
   * is rendered as an image when maybeMakeExternalImage() accepts it;
   * otherwise it becomes a link and is registered with the parser output.
   *
   * @param $url string
   *
   * @return string HTML
   * @private
   */
  function makeFreeExternalLink( $url ) {
      wfProfileIn( __METHOD__ );

      $trail = '';

      # '<' and '>' (already escaped by removeHTMLtags()) must not be part
      # of a URL, per RFC 2396: cut the URL at the first such entity.
      $entityMatch = array();
      if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
          $cutAt = $entityMatch[0][1];
          $trail = substr( $url, $cutAt ) . $trail;
          $url = substr( $url, 0, $cutAt );
      }

      # Trailing punctuation belongs to the surrounding text, not the URL.
      # A right bracket is fair game too unless the URL has a left bracket.
      $sep = ',;\.:!?';
      if ( strpos( $url, '(' ) === false ) {
          $sep .= ')';
      }
      $trailLength = strspn( strrev( $url ), $sep );
      if ( $trailLength ) {
          $trail = substr( $url, -$trailLength ) . $trail;
          $url = substr( $url, 0, -$trailLength );
      }

      $url = Sanitizer::cleanUrl( $url );

      # Is this an external image?
      $html = $this->maybeMakeExternalImage( $url );
      if ( $html === false ) {
          # Not an image, make a link
          $label = $this->getConverterLanguage()->markNoConversion( $url );
          $html = Linker::makeExternalLink( $url, $label, true, 'free',
              $this->getExternalLinkAttribs( $url ) );

          # Register it in the output object, with unnecessary URL escape
          # codes replaced by their equivalent characters
          $this->mOutput->addExternalLink( self::replaceUnusualEscapes( $url ) );
      }

      wfProfileOut( __METHOD__ );
      return $html . $trail;
  }
  /**
   * Convert "= Heading =" through "====== Heading ======" lines into
   * <h1> ... <h6> elements.
   *
   * @private
   *
   * @param $text string
   *
   * @return string
   */
  function doHeadings( $text ) {
      wfProfileIn( __METHOD__ );

      # Deepest level first, so a six-marker heading is matched as a whole
      # before the shorter markers get a chance at it
      foreach ( array( 6, 5, 4, 3, 2, 1 ) as $level ) {
          $marker = str_repeat( '=', $level );
          $pattern = "/^{$marker}(.+){$marker}\\s*$/m";
          $replacement = "<h{$level}>\\1</h{$level}>";
          $text = preg_replace( $pattern, $replacement, $text );
      }

      wfProfileOut( __METHOD__ );
      return $text;
  }
  /**
   * Apply doQuotes() to every line of the text, turning apostrophe
   * markup into HTML. Quote markup never spans a line break.
   *
   * @private
   *
   * @param $text string
   *
   * @return string the altered text
   */
  function doAllQuotes( $text ) {
      wfProfileIn( __METHOD__ );

      $converted = array();
      foreach ( StringUtils::explode( "\n", $text ) as $line ) {
          $converted[] = $this->doQuotes( $line );
      }
      # Re-join with single newlines (no trailing newline is added)
      $outtext = implode( "\n", $converted );

      wfProfileOut( __METHOD__ );
      return $outtext;
  }
  /**
   * Helper function for doAllQuotes(): converts the apostrophe markup of
   * a single line ('' italics, ''' bold, ''''' both) into <i>/<b> HTML.
   *
   * Works in three passes over the line split on apostrophe runs:
   *  1. normalise runs of 4 or more than 5 apostrophes and count the
   *     bold/italic markers,
   *  2. if both counts are odd, demote one bold marker to
   *     "apostrophe + italics",
   *  3. run a small state machine that emits the tags and closes whatever
   *     is still open at the end of the line.
   *
   * @param $text string A single line of text
   *
   * @return string
   */
  public function doQuotes( $text ) {
      # Odd indexes of $arr hold the apostrophe runs, even indexes the text
      $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
      $numParts = count( $arr );
      if ( $numParts == 1 ) {
          # No markup on this line
          return $text;
      }

      # First, do some preliminary work. This may shift some apostrophes from
      # being mark-up to being text. It also counts the number of occurrences
      # of bold and italics mark-ups.
      $numbold = 0;
      $numitalics = 0;
      for ( $i = 1; $i < $numParts; $i += 2 ) {
          $len = strlen( $arr[$i] );
          if ( $len == 4 ) {
              # If there are ever four apostrophes, assume the first is supposed to
              # be text, and the remaining three constitute mark-up for bold text.
              $arr[$i - 1] .= "'";
              $arr[$i] = "'''";
              $len = 3;
          } elseif ( $len > 5 ) {
              # If there are more than 5 apostrophes in a row, assume they're all
              # text except for the last 5.
              $arr[$i - 1] .= str_repeat( "'", $len - 5 );
              $arr[$i] = "'''''";
              $len = 5;
          }
          # Count the number of occurrences of bold and italics mark-ups.
          # A run of five counts once for each.
          if ( $len == 2 ) {
              $numitalics++;
          } elseif ( $len == 3 ) {
              $numbold++;
          } elseif ( $len == 5 ) {
              $numitalics++;
              $numbold++;
          }
      }

      # If there is an odd number of both bold and italics, it is likely
      # that one of the bold ones was meant to be an apostrophe followed
      # by italics. Which one we cannot know for certain, but it is more
      # likely to be one that has a single-letter word before it.
      if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
          $firstsingleletterword = -1;
          $firstmultiletterword = -1;
          $firstspace = -1;
          for ( $i = 1; $i < $numParts; $i += 2 ) {
              if ( strlen( $arr[$i] ) != 3 ) {
                  continue;
              }
              # Last and second-to-last character of the preceding text
              $x1 = substr( $arr[$i - 1], -1 );
              $x2 = substr( $arr[$i - 1], -2, 1 );
              if ( $x1 === ' ' ) {
                  if ( $firstspace == -1 ) {
                      $firstspace = $i;
                  }
              } elseif ( $x2 === ' ' ) {
                  if ( $firstsingleletterword == -1 ) {
                      $firstsingleletterword = $i;
                  }
              } else {
                  if ( $firstmultiletterword == -1 ) {
                      $firstmultiletterword = $i;
                  }
              }
          }

          # Preference order: after a single-letter word, then after a
          # multi-letter word, then directly after a space. All three can
          # be -1 (e.g. the line holds only one quintuple-apostrophe), in
          # which case nothing is changed.
          $demote = -1;
          if ( $firstsingleletterword > -1 ) {
              $demote = $firstsingleletterword;
          } elseif ( $firstmultiletterword > -1 ) {
              $demote = $firstmultiletterword;
          } elseif ( $firstspace > -1 ) {
              $demote = $firstspace;
          }
          if ( $demote > -1 ) {
              $arr[$demote] = "''";
              $arr[$demote - 1] .= "'";
          }
      }

      # Now let's actually convert our apostrophic mush to HTML!
      # $state names the open tags in opening order ('', 'i', 'b', 'ib',
      # 'bi'); 'both' means ''''' was seen and the nesting order is not
      # decided yet, so text is held back in $buffer.
      $output = '';
      $buffer = '';
      $state = '';
      foreach ( $arr as $i => $r ) {
          if ( ( $i % 2 ) == 0 ) {
              # Plain text
              if ( $state === 'both' ) {
                  $buffer .= $r;
              } else {
                  $output .= $r;
              }
              continue;
          }

          $len = strlen( $r );
          if ( $len == 2 ) {
              if ( $state === 'i' ) {
                  $output .= '</i>';
                  $state = '';
              } elseif ( $state === 'bi' ) {
                  $output .= '</i>';
                  $state = 'b';
              } elseif ( $state === 'ib' ) {
                  $output .= '</b></i><b>';
                  $state = 'b';
              } elseif ( $state === 'both' ) {
                  $output .= '<b><i>' . $buffer . '</i>';
                  $state = 'b';
              } else { # $state can be 'b' or ''
                  $output .= '<i>';
                  $state .= 'i';
              }
          } elseif ( $len == 3 ) {
              if ( $state === 'b' ) {
                  $output .= '</b>';
                  $state = '';
              } elseif ( $state === 'bi' ) {
                  $output .= '</i></b><i>';
                  $state = 'i';
              } elseif ( $state === 'ib' ) {
                  $output .= '</b>';
                  $state = 'i';
              } elseif ( $state === 'both' ) {
                  $output .= '<i><b>' . $buffer . '</b>';
                  $state = 'i';
              } else { # $state can be 'i' or ''
                  $output .= '<b>';
                  $state .= 'b';
              }
          } elseif ( $len == 5 ) {
              if ( $state === 'b' ) {
                  $output .= '</b><i>';
                  $state = 'i';
              } elseif ( $state === 'i' ) {
                  $output .= '</i><b>';
                  $state = 'b';
              } elseif ( $state === 'bi' ) {
                  $output .= '</i></b>';
                  $state = '';
              } elseif ( $state === 'ib' ) {
                  $output .= '</b></i>';
                  $state = '';
              } elseif ( $state === 'both' ) {
                  $output .= '<i><b>' . $buffer . '</b></i>';
                  $state = '';
              } else { # ($state == '')
                  $buffer = '';
                  $state = 'both';
              }
          }
      }

      # Now close all remaining tags. Notice that the order is important.
      if ( $state === 'b' || $state === 'ib' ) {
          $output .= '</b>';
      }
      if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
          $output .= '</i>';
      }
      if ( $state === 'bi' ) {
          $output .= '</b>';
      }
      # There might be lonely ''''', so make sure we have a buffer.
      # Compare against '' explicitly: a buffer of "0" is falsy in PHP and
      # would otherwise be silently dropped from the output.
      if ( $state === 'both' && $buffer !== '' ) {
          $output .= '<b><i>' . $buffer . '</i></b>';
      }
      return $output;
  }
  /**
   * Replace external links (REL): bracketed links such as
   * [http://domain.tld/path label] become HTML and are registered with
   * the parser output.
   *
   * Note: this is all very hackish and the order of execution matters a lot.
   * Make sure to run maintenance/parserTests.php if you change this code.
   *
   * @private
   *
   * @param $text string
   *
   * @return string
   */
  function replaceExternalLinks( $text ) {
      wfProfileIn( __METHOD__ );

      # After the leading plain text, the split yields groups of four:
      # URL, protocol, link text, text following the link
      $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
      $s = array_shift( $bits );

      for ( $i = 0; $i < count( $bits ); $i += 4 ) {
          $url = $bits[$i];
          # $bits[$i + 1] is the protocol; it is not needed here
          $label = $bits[$i + 2];
          $trail = $bits[$i + 3];

          # '<' and '>' (already escaped by removeHTMLtags()) must not be
          # part of a URL, per RFC 2396: move the remainder into the label.
          $entityMatch = array();
          if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
              $cutAt = $entityMatch[0][1];
              $label = substr( $url, $cutAt ) . ' ' . $label;
              $url = substr( $url, 0, $cutAt );
          }

          # If the link text is an image URL, replace it with an <img> tag.
          # This happened by accident in the original parser, but some
          # people used it extensively.
          $img = $this->maybeMakeExternalImage( $label );
          if ( $img !== false ) {
              $label = $img;
          }

          # Link type for CSS - if URL==text, the link is essentially free
          $linktype = ( $label === $url ) ? 'free' : 'text';
          $dtrail = '';

          if ( $label == '' ) {
              # No link text, e.g. [http://domain.tld/some.link]: autonumber
              $langObj = $this->getTargetLanguage();
              $label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
              $linktype = 'autonumber';
          } else {
              # Have link text, e.g. [http://domain.tld/some.link text]s
              # Check for trail
              list( $dtrail, $trail ) = Linker::splitTrail( $trail );
          }

          $label = $this->getConverterLanguage()->markNoConversion( $label );
          $url = Sanitizer::cleanUrl( $url );

          # Use the encoded URL.
          # This means that users can paste URLs directly into the text.
          # Funny characters like ö aren't valid in URLs anyway.
          # This was changed in August 2004.
          $link = Linker::makeExternalLink( $url, $label, false, $linktype,
              $this->getExternalLinkAttribs( $url ) );
          $s .= $link . $dtrail . $trail;

          # Register link in the output object, with unnecessary URL escape
          # codes replaced by the referenced character. This prevents
          # spammers from hiding links from the filters.
          $this->mOutput->addExternalLink( self::replaceUnusualEscapes( $url ) );
      }

      wfProfileOut( __METHOD__ );
      return $s;
  }
  /**
   * Collect the extra HTML attributes an external link should carry.
   * The result may contain rel => nofollow (depending on
   * $wgNoFollowLinks, the namespace exceptions and the domain
   * exceptions) and/or a target attribute (when the parser options
   * supply an external link target).
   *
   * @param $url String|bool optional URL, checked against the domain
   *  exceptions for rel => nofollow
   * @return Array associative array of HTML attributes
   */
  function getExternalLinkAttribs( $url = false ) {
      global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

      $attribs = array();
      $ns = $this->mTitle->getNamespace();

      $wantNoFollow = $wgNoFollowLinks
          && !in_array( $ns, $wgNoFollowNsExceptions )
          && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions );
      if ( $wantNoFollow ) {
          $attribs['rel'] = 'nofollow';
      }

      $target = $this->mOptions->getExternalLinkTarget();
      if ( $target ) {
          $attribs['target'] = $target;
      }

      return $attribs;
  }
  /**
   * Decode the percent-escapes in a URL that did not need escaping,
   * leaving the others as they are. Each %XX sequence is decided on by
   * replaceUnusualEscapesCallback().
   *
   * @param $url String
   * @return String
   *
   * @todo This can merge genuinely required bits in the path or query string,
   *   breaking legit URLs. A proper fix would treat the various parts of
   *   the URL differently; as a workaround, just use the output for
   *   statistical records, not for actual linking/output.
   */
  static function replaceUnusualEscapes( $url ) {
      $escapePattern = '/%[0-9A-Fa-f]{2}/';
      $decider = array( __CLASS__, 'replaceUnusualEscapesCallback' );
      return preg_replace_callback( $escapePattern, $decider, $url );
  }
  /**
   * Callback function used in replaceUnusualEscapes().
   * Decides for one %XX escape whether to decode it: the decoded
   * character is returned when its byte value lies strictly between 32
   * and 127 and it is not in the unsafe/reserved list; otherwise the
   * escape is returned unchanged.
   *
   * @param $matches array Match array; $matches[0] is the %XX sequence
   *
   * @return string
   */
  private static function replaceUnusualEscapesCallback( $matches ) {
      $escape = $matches[0];
      $char = urldecode( $escape );
      $ord = ord( $char );

      # Control characters, space, DEL and non-ASCII bytes stay escaped
      if ( $ord <= 32 || $ord >= 127 ) {
          return $escape;
      }

      # Unsafe or HTTP reserved characters according to RFC 1738 stay escaped
      if ( strpos( '<>"#{}|\^~[]`;/?', $char ) !== false ) {
          return $escape;
      }

      # Anything else shouldn't have been escaped
      return $char;
  }
  /**
   * Make an image out of a URL if that is allowed, either through the
   * global option, through the allowed-prefix exception, or through the
   * on-wiki whitelist. The URL must match self::EXT_IMAGE_REGEX in
   * every case.
   * @private
   *
   * @param $url string
   *
   * @return string|bool Image HTML, or false if no image was made
   */
  function maybeMakeExternalImage( $url ) {
      $allowedFrom = $this->mOptions->getAllowExternalImagesFrom();
      $haveException = !empty( $allowedFrom );

      # The exception setting is either one prefix or an array of prefixes;
      # the URL has to start with one of them
      $prefixMatched = false;
      if ( $haveException ) {
          if ( is_array( $allowedFrom ) ) {
              foreach ( $allowedFrom as $prefix ) {
                  if ( strpos( $url, $prefix ) === 0 ) {
                      $prefixMatched = true;
                      break;
                  }
              }
          } else {
              $prefixMatched = ( strpos( $url, $allowedFrom ) === 0 );
          }
      }

      $html = false;
      if ( $this->mOptions->getAllowExternalImages() || $prefixMatched ) {
          if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
              # Image found
              $html = Linker::makeExternalImage( $url );
          }
      }

      # Last resort: the on-wiki whitelist of regex fragments, one per line
      if ( !$html
          && $this->mOptions->getEnableImageWhitelist()
          && preg_match( self::EXT_IMAGE_REGEX, $url )
      ) {
          $whitelist = explode( "\n", wfMsgForContent( 'external_image_whitelist' ) );
          foreach ( $whitelist as $entry ) {
              # Ignore comment lines and blank entries
              $isComment = ( strpos( $entry, '#' ) === 0 );
              if ( $isComment || $entry === '' ) {
                  continue;
              }
              # Escape the delimiter in the fragment and match case-insensitively
              $entryRegex = '/' . str_replace( '/', '\\/', $entry ) . '/i';
              if ( preg_match( $entryRegex, $url ) ) {
                  # Image matches a whitelist entry
                  $html = Linker::makeExternalImage( $url );
                  break;
              }
          }
      }

      return $html;
  }
  1494. /**
  1495. * Process [[ ]] wikilinks
  1496. *
  1497. * @param $s string
  1498. *
  1499. * @return String: processed text
  1500. *
  1501. * @private
  1502. */
  1503. function re

Large files are truncated, but you can click here to view the full file