PageRenderTime 66ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/common/libraries/plugin/wiki/mediawiki_parser.class.php

https://bitbucket.org/renaatdemuynck/chamilo
PHP | 1941 lines | 1435 code | 166 blank | 340 comment | 302 complexity | 7a46461fcfa4424780e86bc4a28689f5 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, LGPL-3.0, GPL-3.0, MIT, GPL-2.0
  1. <?php
  2. require_once dirname(__FILE__) . '/mediawiki/Utilities.php';
  3. require_once dirname(__FILE__) . '/mediawiki/Sanitizer.php';
  4. require_once dirname(__FILE__) . '/mediawiki/StringUtils.php';
  5. require_once dirname(__FILE__) . '/mediawiki/Xml.php';
  6. require_once dirname(__FILE__) . '/mediawiki/StripState.php';
  7. require_once dirname(__FILE__) . '/mediawiki/Title.php';
  8. require_once dirname(__FILE__) . '/mediawiki/LinkHolderArray.php';
  9. require_once dirname(__FILE__) . '/mediawiki/Linker.php';
  10. require_once dirname(__FILE__) . '/mediawiki/LinkCache.php';
  11. require_once dirname(__FILE__) . '/mediawiki/Defines.php';
  12. require_once dirname(__FILE__) . '/mediawiki/ParserOutput.php';
  13. require_once dirname(__FILE__) . '/mediawiki/Namespace.php';
  14. require_once dirname(__FILE__) . '/mediawiki_parser_context.class.php';
  15. function wfUrlProtocols()
  16. {
  17. /**
  18. * The external URL protocols
  19. */
  20. $wgUrlProtocols = array('http://', 'https://', 'ftp://', 'irc://', 'gopher://', 'telnet://', // Well if we're going to support the above.. -ĂŚvar
  21. 'nntp://', // @bug 3808 RFC 1738
  22. 'worldwind://', 'mailto:', 'news:', 'svn://');
  23. // Support old-style $wgUrlProtocols strings, for backwards compatibility
  24. // with LocalSettings files from 1.5
  25. if (is_array($wgUrlProtocols))
  26. {
  27. $protocols = array();
  28. foreach ($wgUrlProtocols as $protocol)
  29. $protocols[] = preg_quote($protocol, '/');
  30. return implode('|', $protocols);
  31. }
  32. else
  33. {
  34. return $wgUrlProtocols;
  35. }
  36. }
  37. function wfUrlencode($s)
  38. {
  39. $s = urlencode($s);
  40. $s = str_ireplace(array('%3B', '%3A', '%40', '%24', '%21', '%2A', '%28', '%29', '%2C', '%2F'), array(';', ':', '@',
  41. '$', '!', '*', '(', ')', ',', '/'), $s);
  42. return $s;
  43. }
  44. /**
  45. * This is the logical opposite of wfArrayToCGI(): it accepts a query string as
  46. * its argument and returns the same string in array form. This allows compa-
  47. * tibility with legacy functions that accept raw query strings instead of nice
  48. * arrays. Of course, keys and values are urldecode()d. Don't try passing in-
  49. * valid query strings, or it will explode.
  50. *
  51. * @param $query string Query string
  52. * @return array Array version of input
  53. */
  54. function wfCgiToArray($query)
  55. {
  56. if (isset($query[0]) and $query[0] == '?')
  57. {
  58. $query = substr($query, 1);
  59. }
  60. $bits = explode('&', $query);
  61. $ret = array();
  62. foreach ($bits as $bit)
  63. {
  64. if ($bit === '')
  65. {
  66. continue;
  67. }
  68. list($key, $value) = explode('=', $bit);
  69. $key = urldecode($key);
  70. $value = urldecode($value);
  71. $ret[$key] = $value;
  72. }
  73. return $ret;
  74. }
  75. /**
  76. * This function takes two arrays as input, and returns a CGI-style string, e.g.
  77. * "days=7&limit=100". Options in the first array override options in the second.
  78. * Options set to "" will not be output.
  79. */
  80. function wfArrayToCGI($array1, $array2 = NULL)
  81. {
  82. if (! is_null($array2))
  83. {
  84. $array1 = $array1 + $array2;
  85. }
  86. $cgi = '';
  87. foreach ($array1 as $key => $value)
  88. {
  89. if ('' !== $value)
  90. {
  91. if ('' != $cgi)
  92. {
  93. $cgi .= '&';
  94. }
  95. if (is_array($value))
  96. {
  97. $firstTime = true;
  98. foreach ($value as $v)
  99. {
  100. $cgi .= ($firstTime ? '' : '&') . urlencode($key . '[]') . '=' . urlencode($v);
  101. $firstTime = false;
  102. }
  103. }
  104. else
  105. $cgi .= urlencode($key) . '=' . urlencode($value);
  106. }
  107. }
  108. return $cgi;
  109. }
  110. /**
  111. * Append a query string to an existing URL, which may or may not already
  112. * have query string parameters already. If so, they will be combined.
  113. *
  114. * @param string $url
  115. * @param string $query
  116. * @return string
  117. */
  118. function wfAppendQuery($url, $query)
  119. {
  120. if ($query != '')
  121. {
  122. if (false === strpos($url, '?'))
  123. {
  124. $url .= '?';
  125. }
  126. else
  127. {
  128. $url .= '&';
  129. }
  130. $url .= $query;
  131. }
  132. return $url;
  133. }
  134. /**
  135. * A Mediawiki wikitext parser using the same functions
  136. * as used by Mediawiki's parsing engine
  137. *
  138. * @author Hans De Bisschop
  139. * @see Parser
  140. *
  141. */
  142. class MediawikiParser
  143. {
  144. // State constants for the definition list colon extraction
  145. const COLON_STATE_TEXT = 0;
  146. const COLON_STATE_TAG = 1;
  147. const COLON_STATE_TAGSTART = 2;
  148. const COLON_STATE_CLOSETAG = 3;
  149. const COLON_STATE_TAGSLASH = 4;
  150. const COLON_STATE_COMMENT = 5;
  151. const COLON_STATE_COMMENTDASH = 6;
  152. const COLON_STATE_COMMENTDASHDASH = 7;
  153. const MARKER_SUFFIX = "-QINU\x7f";
  154. const VERSION = '1.6.4';
  155. // Flags for preprocessToDom
  156. const PTD_FOR_INCLUSION = 1;
  157. private $mUniqPrefix;
  158. /**
  159. * The context of the MediawikiParser
  160. *
  161. * @var MediawikiParserContext
  162. */
  163. private $mediawiki_parser_context;
  164. function __construct(MediaWikiParserContext $mediawiki_parser_context)
  165. {
  166. $this->mediawiki_parser_context = $mediawiki_parser_context;
  167. $this->mUniqPrefix = "\x7fUNIQ" . self :: getRandomString();
  168. $this->mLinkID = 0;
  169. $this->mOutput = new MediawikiParserOutput();
  170. $this->mStripState = new MediawikiStripState();
  171. $this->mLinkHolders = new MediawikiLinkHolderArray($this);
  172. }
  173. function get_mediawiki_parser_context()
  174. {
  175. return $this->mediawiki_parser_context;
  176. }
  177. /**
  178. * Get a random string
  179. *
  180. * @private
  181. * @static
  182. */
  183. function getRandomString()
  184. {
  185. return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
  186. }
  187. function parse()
  188. {
  189. $text = $this->mediawiki_parser_context->get_body();
  190. $text = $this->internalParse($text);
  191. # Clean up special characters, only run once, next-to-last before doBlockLevels
  192. $fixtags = array(# french spaces, last one Guillemet-left
  193. # only if there is something before the space
  194. '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&nbsp;\\2', # french spaces, Guillemet-right
  195. '/(\\302\\253) /' => '\\1&nbsp;', '/&nbsp;(!\s*important)/' => ' \\1'); #Beware of CSS magic word !important, bug #11874.
  196. $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);
  197. $text = $this->doBlockLevels($text, $linestart);
  198. $this->replaceLinkHolders($text);
  199. return $text;
  200. }
  201. /**
  202. * Replace <!--LINK--> link placeholders with actual links, in the buffer
  203. * Placeholders created in Skin::makeLinkObj()
  204. * Returns an array of link CSS classes, indexed by PDBK.
  205. */
  206. function replaceLinkHolders(&$text, $options = 0)
  207. {
  208. return $this->mLinkHolders->replace($text);
  209. }
  210. function internalParse($text)
  211. {
  212. $isMain = true;
  213. //$text = Sanitizer :: removeHTMLtags($text, array(&$this, 'attributeStripCallback'), false, array_keys($this->mTransparentTagHooks));
  214. // Tables need to come after variable replacement for things to work
  215. // properly; putting them before other transformations should keep
  216. // exciting things like link expansions from showing up in surprising
  217. // places.
  218. $text = $this->doTableStuff($text);
  219. $text = preg_replace('/(^|\n)-----*/', '\\1<hr />', $text);
  220. //
  221. // $text = $this->doDoubleUnderscore($text);
  222. $text = $this->doHeadings($text);
  223. // //if ($this->mOptions->getUseDynamicDates())
  224. // //{
  225. // // $df = DateFormatter :: getInstance();
  226. // // $text = $df->reformat($this->mOptions->getDateFormat(), $text);
  227. // //}
  228. $text = $this->doAllQuotes($text);
  229. $text = $this->replaceInternalLinks($text);
  230. // $text = $this->replaceExternalLinks($text);
  231. //
  232. // # replaceInternalLinks may sometimes leave behind
  233. // # absolute URLs, which have to be masked to hide them from replaceExternalLinks
  234. // $text = str_replace($this->mUniqPrefix . 'NOPARSE', '', $text);
  235. //
  236. // $text = $this->doMagicLinks($text);
  237. $text = $this->formatHeadings($text, $isMain);
  238. return $text;
  239. }
  240. /**
  241. * parse the wiki syntax used to render tables
  242. *
  243. * @private
  244. */
  245. function doTableStuff($text)
  246. {
       // Converts wiki table mark-up into HTML, one input line at a time:
       //   {|  opens a table      |}  closes it      |-  starts a new row
       //   |   data cell (td)     !   header cell (th)   |+  caption
       // Lines outside any table are copied through unchanged.
       // The arrays below are used as stacks with one entry per currently
       // open (possibly nested) table: pushed on "{|", popped on "|}".
  247. $lines = MediawikiStringUtils :: explode("\n", $text);
  248. $out = '';
  249. $td_history = array(); // Is currently a td tag open?
  250. $last_tag_history = array(); // Save history of last tag activated (td, th or caption)
  251. $tr_history = array(); // Is currently a tr tag open?
  252. $tr_attributes = array(); // history of tr attributes
  253. $has_opened_tr = array(); // Did this table open a <tr> element?
  254. $indent_level = 0; // indent level of the table
  255. foreach ($lines as $outLine)
  256. {
  257. $line = trim($outLine);
  258. if ($line == '')
  259. { // empty line, go to next line
  260. $out .= $outLine . "\n";
  261. continue;
  262. }
  263. $first_character = $line[0];
  264. $matches = array();
  265. if (preg_match('/^(:*)\{\|(.*)$/', $line, $matches))
  266. {
  267. // First check if we are starting a new table
       // Leading colons give the indent level; each one becomes a <dl><dd> wrapper
  268. $indent_level = strlen($matches[1]);
  269. $attributes = $this->mStripState->unstripBoth($matches[2]);
  270. $attributes = MediawikiSanitizer :: fixTagAttributes($attributes, 'table');
  271. $outLine = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>";
  272. array_push($td_history, false);
  273. array_push($last_tag_history, '');
  274. array_push($tr_history, false);
  275. array_push($tr_attributes, '');
  276. array_push($has_opened_tr, false);
  277. }
  278. else
  279. if (count($td_history) == 0)
  280. {
  281. // Don't do any of the following
       // (no table is open, so the line is passed through as-is)
  282. $out .= $outLine . "\n";
  283. continue;
  284. }
  285. else
  286. if (substr($line, 0, 2) === '|}')
  287. {
  288. // We are ending a table
       // Closing tags are prepended innermost-last: cell, then row, then table
  289. $line = '</table>' . substr($line, 2);
  290. $last_tag = array_pop($last_tag_history);
  291. if (! array_pop($has_opened_tr))
  292. {
       // A table that never opened a row gets one empty row
  293. $line = "<tr><td></td></tr>{$line}";
  294. }
  295. if (array_pop($tr_history))
  296. {
  297. $line = "</tr>{$line}";
  298. }
  299. if (array_pop($td_history))
  300. {
  301. $line = "</{$last_tag}>{$line}";
  302. }
  303. array_pop($tr_attributes);
       // NOTE(review): $indent_level holds the value of the most recently opened
       // table, not necessarily the one being closed here - confirm for nested tables
  304. $outLine = $line . str_repeat('</dd></dl>', $indent_level);
  305. }
  306. else
  307. if (substr($line, 0, 2) === '|-')
  308. {
  309. // Now we have a table row
  310. $line = preg_replace('#^\|-+#', '', $line);
  311. // Whats after the tag is now only attributes
       // The attributes are stored and only emitted when the first cell of the row opens the <tr>
  312. $attributes = $this->mStripState->unstripBoth($line);
  313. $attributes = MediawikiSanitizer :: fixTagAttributes($attributes, 'tr');
  314. array_pop($tr_attributes);
  315. array_push($tr_attributes, $attributes);
  316. $line = '';
  317. $last_tag = array_pop($last_tag_history);
  318. array_pop($has_opened_tr);
  319. array_push($has_opened_tr, true);
  320. if (array_pop($tr_history))
  321. {
  322. $line = '</tr>';
  323. }
  324. if (array_pop($td_history))
  325. {
  326. $line = "</{$last_tag}>{$line}";
  327. }
  328. $outLine = $line;
  329. array_push($tr_history, false);
  330. array_push($td_history, false);
  331. array_push($last_tag_history, '');
  332. }
  333. else
  334. if ($first_character === '|' || $first_character === '!' || substr($line, 0, 2) === '|+')
  335. {
  336. // This might be cell elements, td, th or captions
  337. if (substr($line, 0, 2) === '|+')
  338. {
       // Caption: '+' is used as the marker and the extra character is dropped
  339. $first_character = '+';
  340. $line = substr($line, 1);
  341. }
  342. $line = substr($line, 1);
  343. if ($first_character === '!')
  344. {
       // Header cells may be separated by "!!"; normalise to "||" before splitting
  345. $line = str_replace('!!', '||', $line);
  346. }
  347. // Split up multiple cells on the same line.
  348. // FIXME : This can result in improper nesting of tags processed
  349. // by earlier parser steps, but should avoid splitting up eg
  350. // attribute values containing literal "||".
  351. $cells = MediawikiStringUtils :: explodeMarkup('||', $line);
  352. $outLine = '';
  353. // Loop through each table cell
  354. foreach ($cells as $cell)
  355. {
       // $previous collects the tags that must be emitted before this cell opens
  356. $previous = '';
  357. if ($first_character !== '+')
  358. {
       // Cells (but not captions) live inside a row: open one if none is open
  359. $tr_after = array_pop($tr_attributes);
  360. if (! array_pop($tr_history))
  361. {
  362. $previous = "<tr{$tr_after}>\n";
  363. }
  364. array_push($tr_history, true);
  365. array_push($tr_attributes, '');
  366. array_pop($has_opened_tr);
  367. array_push($has_opened_tr, true);
  368. }
  369. $last_tag = array_pop($last_tag_history);
  370. if (array_pop($td_history))
  371. {
       // Close the cell that is still open before starting the next one
  372. $previous = "</{$last_tag}>{$previous}";
  373. }
  374. if ($first_character === '|')
  375. {
  376. $last_tag = 'td';
  377. }
  378. else
  379. if ($first_character === '!')
  380. {
  381. $last_tag = 'th';
  382. }
  383. else
  384. if ($first_character === '+')
  385. {
  386. $last_tag = 'caption';
  387. }
  388. else
  389. {
  390. $last_tag = '';
  391. }
  392. array_push($last_tag_history, $last_tag);
  393. // A cell could contain both parameters and data
  394. $cell_data = explode('|', $cell, 2);
  395. // Bug 553: Note that a '|' inside an invalid link should not
  396. // be mistaken as delimiting cell parameters
  397. if (strpos($cell_data[0], '[[') !== false)
  398. {
  399. $cell = "{$previous}<{$last_tag}>{$cell}";
  400. }
  401. else
  402. if (count($cell_data) == 1)
  403. $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
  404. else
  405. {
       // Text before the first '|' is the attribute list, the rest is the cell content
  406. $attributes = $this->mStripState->unstripBoth($cell_data[0]);
  407. $attributes = MediawikiSanitizer :: fixTagAttributes($attributes, $last_tag);
  408. $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
  409. }
  410. $outLine .= $cell;
  411. array_push($td_history, true);
  412. }
  413. }
  414. $out .= $outLine . "\n";
  415. }
  416. // Closing open td, tr && table
       // (input ended while tables were still open; one iteration per unclosed table)
       // NOTE(review): an open cell is always closed with </td> here, even if the
       // last opened tag was th or caption - confirm whether that is intended
  417. while (count($td_history) > 0)
  418. {
  419. if (array_pop($td_history))
  420. {
  421. $out .= "</td>\n";
  422. }
  423. if (array_pop($tr_history))
  424. {
  425. $out .= "</tr>\n";
  426. }
  427. if (! array_pop($has_opened_tr))
  428. {
  429. $out .= "<tr><td></td></tr>\n";
  430. }
  431. $out .= "</table>\n";
  432. }
  433. // Remove trailing line-ending (b/c)
  434. if (substr($out, - 1) === "\n")
  435. {
  436. $out = substr($out, 0, - 1);
  437. }
  438. // special case: don't return empty table
  439. if ($out === "<table>\n<tr><td></td></tr>\n</table>")
  440. {
  441. $out = '';
  442. }
  443. return $out;
  444. }
  445. /**
  446. * Parse headers and return html
  447. *
  448. * @private
  449. */
  450. function doHeadings($text)
  451. {
  452. for($i = 6; $i >= 1; -- $i)
  453. {
  454. $h = str_repeat('=', $i);
  455. $text = preg_replace("/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text);
  456. }
  457. return $text;
  458. }
  459. /**
  460. * Replace single quotes with HTML markup
  461. * @private
  462. * @return string the altered text
  463. */
  464. function doAllQuotes($text)
  465. {
  466. $outtext = '';
  467. $lines = MediawikiStringUtils :: explode("\n", $text);
  468. foreach ($lines as $line)
  469. {
  470. $outtext .= $this->doQuotes($line) . "\n";
  471. }
  472. $outtext = substr($outtext, 0, - 1);
  473. return $outtext;
  474. }
  475. /**
  476. * Helper function for doAllQuotes()
  477. */
  478. public function doQuotes($text)
  479. {
  480. $arr = preg_split("/(''+)/", $text, - 1, PREG_SPLIT_DELIM_CAPTURE);
  481. if (count($arr) == 1)
  482. return $text;
  483. else
  484. {
  485. # First, do some preliminary work. This may shift some apostrophes from
  486. # being mark-up to being text. It also counts the number of occurrences
  487. # of bold and italics mark-ups.
  488. $i = 0;
  489. $numbold = 0;
  490. $numitalics = 0;
  491. foreach ($arr as $r)
  492. {
  493. if (($i % 2) == 1)
  494. {
  495. # If there are ever four apostrophes, assume the first is supposed to
  496. # be text, and the remaining three constitute mark-up for bold text.
  497. if (strlen($arr[$i]) == 4)
  498. {
  499. $arr[$i - 1] .= "'";
  500. $arr[$i] = "'''";
  501. }
  502. # If there are more than 5 apostrophes in a row, assume they're all
  503. # text except for the last 5.
  504. else
  505. if (strlen($arr[$i]) > 5)
  506. {
  507. $arr[$i - 1] .= str_repeat("'", strlen($arr[$i]) - 5);
  508. $arr[$i] = "'''''";
  509. }
  510. # Count the number of occurrences of bold and italics mark-ups.
  511. # We are not counting sequences of five apostrophes.
  512. if (strlen($arr[$i]) == 2)
  513. {
  514. $numitalics ++;
  515. }
  516. else
  517. if (strlen($arr[$i]) == 3)
  518. {
  519. $numbold ++;
  520. }
  521. else
  522. if (strlen($arr[$i]) == 5)
  523. {
  524. $numitalics ++;
  525. $numbold ++;
  526. }
  527. }
  528. $i ++;
  529. }
  530. # If there is an odd number of both bold and italics, it is likely
  531. # that one of the bold ones was meant to be an apostrophe followed
  532. # by italics. Which one we cannot know for certain, but it is more
  533. # likely to be one that has a single-letter word before it.
  534. if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
  535. {
  536. $i = 0;
  537. $firstsingleletterword = - 1;
  538. $firstmultiletterword = - 1;
  539. $firstspace = - 1;
  540. foreach ($arr as $r)
  541. {
  542. if (($i % 2 == 1) and (strlen($r) == 3))
  543. {
  544. $x1 = substr($arr[$i - 1], - 1);
  545. $x2 = substr($arr[$i - 1], - 2, 1);
  546. if ($x1 === ' ')
  547. {
  548. if ($firstspace == - 1)
  549. $firstspace = $i;
  550. }
  551. else
  552. if ($x2 === ' ')
  553. {
  554. if ($firstsingleletterword == - 1)
  555. $firstsingleletterword = $i;
  556. }
  557. else
  558. {
  559. if ($firstmultiletterword == - 1)
  560. $firstmultiletterword = $i;
  561. }
  562. }
  563. $i ++;
  564. }
  565. # If there is a single-letter word, use it!
  566. if ($firstsingleletterword > - 1)
  567. {
  568. $arr[$firstsingleletterword] = "''";
  569. $arr[$firstsingleletterword - 1] .= "'";
  570. }
  571. # If not, but there's a multi-letter word, use that one.
  572. else
  573. if ($firstmultiletterword > - 1)
  574. {
  575. $arr[$firstmultiletterword] = "''";
  576. $arr[$firstmultiletterword - 1] .= "'";
  577. }
  578. # ... otherwise use the first one that has neither.
  579. # (notice that it is possible for all three to be -1 if, for example,
  580. # there is only one pentuple-apostrophe in the line)
  581. else
  582. if ($firstspace > - 1)
  583. {
  584. $arr[$firstspace] = "''";
  585. $arr[$firstspace - 1] .= "'";
  586. }
  587. }
  588. # Now let's actually convert our apostrophic mush to HTML!
  589. $output = '';
  590. $buffer = '';
  591. $state = '';
  592. $i = 0;
  593. foreach ($arr as $r)
  594. {
  595. if (($i % 2) == 0)
  596. {
  597. if ($state === 'both')
  598. $buffer .= $r;
  599. else
  600. $output .= $r;
  601. }
  602. else
  603. {
  604. if (strlen($r) == 2)
  605. {
  606. if ($state === 'i')
  607. {
  608. $output .= '</i>';
  609. $state = '';
  610. }
  611. else
  612. if ($state === 'bi')
  613. {
  614. $output .= '</i>';
  615. $state = 'b';
  616. }
  617. else
  618. if ($state === 'ib')
  619. {
  620. $output .= '</b></i><b>';
  621. $state = 'b';
  622. }
  623. else
  624. if ($state === 'both')
  625. {
  626. $output .= '<b><i>' . $buffer . '</i>';
  627. $state = 'b';
  628. }
  629. else # $state can be 'b' or ''
  630. {
  631. $output .= '<i>';
  632. $state .= 'i';
  633. }
  634. }
  635. else
  636. if (strlen($r) == 3)
  637. {
  638. if ($state === 'b')
  639. {
  640. $output .= '</b>';
  641. $state = '';
  642. }
  643. else
  644. if ($state === 'bi')
  645. {
  646. $output .= '</i></b><i>';
  647. $state = 'i';
  648. }
  649. else
  650. if ($state === 'ib')
  651. {
  652. $output .= '</b>';
  653. $state = 'i';
  654. }
  655. else
  656. if ($state === 'both')
  657. {
  658. $output .= '<i><b>' . $buffer . '</b>';
  659. $state = 'i';
  660. }
  661. else # $state can be 'i' or ''
  662. {
  663. $output .= '<b>';
  664. $state .= 'b';
  665. }
  666. }
  667. else
  668. if (strlen($r) == 5)
  669. {
  670. if ($state === 'b')
  671. {
  672. $output .= '</b><i>';
  673. $state = 'i';
  674. }
  675. else
  676. if ($state === 'i')
  677. {
  678. $output .= '</i><b>';
  679. $state = 'b';
  680. }
  681. else
  682. if ($state === 'bi')
  683. {
  684. $output .= '</i></b>';
  685. $state = '';
  686. }
  687. else
  688. if ($state === 'ib')
  689. {
  690. $output .= '</b></i>';
  691. $state = '';
  692. }
  693. else
  694. if ($state === 'both')
  695. {
  696. $output .= '<i><b>' . $buffer . '</b></i>';
  697. $state = '';
  698. }
  699. else # ($state == '')
  700. {
  701. $buffer = '';
  702. $state = 'both';
  703. }
  704. }
  705. }
  706. $i ++;
  707. }
  708. # Now close all remaining tags. Notice that the order is important.
  709. if ($state === 'b' || $state === 'ib')
  710. $output .= '</b>';
  711. if ($state === 'i' || $state === 'bi' || $state === 'ib')
  712. $output .= '</i>';
  713. if ($state === 'bi')
  714. $output .= '</b>';
  715. # There might be lonely ''''', so make sure we have a buffer
  716. if ($state === 'both' && $buffer)
  717. $output .= '<b><i>' . $buffer . '</i></b>';
  718. return $output;
  719. }
  720. }
  721. /**
  722. * Make lists from lines starting with ':', '*', '#', etc. (DBL)
  723. *
  724. * @private
  725. * @return string the lists rendered as HTML
  726. */
  727. function doBlockLevels($text, $linestart)
  728. {
       // $text is processed one line at a time; $linestart says whether the first
       // line starts a fresh line. When it is falsy, the first line is appended
       // to the output verbatim (without a trailing newline) and skipped.
       // NOTE(review): $this->mInPre and $this->mLastSection are read below before
       // this method assigns them; presumably they are initialised elsewhere in
       // the class - TODO confirm.
  729. # Parsing through the text line by line. The main thing
  730. # happening here is handling of block-level elements p, pre,
  731. # and making lists from lines starting with * # : etc.
  732. #
  733. $textLines = MediawikiStringUtils :: explode("\n", $text);
  734. $lastPrefix = $output = '';
  735. $this->mDTopen = $inBlockElem = false;
  736. $prefixLength = 0;
       // $paragraphStack is false, or a pending paragraph tag string ('<p>' or
       // '</p><p>') that is only emitted once the following line is seen
  737. $paragraphStack = false;
  738. foreach ($textLines as $oLine)
  739. {
  740. # Fix up $linestart
  741. if (! $linestart)
  742. {
  743. $output .= $oLine;
  744. $linestart = true;
  745. continue;
  746. }
  747. $lastPrefixLength = strlen($lastPrefix);
  748. $preCloseMatch = preg_match('/<\\/pre/i', $oLine);
  749. $preOpenMatch = preg_match('/<pre/i', $oLine);
  750. if (! $this->mInPre)
  751. {
  752. # Multiple prefixes may abut each other for nested lists.
  753. $prefixLength = strspn($oLine, '*#:;');
  754. $prefix = substr($oLine, 0, $prefixLength);
  755. # eh?
       # ($prefix2 is $prefix with ';' replaced by ':'; it is what gets compared
       # with and stored in $lastPrefix)
  756. $prefix2 = str_replace(';', ':', $prefix);
  757. $t = substr($oLine, $prefixLength);
  758. $this->mInPre = (bool) $preOpenMatch;
  759. }
  760. else
  761. {
  762. # Don't interpret any other prefixes in preformatted text
  763. $prefixLength = 0;
  764. $prefix = $prefix2 = '';
  765. $t = $oLine;
  766. }
  767. # List generation
  768. if ($prefixLength && $lastPrefix === $prefix2)
  769. {
  770. # Same as the last item, so no need to deal with nesting or opening stuff
  771. $output .= $this->nextItem(substr($prefix, - 1));
  772. $paragraphStack = false;
  773. if (substr($prefix, - 1) === ';')
  774. {
  775. # The one nasty exception: definition lists work like this:
  776. # ; title : definition text
  777. # So we check for : in the remainder text to split up the
  778. # title and definition, without b0rking links.
  779. $term = $t2 = '';
  780. if ($this->findColonNoLinks($t, $term, $t2) !== false)
  781. {
  782. $t = $t2;
  783. $output .= $term . $this->nextItem(':');
  784. }
  785. }
  786. }
  787. elseif ($prefixLength || $lastPrefixLength)
  788. {
  789. # Either open or close a level...
  790. $commonPrefixLength = $this->getCommon($prefix, $lastPrefix);
  791. $paragraphStack = false;
       # Close the levels of the previous line that are not shared with this one, innermost first
  792. while ($commonPrefixLength < $lastPrefixLength)
  793. {
  794. $output .= $this->closeList($lastPrefix[$lastPrefixLength - 1]);
  795. -- $lastPrefixLength;
  796. }
  797. if ($prefixLength <= $commonPrefixLength && $commonPrefixLength > 0)
  798. {
  799. $output .= $this->nextItem($prefix[$commonPrefixLength - 1]);
  800. }
       # Open the levels of this line that go beyond the shared prefix
  801. while ($prefixLength > $commonPrefixLength)
  802. {
  803. $char = substr($prefix, $commonPrefixLength, 1);
  804. $output .= $this->openList($char);
  805. if (';' === $char)
  806. {
  807. # FIXME: This is dupe of code above
       # ($term and $t2 are filled by reference by findColonNoLinks())
  808. if ($this->findColonNoLinks($t, $term, $t2) !== false)
  809. {
  810. $t = $t2;
  811. $output .= $term . $this->nextItem(':');
  812. }
  813. }
  814. ++ $commonPrefixLength;
  815. }
  816. $lastPrefix = $prefix2;
  817. }
  818. if (0 == $prefixLength)
  819. {
  820. # No prefix (not in list)--go to paragraph mode
  821. // XXX: use a stack for nestable elements like span, table and div
  822. $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t);
  823. $closematch = preg_match('/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' . '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|' . $this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t);
  824. if ($openmatch or $closematch)
  825. {
       // The line contains block-level HTML: end the current paragraph and
       // record whether we are now inside a block element
  826. $paragraphStack = false;
  827. # TODO bug 5718: paragraph closed
  828. $output .= $this->closeParagraph();
  829. if ($preOpenMatch and ! $preCloseMatch)
  830. {
  831. $this->mInPre = true;
  832. }
  833. if ($closematch)
  834. {
  835. $inBlockElem = false;
  836. }
  837. else
  838. {
  839. $inBlockElem = true;
  840. }
  841. }
  842. else
  843. if (! $inBlockElem && ! $this->mInPre)
  844. {
       // A line starting with a space is preformatted text; a blank line only
       // continues an already open pre section
  845. if (' ' == substr($t, 0, 1) and ($this->mLastSection === 'pre' or trim($t) != ''))
  846. {
  847. // pre
  848. if ($this->mLastSection !== 'pre')
  849. {
  850. $paragraphStack = false;
  851. $output .= $this->closeParagraph() . '<pre>';
  852. $this->mLastSection = 'pre';
  853. }
  854. $t = substr($t, 1);
  855. }
  856. else
  857. {
  858. // paragraph
  859. if ('' == trim($t))
  860. {
       // Blank line: a second consecutive blank line flushes the pending tag
       // followed by <br />; a first one only queues a paragraph break
  861. if ($paragraphStack)
  862. {
  863. $output .= $paragraphStack . '<br />';
  864. $paragraphStack = false;
  865. $this->mLastSection = 'p';
  866. }
  867. else
  868. {
  869. if ($this->mLastSection !== 'p')
  870. {
  871. $output .= $this->closeParagraph();
  872. $this->mLastSection = '';
  873. $paragraphStack = '<p>';
  874. }
  875. else
  876. {
  877. $paragraphStack = '</p><p>';
  878. }
  879. }
  880. }
  881. else
  882. {
       // Text line: emit the queued paragraph tag, or open a paragraph if none is open
  883. if ($paragraphStack)
  884. {
  885. $output .= $paragraphStack;
  886. $paragraphStack = false;
  887. $this->mLastSection = 'p';
  888. }
  889. else
  890. if ($this->mLastSection !== 'p')
  891. {
  892. $output .= $this->closeParagraph() . '<p>';
  893. $this->mLastSection = 'p';
  894. }
  895. }
  896. }
  897. }
  898. }
  899. // somewhere above we forget to get out of pre block (bug 785)
  900. if ($preCloseMatch && $this->mInPre)
  901. {
  902. $this->mInPre = false;
  903. }
       // While a paragraph tag is queued the (blank) line itself is not written out
  904. if ($paragraphStack === false)
  905. {
  906. $output .= $t . "\n";
  907. }
  908. }
       // End of input: close the list levels still open from the last line, then the open section
  909. while ($prefixLength)
  910. {
  911. $output .= $this->closeList($prefix2[$prefixLength - 1]);
  912. -- $prefixLength;
  913. }
  914. if ('' != $this->mLastSection)
  915. {
  916. $output .= '</' . $this->mLastSection . '>';
  917. $this->mLastSection = '';
  918. }
  919. return $output;
  920. }
  921. /* private */ function nextItem($char)
  922. {
  923. if ('*' === $char || '#' === $char)
  924. {
  925. return '</li><li>';
  926. }
  927. else
  928. if (':' === $char || ';' === $char)
  929. {
  930. $close = '</dd>';
  931. if ($this->mDTopen)
  932. {
  933. $close = '</dt>';
  934. }
  935. if (';' === $char)
  936. {
  937. $this->mDTopen = true;
  938. return $close . '<dt>';
  939. }
  940. else
  941. {
  942. $this->mDTopen = false;
  943. return $close . '<dd>';
  944. }
  945. }
  946. return '<!-- ERR 2 -->';
  947. }
  948. /**
  949. * Split up a string on ':', ignoring any occurences inside tags
  950. * to prevent illegal overlapping.
  951. * @param string $str the string to split
  952. * @param string &$before set to everything before the ':'
  953. * @param string &$after set to everything after the ':'
  954. * return string the position of the ':', or false if none found
  955. */
  956. function findColonNoLinks($str, &$before, &$after)
  957. {
  958. $pos = strpos($str, ':');
  959. if ($pos === false)
  960. {
  961. // Nothing to find!
  962. return false;
  963. }
  964. $lt = strpos($str, '<');
  965. if ($lt === false || $lt > $pos)
  966. {
  967. // Easy; no tag nesting to worry about
  968. $before = substr($str, 0, $pos);
  969. $after = substr($str, $pos + 1);
  970. return $pos;
  971. }
  972. // Ugly state machine to walk through avoiding tags.
  973. $state = self :: COLON_STATE_TEXT;
  974. $stack = 0;
  975. $len = strlen($str);
  976. for($i = 0; $i < $len; $i ++)
  977. {
  978. $c = $str{$i};
  979. switch ($state)
  980. {
  981. // (Using the number is a performance hack for common cases)
  982. case 0 : // self::COLON_STATE_TEXT:
  983. switch ($c)
  984. {
  985. case "<" :
  986. // Could be either a <start> tag or an </end> tag
  987. $state = self :: COLON_STATE_TAGSTART;
  988. break;
  989. case ":" :
  990. if ($stack == 0)
  991. {
  992. // We found it!
  993. $before = substr($str, 0, $i);
  994. $after = substr($str, $i + 1);
  995. return $i;
  996. }
  997. // Embedded in a tag; don't break it.
  998. break;
  999. default :
  1000. // Skip ahead looking for something interesting
  1001. $colon = strpos($str, ':', $i);
  1002. if ($colon === false)
  1003. {
  1004. // Nothing else interesting
  1005. return false;
  1006. }
  1007. $lt = strpos($str, '<', $i);
  1008. if ($stack === 0)
  1009. {
  1010. if ($lt === false || $colon < $lt)
  1011. {
  1012. // We found it!
  1013. $before = substr($str, 0, $colon);
  1014. $after = substr($str, $colon + 1);
  1015. return $i;
  1016. }
  1017. }
  1018. if ($lt === false)
  1019. {
  1020. // Nothing else interesting to find; abort!
  1021. // We're nested, but there's no close tags left. Abort!
  1022. break 2;
  1023. }
  1024. // Skip ahead to next tag start
  1025. $i = $lt;
  1026. $state = self :: COLON_STATE_TAGSTART;
  1027. }
  1028. break;
  1029. case 1 : // self::COLON_STATE_TAG:
  1030. // In a <tag>
  1031. switch ($c)
  1032. {
  1033. case ">" :
  1034. $stack ++;
  1035. $state = self :: COLON_STATE_TEXT;
  1036. break;
  1037. case "/" :
  1038. // Slash may be followed by >?
  1039. $state = self :: COLON_STATE_TAGSLASH;
  1040. break;
  1041. default :
  1042. // ignore
  1043. }
  1044. break;
  1045. case 2 : // self::COLON_STATE_TAGSTART:
  1046. switch ($c)
  1047. {
  1048. case "/" :
  1049. $state = self :: COLON_STATE_CLOSETAG;
  1050. break;
  1051. case "!" :
  1052. $state = self :: COLON_STATE_COMMENT;
  1053. break;
  1054. case ">" :
  1055. // Illegal early close? This shouldn't happen D:
  1056. $state = self :: COLON_STATE_TEXT;
  1057. break;
  1058. default :
  1059. $state = self :: COLON_STATE_TAG;
  1060. }
  1061. break;
  1062. case 3 : // self::COLON_STATE_CLOSETAG:
  1063. // In a </tag>
  1064. if ($c === ">")
  1065. {
  1066. $stack --;
  1067. if ($stack < 0)
  1068. {
  1069. return false;
  1070. }
  1071. $state = self :: COLON_STATE_TEXT;
  1072. }
  1073. break;
  1074. case self :: COLON_STATE_TAGSLASH :
  1075. if ($c === ">")
  1076. {
  1077. // Yes, a self-closed tag <blah/>
  1078. $state = self :: COLON_STATE_TEXT;
  1079. }
  1080. else
  1081. {
  1082. // Probably we're jumping the gun, and this is an attribute
  1083. $state = self :: COLON_STATE_TAG;
  1084. }
  1085. break;
  1086. case 5 : // self::COLON_STATE_COMMENT:
  1087. if ($c === "-")
  1088. {
  1089. $state = self :: COLON_STATE_COMMENTDASH;
  1090. }
  1091. break;
  1092. case self :: COLON_STATE_COMMENTDASH :
  1093. if ($c === "-")
  1094. {
  1095. $state = self :: COLON_STATE_COMMENTDASHDASH;
  1096. }
  1097. else
  1098. {
  1099. $state = self :: COLON_STATE_COMMENT;
  1100. }
  1101. break;
  1102. case self :: COLON_STATE_COMMENTDASHDASH :
  1103. if ($c === ">")
  1104. {
  1105. $state = self :: COLON_STATE_TEXT;
  1106. }
  1107. else
  1108. {
  1109. $state = self :: COLON_STATE_COMMENT;
  1110. }
  1111. break;
  1112. default :
  1113. throw new MWException("State machine error in " . __METHOD__);
  1114. }
  1115. }
  1116. if ($stack > 0)
  1117. {
  1118. return false;
  1119. }
  1120. return false;
  1121. }
  1122. # getCommon() returns the length of the longest common substring
  1123. # of both arguments, starting at the beginning of both.
  1124. #
  /**
   * Returns the length of the common prefix of two strings, i.e. the number
   * of leading bytes that are identical in both arguments.
   *
   * @param string $st1 First string
   * @param string $st2 Second string
   * @return int Number of identical leading bytes; 0 when either string is empty
   */
  function getCommon($st1, $st2)
  {
      # Only the overlap of both strings can match
      $shorter = min(strlen($st1), strlen($st2));
      for ($i = 0; $i < $shorter; ++ $i)
      {
          # Square-bracket offsets: the curly-brace form ($str{$i}) is
          # deprecated since PHP 7.4 and no longer parses in PHP 8.
          if ($st1[$i] !== $st2[$i])
          {
              break;
          }
      }
      # $i is either the index of the first difference or the overlap length
      return $i;
  }
  /**
   * Builds the closing markup for the list identified by a prefix character.
   *
   * '*' closes an unordered list, '#' an ordered list and ':' a definition
   * list. For ':' the term is closed if $this->mDTopen is set (and the flag
   * is cleared), otherwise the description is closed.
   *
   * @param string $char List prefix character
   * @return string Closing tags followed by a newline, or the marker
   *                '<!-- ERR 3 -->' (without newline) for any other character
   */
  function closeList($char)
  {
      if ('*' === $char)
      {
          return '</li></ul>' . "\n";
      }
      if ('#' === $char)
      {
          return '</li></ol>' . "\n";
      }
      if (':' !== $char)
      {
          # Not a prefix this method knows how to close
          return '<!-- ERR 3 -->';
      }
      if ($this->mDTopen)
      {
          # A definition term is still open: close it and reset the flag
          $this->mDTopen = false;
          return '</dt></dl>' . "\n";
      }
      return '</dd></dl>' . "\n";
  }
  1172. # These next three functions open, continue, and close the list
  1173. # element appropriate to the prefix character passed into them.
  1174. #
  /**
   * Builds the opening markup for the list identified by a prefix character.
   *
   * closeParagraph() is always called first and its output is prepended to
   * the opening tags. ';' additionally sets $this->mDTopen.
   *
   * @param string $char List prefix character: '*', '#', ':' or ';'
   * @return string Opening markup, or only the marker '<!-- ERR 1 -->' for
   *                any other character (the closeParagraph() output is
   *                discarded in that case)
   */
  function openList($char)
  {
      $closing = $this->closeParagraph();
      if ('*' === $char)
      {
          return $closing . '<ul><li>';
      }
      if ('#' === $char)
      {
          return $closing . '<ol><li>';
      }
      if (':' === $char)
      {
          return $closing . '<dl><dd>';
      }
      if (';' === $char)
      {
          # Remember that a definition term is open so closeList() can close it
          $this->mDTopen = true;
          return $closing . '<dl><dt>';
      }
      return '<!-- ERR 1 -->';
  }
  1204. /**#@+
  1205. * Used by doBlockLevels()
  1206. * @private
  1207. */
  function closeParagraph()
  {
      # Emit a closing tag only when a section name is recorded; the loose
      # comparison treats every value that equals '' as "no open section".
      $closingTag = ('' != $this->mLastSection) ? '</' . $this->mLastSection . ">\n" : '';
      # Reset the block-level state in either case
      $this->mInPre = false;
      $this->mLastSection = '';
      return $closingTag;
  }
  /**
   * This function accomplishes several tasks:
   * 1) Auto-number headings if that option is enabled (the flag is hard-coded off here)
   * 2) Section [edit] links are not generated: no edit link is ever built in this method
   * 3) Add a table of contents in front of the first heading when the text has at least four headings
   * 4) Auto-anchor headings, suffixing repeated anchors with a counter
   *
   * It loops through all headlines, collects the necessary data, then splits up the
   * string and re-inserts the newly formatted headlines.
   *
   * @param string $text HTML fragment containing <h1>..<h6> elements
   * @return string The text with rebuilt headings and, if applicable, the table of contents
   * @private
   */
  function formatHeadings($text)
  {
      # Fixed settings: TOC entries are only emitted for TOC levels below
      # $wgMaxTocLevel, and automatic heading numbering is switched off.
      $wgMaxTocLevel = 3;
      $doNumberHeadings = false;
      # Get all headlines for numbering them and building the TOC - the
      # rewriting happens later, but we need the number of headlines right now
      $matches = array();
      $numMatches = preg_match_all('/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)(?P<header>.*?)<\/H[1-6] *>/i', $text, $matches);
      # if there are fewer than 4 headlines in the article, do not show TOC
      $enoughToc = ($numMatches >= 4);
      # headline counter
      $headlineCount = 0;
      $numVisible = 0;
      # TOC markup, rebuilt text and the replacement headlines by position
      $toc = '';
      $full = '';
      $head = array();
      # Per TOC level: running entry count and the heading level last seen at it
      $sublevelCount = array();
      $levelCount = array();
      # How often each lower-cased anchor has been used so far
      $refers = array();
      $level = 0;
      $prevlevel = 0;
      $toclevel = 0;
      $prevtoclevel = 0;
      # Neither a section edit link nor a legacy anchor is built in this
      # method. They are handed to makeHeadline() as explicit "nothing" values
      # (null is what reading the previously undefined variable produced).
      $editlink = null;
      $legacyAnchor = false;
      foreach ($matches['header'] as $headline)
      {
          $numbering = '';
          if ($toclevel)
          {
              $prevlevel = $level;
              $prevtoclevel = $toclevel;
          }
          $level = $matches['level'][$headlineCount];
          if ($doNumberHeadings || $enoughToc)
          {
              if ($level > $prevlevel)
              {
                  # Increase TOC level
                  $toclevel ++;
                  $sublevelCount[$toclevel] = 0;
                  if ($toclevel < $wgMaxTocLevel)
                  {
                      $prevtoclevel = $toclevel;
                      $toc .= MediawikiLinker :: tocIndent();
                      $numVisible ++;
                  }
              }
              elseif ($level < $prevlevel && $toclevel > 1)
              {
                  # Decrease TOC level, find level to jump to
                  if ($toclevel == 2 && $level <= $levelCount[1])
                  {
                      # Can only go down to level 1
                      $toclevel = 1;
                  }
                  else
                  {
                      for ($i = $toclevel; $i > 0; $i --)
                      {
                          if ($levelCount[$i] == $level)
                          {
                              # Found last matching level
                              $toclevel = $i;
                              break;
                          }
                          elseif ($levelCount[$i] < $level)
                          {
                              # Found first matching level below current level
                              $toclevel = $i + 1;
                              break;
                          }
                      }
                  }
                  if ($toclevel < $wgMaxTocLevel)
                  {
                      if ($prevtoclevel < $wgMaxTocLevel)
                      {
                          # Unindent only if the previous toc level was shown :p
                          $toc .= MediawikiLinker :: tocUnindent($prevtoclevel - $toclevel);
                          $prevtoclevel = $toclevel;
                      }
                      else
                      {
                          $toc .= MediawikiLinker :: tocLineEnd();
                      }
                  }
              }
              else
              {
                  # No change in level, end TOC line
                  if ($toclevel < $wgMaxTocLevel)
                  {
                      $toc .= MediawikiLinker :: tocLineEnd();
                  }
              }
              $levelCount[$toclevel] = $level;
              # count number of headlines for each level
              if (! isset($sublevelCount[$toclevel]))
              {
                  $sublevelCount[$toclevel] = 0;
              }
              $sublevelCount[$toclevel] ++;
              # Build the dotted section number, e.g. "2.1"
              $dot = 0;
              for ($i = 1; $i <= $toclevel; $i ++)
              {
                  if (! empty($sublevelCount[$i]))
                  {
                      if ($dot)
                      {
                          $numbering .= '.';
                      }
                      $numbering .= $sublevelCount[$i];
                      $dot = 1;
                  }
              }
          }
          # The safe header is a version of the header text safe to use for links
          # Avoid insertion of weird stuff like <math> by expanding the relevant sections
          $safeHeadline = $this->mStripState->unstripBoth($headline);
          # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
          $tocline = preg_replace(array('#<(?!/?(sup|sub)).*?' . '>#', '#<(/?(sup|sub)).*?' . '>#'), array(
                  '', '<$1>'), $safeHeadline);
          $tocline = trim($tocline);
          # For the anchor, strip out HTML-y stuff period
          $safeHeadline = preg_replace('/<.*?' . '>/', '', $safeHeadline);
          $safeHeadline = trim($safeHeadline);
          $safeHeadline = MediawikiSanitizer :: escapeId($safeHeadline, 'noninitial');
          # HTML names must be case-insensitively unique (bug 10721). FIXME:
          # Does this apply to Unicode characters? Because we aren't
          # handling those here.
          $arrayKey = strtolower($safeHeadline);
          # count how many in assoc. array so we can track dupes in anchors.
          # Only the real anchor is counted: the former legacy-anchor counter
          # used the key false (cast to 0) and so clobbered the "0" anchor.
          if (isset($refers[$arrayKey]))
          {
              $refers[$arrayKey] ++;
          }
          else
          {
              $refers[$arrayKey] = 1;
          }
          # Don't number the heading if it is the only one (looks silly)
          if ($doNumberHeadings && count($matches['header']) > 1)
          {
              # the two are different if the line contains a link
              $headline = $numbering . ' ' . $headline;
          }
          # Create the anchor for linking from the TOC to the section
          $anchor = $safeHeadline;
          if ($refers[$arrayKey] > 1)
          {
              $anchor .= '_' . $refers[$arrayKey];
          }
          if ($enoughToc && $toclevel < $wgMaxTocLevel)
          {
              $toc .= MediawikiLinker :: tocLine($anchor, $tocline, $numbering, $toclevel);
          }
          # give headline the correct <h#> tag
          $head[$headlineCount] = MediawikiLinker :: makeHeadline($level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink, $legacyAnchor);
          $headlineCount ++;
      }
      # Never ever show TOC if no headers
      if ($numVisible < 1)
      {
          $enoughToc = false;
      }
      if ($enoughToc)
      {
          if ($prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel)
          {
              $toc .= MediawikiLinker :: tocUnindent($prevtoclevel - 1);
          }
          $toc = MediawikiLinker :: tocList($toc);
      }
      # split up and insert constructed headlines. The closing tag pattern
      # accepts trailing spaces exactly like the collecting pattern above, so
      # that block N is always followed by rebuilt headline N.
      $blocks = preg_split('/<H[1-6].*?' . '>.*?<\/H[1-6] *>/i', $text);
      $i = 0;
      foreach ($blocks as $block)
      {
          $full .= $block;
          if ($enoughToc && ! $i)
          {
              # The TOC goes right after the text preceding the first headline
              $full = $full . $toc;
          }
          if (! empty($head[$i]))
          {
              $full .= $head[$i];
          }
          $i ++;
      }
      return $full;
  }
  1469. /**
  1470. * Process [[ ]] wikilinks
  1471. * @return processed text
  1472. *
  1473. * @private
  1474. */
  function replaceInternalLinks($s)
  {
      # replaceInternalLinks2() takes $s by reference and rewrites it in
      # place; what it returns are the link holders it collected.
      $linkHolders = $this->mLinkHolders;
      $linkHolders->merge($this->replaceInternalLinks2($s));
      return $s;
  }
  1480. /**
  1481. * Process [[ ]] wikilinks (RIL)
  1482. * @return LinkHolderArray
  1483. *
  1484. * @private
  1485. */
  function replaceInternalLinks2(&$s)
  {
      # $s is rewritten in place: every [[...]] occurrence is replaced by
      # rendered markup, a link holder, or the raw text when it is not a
      # valid link. The collected link holders are returned.
      static $tc = FALSE, $e1, $e1_img;
      # the % is needed to support urlencoded titles as well
      if (! $tc)
      {
          $tc = MediawikiTitle :: legalChars() . '#%';
          # Match a link having the form [[namespace:link|alternate]]trail
          $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
          # Match cases where there is no "]]", which might still be images
          $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
      }
      $holders = new MediawikiLinkHolderArray($this);
      #split the entire text string on occurences of [[
      $a = MediawikiStringUtils :: explode('[[', ' ' . $s);
      #get the first element (all text up to first [[), and remove the space we added
      $s = $a->current();
      $a->next();
      $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
      $s = substr($s, 1);
      # No link prefix is ever computed here, so $prefix stays empty
      $prefix = '';
      $selflink = array($this->mediawiki_parser_context->get_title());
      # Loop for each link
      for (; $line !== false && $line !== null; $a->next(), $line = $a->current())
      {
          # Check for excessive memory usage
          if ($holders->isBig())
          {
              # Too big
              # Do the existence check, replace the link holders and clear the array
              $holders->replace($s);
              $holders->clear();
          }
          $might_be_img = false;
          if (preg_match($e1, $line, $m))
          { # page with normal text or alt
              $text = $m[2];
              # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
              # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
              # the real problem is with the $e1 regex
              # See bug 1300.
              #
              # Still some problems for cases where the ] is meant to be outside punctuation,
              # and no image is in sight. See bug 2095.
              #
              if ($text !== '' && substr($m[3], 0, 1) === ']' && strpos($text, '[') !== false)
              {
                  $text .= ']'; # so that replaceExternalLinks($text) works later
                  $m[3] = substr($m[3], 1);
              }
              # fix up urlencoded title texts
              if (strpos($m[1], '%') !== false)
              {
                  # Should anchors '#' also be rejected?
                  $m[1] = str_replace(array('<', '>'), array('&lt;', '&gt;'), urldecode($m[1]));
              }
              $trail = $m[3];
          }
          elseif (preg_match($e1_img, $line, $m))
          { # Invalid, but might be an image with a link in its caption
              $might_be_img = true;
              $text = $m[2];
              if (strpos($m[1], '%') !== false)
              {
                  $m[1] = urldecode($m[1]);
              }
              $trail = "";
          }
          else
          { # Invalid form; output directly
              $s .= $prefix . '[[' . $line;
              continue;
          }
          # Don't allow internal links to pages containing
          # PROTO: where PROTO is a valid URL protocol; these
          # should be external links.
          if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1]))
          {
              $s .= $prefix . '[[' . $line;
              continue;
          }
          $link = $m[1];
          $noforce = (substr($m[1], 0, 1) !== ':');
          if (! $noforce)
          {
              # Strip off leading ':'
              $link = substr($link, 1);
          }
          $nt = MediawikiTitle :: newFromText($this->mStripState->unstripNoWiki($link));
          if ($nt === NULL)
          {
              $s .= $prefix . '[[' . $line;
              continue;
          }
          $ns = $nt->getNamespace();
          $iw = $nt->getInterWiki();
          if ($might_be_img)
          { # if this is actually an invalid link
              if ($ns == NS_FILE && $noforce)
              { #but might be an image
                  $found = false;
                  while (true)
                  {
                      #look at the next 'line' to see if we can close it there
                      $a->next();
                      $next_line = $a->current();
                      if ($next_line === false || $next_line === null)
                      {
                          break;
                      }
                      $m = explode(']]', $next_line, 3);
                      if (count($m) == 3)
                      {
                          # the first ]] closes the inner link, the second the image
                          $found = true;
                          $text .= "[[{$m[0]}]]{$m[1]}";
                          $trail = $m[2];
                          break;
                      }
                      elseif (count($m) == 2)
                      {
                          #if there's exactly one ]] that's fine, we'll keep looking
                          $text .= "[[{$m[0]}]]{$m[1]}";
                      }
                      else
                      {
                          #if $next_line is invalid too, we need look no further
                          $text .= '[[' . $next_line;
                          break;
                      }
                  }
                  if (! $found)
                  {
                      # we couldn't find the end of this imageLink, so output it raw
                      #but don't ignore what might be perfectly normal links in the text we've examined
                      $holders->merge($this->replaceInternalLinks2($text));
                      $s .= "{$prefix}[[$link|$text";
                      # note: no $trail, because without an end, there *is* no trail
                      continue;
                  }
              }
              else
              { #it's not an image, so output it raw
                  $s .= "{$prefix}[[$link|$text";
                  # note: no $trail, because without an end, there *is* no trail
                  continue;
              }
          }
          $wasblank = ('' == $text);
          if ($wasblank)
              $text = $link;
          # Link not escaped by : , create the various objects
          if ($noforce)
          {
              # Interwikis
              # NOTE(review): $wgContLang is never defined in this method, so this
              # condition fails at runtime as soon as $iw is set and interwiki
              # magic is enabled - a content-language object must be supplied.
              if ($iw && $this->mOptions->getInterwikiMagic() && $wgContLang->getLanguageName($iw))
              {
                  $this->mOutput->addLanguageLink($nt->getFullText());
                  $s = rtrim($s . $prefix);
                  $s .= trim($trail, "\n") == '' ? '' : $prefix . $trail;
                  continue;
              }
              if ($ns == NS_FILE)
              {
                  if (! wfIsBadImage($nt->getDBkey(), $this->mTitle))
                  {
                      # recursively parse links inside the image caption
                      # actually, this will parse them in any other parameters, too,
                      # but it might be hard to fix that, and it doesn't matter ATM
                      $text = $this->replaceExternalLinks($text);
                      $holders->merge($this->replaceInternalLinks2($text));
                      # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
                      $s .= $prefix . $this->armorLinks($this->makeImage($nt, $text, $holders)) . $trail;
                  }
                  $this->mOutput->addImage($nt->getDBkey());
                  continue;
              }
              if ($ns == NS_CATEGORY)
              {
                  $s = rtrim($s . "\n"); # bug 87
                  if ($wasblank)
                  {
                      $sortkey = $this->getDefaultSort();
                  }
                  else
                  {
                      $sortkey = $text;
                  }
                  # Use the prefixed sanitizer class like the rest of this file
                  $sortkey = MediawikiSanitizer :: decodeCharReferences($sortkey);
                  $sortkey = str_replace("\n", '', $sortkey);
                  # NOTE(review): $wgContLang is undefined here as well - confirm
                  # which object should convert the category sort key.
                  $sortkey = $wgContLang->convertCategoryKey($sortkey);
                  $this->mOutput->addCategory($nt->getDBkey(), $sortkey);
                  /**
                   * Strip the whitespace Category links produce, see bug 87
                   * @todo We might want to use trim($tmp, "\n") here.
                   */
                  $s .= trim($prefix . $trail, "\n") == '' ? '' : $prefix . $trail;
                  continue;
              }
          }
          # Self-link checking
          if ($nt->getFragment() === '' && $ns != NS_SPECIAL)
          {
              if (in_array($nt->getPrefixedText(), $selflink, true))
              {
                  # The linker is called statically, as everywhere else in this
                  # file; the former $sk skin object was never defined.
                  # NOTE(review): assumes MediawikiLinker provides makeSelfLinkObj() - confirm.
                  $s .= $prefix . MediawikiLinker :: makeSelfLinkObj($nt, $text, '', $trail);
                  continue;
              }
          }
          # NS_MEDIA is a pseudo-namespace for linking directly to a file
          # FIXME: Should do batch file existence checks, see comment below
          if ($ns == NS_MEDIA)
          {
              # No specific file revision is selected
              $time = false;
              # NOTE(review): assumes MediawikiLinker provides makeMediaLinkObj() - confirm.
              $link = MediawikiLinker :: makeMediaLinkObj($nt, $text, $time);
              # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
              $s .= $prefix . $this->armorLinks($link) . $trail;
              $this->mOutput->addImage($nt->getDBkey());
              continue;
          }
          # Some titles, such as valid special pages or files in foreign repos, should
          # be shown as bluelinks even though they're not included in the page table
          #
          # FIXME: isAlwaysKnown() can be expensive for file links; we should really do
          # batch file existence checks for NS_FILE and NS_MEDIA
          if ($iw == '' && $nt->isAlwaysKnown())
          {
              $s .= $this->makeKnownLinkHolder($nt, $text, array(), $trail, $prefix);
          }
          else
          {
              # Links will be added to the output link list after checking
              $s .= $holders->makeHolder($nt, $text, '', $trail, $prefix);
          }
      }
      return $holders;
  }
  /**
   * Hands out the current link ID and advances the counter.
   *
   * @return mixed The value of $this->mLinkID before it was incremented
   */
  function nextLinkID()
  {
      $current = $this->mLinkID;
      $this->mLinkID ++;
      return $current;
  }
  1736. /**
  1737. * Render a forced-blue link inline; protect against double expansion of
  1738. * URLs if we're in a mode that prepends full URL prefixes to internal links.
  1739. * Since this little disaster has to split off the trail text to avoid
  1740. * breaking URLs in the following text without breaking trails on the
  1741. * wiki links, it's been made into a horrible function.
  1742. *
  1743. * @param Title $nt
  1744. * @param string $text
  1745. * @param string $query
  1746. * @param string $trail
  1747. * @param string $prefix
  1748. * @return string HTML-wikitext mix oh yuck
  1749. */
  function makeKnownLinkHolder($nt, $text = '', $query = array(), $trail = '', $prefix = '')
  {
      # splitTrail() returns two parts: the first is rendered as part of the
      # link, the second is appended unchanged after the armored link.
      $trailParts = MediawikiLinker :: splitTrail($trail);
      $inside = $trailParts[0];
      $remainder = $trailParts[1];
      $armored = $this->armorLinks(MediawikiLinker :: makeKnownLinkObj($nt, $text, $query, $inside, $prefix));
      return $armored . $remainder;
  }
  1756. /**
  1757. * Insert a NOPARSE hacky thing into any inline links in a chunk that's
  1758. * going to go through further parsing steps before inline URL expansion.
  1759. *
  1760. * Not needed quite as much as it used to be since free links are a bit
  1761. * more sensible these days. But bracketed links are still an issue.
  1762. *
  1763. * @param string more-or-less HTML
  1764. * @return string less-or-more HTML with NOPARSE bits
  1765. */
  function armorLinks($text)
  {
      # Every URL protocol that starts at a word boundary gets the marker
      # "<unique prefix>NOPARSE" inserted in front of it.
      $protocolPattern = '/\b(' . wfUrlProtocols() . ')/';
      $replacement = $this->mUniqPrefix . 'NOPARSE$1';
      return preg_replace($protocolPattern, $replacement, $text);
  }
  /**
   * Accessor for the output object held in $this->mOutput.
   *
   * @return mixed The current value of $this->mOutput
   */
  function getOutput()
  {
      $output = $this->mOutput;
      return $output;
  }
  1774. }
  1775. ?>