PageRenderTime 60ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/common/libraries/plugin/wiki/mediawiki_parser.class.php

https://bitbucket.org/renaatdemuynck/chamilo
PHP | 1941 lines | 1435 code | 166 blank | 340 comment | 302 complexity | 7a46461fcfa4424780e86bc4a28689f5 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, LGPL-3.0, GPL-3.0, MIT, GPL-2.0

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. require_once dirname(__FILE__) . '/mediawiki/Utilities.php';
  3. require_once dirname(__FILE__) . '/mediawiki/Sanitizer.php';
  4. require_once dirname(__FILE__) . '/mediawiki/StringUtils.php';
  5. require_once dirname(__FILE__) . '/mediawiki/Xml.php';
  6. require_once dirname(__FILE__) . '/mediawiki/StripState.php';
  7. require_once dirname(__FILE__) . '/mediawiki/Title.php';
  8. require_once dirname(__FILE__) . '/mediawiki/LinkHolderArray.php';
  9. require_once dirname(__FILE__) . '/mediawiki/Linker.php';
  10. require_once dirname(__FILE__) . '/mediawiki/LinkCache.php';
  11. require_once dirname(__FILE__) . '/mediawiki/Defines.php';
  12. require_once dirname(__FILE__) . '/mediawiki/ParserOutput.php';
  13. require_once dirname(__FILE__) . '/mediawiki/Namespace.php';
  14. require_once dirname(__FILE__) . '/mediawiki_parser_context.class.php';
  15. function wfUrlProtocols()
  16. {
  17. /**
  18. * The external URL protocols
  19. */
  20. $wgUrlProtocols = array('http://', 'https://', 'ftp://', 'irc://', 'gopher://', 'telnet://', // Well if we're going to support the above.. -ĂŚvar
  21. 'nntp://', // @bug 3808 RFC 1738
  22. 'worldwind://', 'mailto:', 'news:', 'svn://');
  23. // Support old-style $wgUrlProtocols strings, for backwards compatibility
  24. // with LocalSettings files from 1.5
  25. if (is_array($wgUrlProtocols))
  26. {
  27. $protocols = array();
  28. foreach ($wgUrlProtocols as $protocol)
  29. $protocols[] = preg_quote($protocol, '/');
  30. return implode('|', $protocols);
  31. }
  32. else
  33. {
  34. return $wgUrlProtocols;
  35. }
  36. }
  37. function wfUrlencode($s)
  38. {
  39. $s = urlencode($s);
  40. $s = str_ireplace(array('%3B', '%3A', '%40', '%24', '%21', '%2A', '%28', '%29', '%2C', '%2F'), array(';', ':', '@',
  41. '$', '!', '*', '(', ')', ',', '/'), $s);
  42. return $s;
  43. }
  44. /**
  45. * This is the logical opposite of wfArrayToCGI(): it accepts a query string as
  46. * its argument and returns the same string in array form. This allows compa-
  47. * tibility with legacy functions that accept raw query strings instead of nice
  48. * arrays. Of course, keys and values are urldecode()d. Don't try passing in-
  49. * valid query strings, or it will explode.
  50. *
  51. * @param $query string Query string
  52. * @return array Array version of input
  53. */
  54. function wfCgiToArray($query)
  55. {
  56. if (isset($query[0]) and $query[0] == '?')
  57. {
  58. $query = substr($query, 1);
  59. }
  60. $bits = explode('&', $query);
  61. $ret = array();
  62. foreach ($bits as $bit)
  63. {
  64. if ($bit === '')
  65. {
  66. continue;
  67. }
  68. list($key, $value) = explode('=', $bit);
  69. $key = urldecode($key);
  70. $value = urldecode($value);
  71. $ret[$key] = $value;
  72. }
  73. return $ret;
  74. }
  75. /**
  76. * This function takes two arrays as input, and returns a CGI-style string, e.g.
  77. * "days=7&limit=100". Options in the first array override options in the second.
  78. * Options set to "" will not be output.
  79. */
  80. function wfArrayToCGI($array1, $array2 = NULL)
  81. {
  82. if (! is_null($array2))
  83. {
  84. $array1 = $array1 + $array2;
  85. }
  86. $cgi = '';
  87. foreach ($array1 as $key => $value)
  88. {
  89. if ('' !== $value)
  90. {
  91. if ('' != $cgi)
  92. {
  93. $cgi .= '&';
  94. }
  95. if (is_array($value))
  96. {
  97. $firstTime = true;
  98. foreach ($value as $v)
  99. {
  100. $cgi .= ($firstTime ? '' : '&') . urlencode($key . '[]') . '=' . urlencode($v);
  101. $firstTime = false;
  102. }
  103. }
  104. else
  105. $cgi .= urlencode($key) . '=' . urlencode($value);
  106. }
  107. }
  108. return $cgi;
  109. }
  110. /**
  111. * Append a query string to an existing URL, which may or may not already
  112. * have query string parameters already. If so, they will be combined.
  113. *
  114. * @param string $url
  115. * @param string $query
  116. * @return string
  117. */
  118. function wfAppendQuery($url, $query)
  119. {
  120. if ($query != '')
  121. {
  122. if (false === strpos($url, '?'))
  123. {
  124. $url .= '?';
  125. }
  126. else
  127. {
  128. $url .= '&';
  129. }
  130. $url .= $query;
  131. }
  132. return $url;
  133. }
  134. /**
  135. * A Mediawiki wikitext parser using the same functions
  136. * as used by Mediawiki's parsing engine
  137. *
  138. * @author Hans De Bisschop
  139. * @see Parser
  140. *
  141. */
  142. class MediawikiParser
  143. {
  144. // State constants for the definition list colon extraction
  145. const COLON_STATE_TEXT = 0;
  146. const COLON_STATE_TAG = 1;
  147. const COLON_STATE_TAGSTART = 2;
  148. const COLON_STATE_CLOSETAG = 3;
  149. const COLON_STATE_TAGSLASH = 4;
  150. const COLON_STATE_COMMENT = 5;
  151. const COLON_STATE_COMMENTDASH = 6;
  152. const COLON_STATE_COMMENTDASHDASH = 7;
  153. const MARKER_SUFFIX = "-QINU\x7f";
  154. const VERSION = '1.6.4';
  155. // Flags for preprocessToDom
  156. const PTD_FOR_INCLUSION = 1;
  157. private $mUniqPrefix;
  158. /**
  159. * The context of the MediawikiParser
  160. *
  161. * @var MediawikiParserContext
  162. */
  163. private $mediawiki_parser_context;
  164. function __construct(MediaWikiParserContext $mediawiki_parser_context)
  165. {
  166. $this->mediawiki_parser_context = $mediawiki_parser_context;
  167. $this->mUniqPrefix = "\x7fUNIQ" . self :: getRandomString();
  168. $this->mLinkID = 0;
  169. $this->mOutput = new MediawikiParserOutput();
  170. $this->mStripState = new MediawikiStripState();
  171. $this->mLinkHolders = new MediawikiLinkHolderArray($this);
  172. }
  173. function get_mediawiki_parser_context()
  174. {
  175. return $this->mediawiki_parser_context;
  176. }
  177. /**
  178. * Get a random string
  179. *
  180. * @private
  181. * @static
  182. */
  183. function getRandomString()
  184. {
  185. return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
  186. }
  187. function parse()
  188. {
  189. $text = $this->mediawiki_parser_context->get_body();
  190. $text = $this->internalParse($text);
  191. # Clean up special characters, only run once, next-to-last before doBlockLevels
  192. $fixtags = array(# french spaces, last one Guillemet-left
  193. # only if there is something before the space
  194. '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&nbsp;\\2', # french spaces, Guillemet-right
  195. '/(\\302\\253) /' => '\\1&nbsp;', '/&nbsp;(!\s*important)/' => ' \\1'); #Beware of CSS magic word !important, bug #11874.
  196. $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);
  197. $text = $this->doBlockLevels($text, $linestart);
  198. $this->replaceLinkHolders($text);
  199. return $text;
  200. }
  201. /**
  202. * Replace <!--LINK--> link placeholders with actual links, in the buffer
  203. * Placeholders created in Skin::makeLinkObj()
  204. * Returns an array of link CSS classes, indexed by PDBK.
  205. */
  206. function replaceLinkHolders(&$text, $options = 0)
  207. {
  208. return $this->mLinkHolders->replace($text);
  209. }
  210. function internalParse($text)
  211. {
  212. $isMain = true;
  213. //$text = Sanitizer :: removeHTMLtags($text, array(&$this, 'attributeStripCallback'), false, array_keys($this->mTransparentTagHooks));
  214. // Tables need to come after variable replacement for things to work
  215. // properly; putting them before other transformations should keep
  216. // exciting things like link expansions from showing up in surprising
  217. // places.
  218. $text = $this->doTableStuff($text);
  219. $text = preg_replace('/(^|\n)-----*/', '\\1<hr />', $text);
  220. //
  221. // $text = $this->doDoubleUnderscore($text);
  222. $text = $this->doHeadings($text);
  223. // //if ($this->mOptions->getUseDynamicDates())
  224. // //{
  225. // // $df = DateFormatter :: getInstance();
  226. // // $text = $df->reformat($this->mOptions->getDateFormat(), $text);
  227. // //}
  228. $text = $this->doAllQuotes($text);
  229. $text = $this->replaceInternalLinks($text);
  230. // $text = $this->replaceExternalLinks($text);
  231. //
  232. // # replaceInternalLinks may sometimes leave behind
  233. // # absolute URLs, which have to be masked to hide them from replaceExternalLinks
  234. // $text = str_replace($this->mUniqPrefix . 'NOPARSE', '', $text);
  235. //
  236. // $text = $this->doMagicLinks($text);
  237. $text = $this->formatHeadings($text, $isMain);
  238. return $text;
  239. }
  240. /**
  241. * parse the wiki syntax used to render tables
  242. *
  243. * @private
  244. */
  245. function doTableStuff($text)
  246. {
  247. $lines = MediawikiStringUtils :: explode("\n", $text);
  248. $out = '';
  249. $td_history = array(); // Is currently a td tag open?
  250. $last_tag_history = array(); // Save history of last lag activated (td, th or caption)
  251. $tr_history = array(); // Is currently a tr tag open?
  252. $tr_attributes = array(); // history of tr attributes
  253. $has_opened_tr = array(); // Did this table open a <tr> element?
  254. $indent_level = 0; // indent level of the table
  255. foreach ($lines as $outLine)
  256. {
  257. $line = trim($outLine);
  258. if ($line == '')
  259. { // empty line, go to next line
  260. $out .= $outLine . "\n";
  261. continue;
  262. }
  263. $first_character = $line[0];
  264. $matches = array();
  265. if (preg_match('/^(:*)\{\|(.*)$/', $line, $matches))
  266. {
  267. // First check if we are starting a new table
  268. $indent_level = strlen($matches[1]);
  269. $attributes = $this->mStripState->unstripBoth($matches[2]);
  270. $attributes = MediawikiSanitizer :: fixTagAttributes($attributes, 'table');
  271. $outLine = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>";
  272. array_push($td_history, false);
  273. array_push($last_tag_history, '');
  274. array_push($tr_history, false);
  275. array_push($tr_attributes, '');
  276. array_push($has_opened_tr, false);
  277. }
  278. else
  279. if (count($td_history) == 0)
  280. {
  281. // Don't do any of the following
  282. $out .= $outLine . "\n";
  283. continue;
  284. }
  285. else
  286. if (substr($line, 0, 2) === '|}')
  287. {
  288. // We are ending a table
  289. $line = '</table>' . substr($line, 2);
  290. $last_tag = array_pop($last_tag_history);
  291. if (! array_pop($has_opened_tr))
  292. {
  293. $line = "<tr><td></td></tr>{$line}";
  294. }
  295. if (array_pop($tr_history))
  296. {
  297. $line = "</tr>{$line}";
  298. }
  299. if (array_pop($td_history))
  300. {
  301. $line = "</{$last_tag}>{$line}";
  302. }
  303. array_pop($tr_attributes);
  304. $outLine = $line . str_repeat('</dd></dl>', $indent_level);
  305. }
  306. else
  307. if (substr($line, 0, 2) === '|-')
  308. {
  309. // Now we have a table row
  310. $line = preg_replace('#^\|-+#', '', $line);
  311. // Whats after the tag is now only attributes
  312. $attributes = $this->mStripState->unstripBoth($line);
  313. $attributes = MediawikiSanitizer :: fixTagAttributes($attributes, 'tr');
  314. array_pop($tr_attributes);
  315. array_push($tr_attributes, $attributes);
  316. $line = '';
  317. $last_tag = array_pop($last_tag_history);
  318. array_pop($has_opened_tr);
  319. array_push($has_opened_tr, true);
  320. if (array_pop($tr_history))
  321. {
  322. $line = '</tr>';
  323. }
  324. if (array_pop($td_history))
  325. {
  326. $line = "</{$last_tag}>{$line}";
  327. }
  328. $outLine = $line;
  329. array_push($tr_history, false);
  330. array_push($td_history, false);
  331. array_push($last_tag_history, '');
  332. }
  333. else
  334. if ($first_character === '|' || $first_character === '!' || substr($line, 0, 2) === '|+')
  335. {
  336. // This might be cell elements, td, th or captions
  337. if (substr($line, 0, 2) === '|+')
  338. {
  339. $first_character = '+';
  340. $line = substr($line, 1);
  341. }
  342. $line = substr($line, 1);
  343. if ($first_character === '!')
  344. {
  345. $line = str_replace('!!', '||', $line);
  346. }
  347. // Split up multiple cells on the same line.
  348. // FIXME : This can result in improper nesting of tags processed
  349. // by earlier parser steps, but should avoid splitting up eg
  350. // attribute values containing literal "||".
  351. $cells = MediawikiStringUtils :: explodeMarkup('||', $line);
  352. $outLine = '';
  353. // Loop through each table cell
  354. foreach ($cells as $cell)
  355. {
  356. $previous = '';
  357. if ($first_character !== '+')
  358. {
  359. $tr_after = array_pop($tr_attributes);
  360. if (! array_pop($tr_history))
  361. {
  362. $previous = "<tr{$tr_after}>\n";
  363. }
  364. array_push($tr_history, true);
  365. array_push($tr_attributes, '');
  366. array_pop($has_opened_tr);
  367. array_push($has_opened_tr, true);
  368. }
  369. $last_tag = array_pop($last_tag_history);
  370. if (array_pop($td_history))
  371. {
  372. $previous = "</{$last_tag}>{$previous}";
  373. }
  374. if ($first_character === '|')
  375. {
  376. $last_tag = 'td';
  377. }
  378. else
  379. if ($first_character === '!')
  380. {
  381. $last_tag = 'th';
  382. }
  383. else
  384. if ($first_character === '+')
  385. {
  386. $last_tag = 'caption';
  387. }
  388. else
  389. {
  390. $last_tag = '';
  391. }
  392. array_push($last_tag_history, $last_tag);
  393. // A cell could contain both parameters and data
  394. $cell_data = explode('|', $cell, 2);
  395. // Bug 553: Note that a '|' inside an invalid link should not
  396. // be mistaken as delimiting cell parameters
  397. if (strpos($cell_data[0], '[[') !== false)
  398. {
  399. $cell = "{$previous}<{$last_tag}>{$cell}";
  400. }
  401. else
  402. if (count($cell_data) == 1)
  403. $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
  404. else
  405. {
  406. $attributes = $this->mStripState->unstripBoth($cell_data[0]);
  407. $attributes = MediawikiSanitizer :: fixTagAttributes($attributes, $last_tag);
  408. $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
  409. }
  410. $outLine .= $cell;
  411. array_push($td_history, true);
  412. }
  413. }
  414. $out .= $outLine . "\n";
  415. }
  416. // Closing open td, tr && table
  417. while (count($td_history) > 0)
  418. {
  419. if (array_pop($td_history))
  420. {
  421. $out .= "</td>\n";
  422. }
  423. if (array_pop($tr_history))
  424. {
  425. $out .= "</tr>\n";
  426. }
  427. if (! array_pop($has_opened_tr))
  428. {
  429. $out .= "<tr><td></td></tr>\n";
  430. }
  431. $out .= "</table>\n";
  432. }
  433. // Remove trailing line-ending (b/c)
  434. if (substr($out, - 1) === "\n")
  435. {
  436. $out = substr($out, 0, - 1);
  437. }
  438. // special case: don't return empty table
  439. if ($out === "<table>\n<tr><td></td></tr>\n</table>")
  440. {
  441. $out = '';
  442. }
  443. return $out;
  444. }
  445. /**
  446. * Parse headers and return html
  447. *
  448. * @private
  449. */
  450. function doHeadings($text)
  451. {
  452. for($i = 6; $i >= 1; -- $i)
  453. {
  454. $h = str_repeat('=', $i);
  455. $text = preg_replace("/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text);
  456. }
  457. return $text;
  458. }
  459. /**
  460. * Replace single quotes with HTML markup
  461. * @private
  462. * @return string the altered text
  463. */
  464. function doAllQuotes($text)
  465. {
  466. $outtext = '';
  467. $lines = MediawikiStringUtils :: explode("\n", $text);
  468. foreach ($lines as $line)
  469. {
  470. $outtext .= $this->doQuotes($line) . "\n";
  471. }
  472. $outtext = substr($outtext, 0, - 1);
  473. return $outtext;
  474. }
  475. /**
  476. * Helper function for doAllQuotes()
  477. */
  478. public function doQuotes($text)
  479. {
  480. $arr = preg_split("/(''+)/", $text, - 1, PREG_SPLIT_DELIM_CAPTURE);
  481. if (count($arr) == 1)
  482. return $text;
  483. else
  484. {
  485. # First, do some preliminary work. This may shift some apostrophes from
  486. # being mark-up to being text. It also counts the number of occurrences
  487. # of bold and italics mark-ups.
  488. $i = 0;
  489. $numbold = 0;
  490. $numitalics = 0;
  491. foreach ($arr as $r)
  492. {
  493. if (($i % 2) == 1)
  494. {
  495. # If there are ever four apostrophes, assume the first is supposed to
  496. # be text, and the remaining three constitute mark-up for bold text.
  497. if (strlen($arr[$i]) == 4)
  498. {
  499. $arr[$i - 1] .= "'";
  500. $arr[$i] = "'''";
  501. }
  502. # If there are more than 5 apostrophes in a row, assume they're all
  503. # text except for the last 5.
  504. else
  505. if (strlen($arr[$i]) > 5)
  506. {
  507. $arr[$i - 1] .= str_repeat("'", strlen($arr[$i]) - 5);
  508. $arr[$i] = "'''''";
  509. }
  510. # Count the number of occurrences of bold and italics mark-ups.
  511. # We are not counting sequences of five apostrophes.
  512. if (strlen($arr[$i]) == 2)
  513. {
  514. $numitalics ++;
  515. }
  516. else
  517. if (strlen($arr[$i]) == 3)
  518. {
  519. $numbold ++;
  520. }
  521. else
  522. if (strlen($arr[$i]) == 5)
  523. {
  524. $numitalics ++;
  525. $numbold ++;
  526. }
  527. }
  528. $i ++;
  529. }
  530. # If there is an odd number of both bold and italics, it is likely
  531. # that one of the bold ones was meant to be an apostrophe followed
  532. # by italics. Which one we cannot know for certain, but it is more
  533. # likely to be one that has a single-letter word before it.
  534. if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
  535. {
  536. $i = 0;
  537. $firstsingleletterword = - 1;
  538. $firstmultiletterword = - 1;
  539. $firstspace = - 1;
  540. foreach ($arr as $r)
  541. {
  542. if (($i % 2 == 1) and (strlen($r) == 3))
  543. {
  544. $x1 = substr($arr[$i - 1], - 1);
  545. $x2 = substr($arr[$i - 1], - 2, 1);
  546. if ($x1 === ' ')
  547. {
  548. if ($firstspace == - 1)
  549. $firstspace = $i;
  550. }
  551. else
  552. if ($x2 === ' ')
  553. {
  554. if ($firstsingleletterword == - 1)
  555. $firstsingleletterword = $i;
  556. }
  557. else
  558. {
  559. if ($firstmultiletterword == - 1)
  560. $firstmultiletterword = $i;
  561. }
  562. }
  563. $i ++;
  564. }
  565. # If there is a single-letter word, use it!
  566. if ($firstsingleletterword > - 1)
  567. {
  568. $arr[$firstsingleletterword] = "''";
  569. $arr[$firstsingleletterword - 1] .= "'";
  570. }
  571. # If not, but there's a multi-letter word, use that one.
  572. else
  573. if ($firstmultiletterword > - 1)
  574. {
  575. $arr[$firstmultiletterword] = "''";
  576. $arr[$firstmultiletterword - 1] .= "'";
  577. }
  578. # ... otherwise use the first one that has neither.
  579. # (notice that it is possible for all three to be -1 if, for example,
  580. # there is only one pentuple-apostrophe in the line)
  581. else
  582. if ($firstspace > - 1)
  583. {
  584. $arr[$firstspace] = "''";
  585. $arr[$firstspace - 1] .= "'";
  586. }
  587. }
  588. # Now let's actually convert our apostrophic mush to HTML!
  589. $output = '';
  590. $buffer = '';
  591. $state = '';
  592. $i = 0;
  593. foreach ($arr as $r)
  594. {
  595. if (($i % 2) == 0)
  596. {
  597. if ($state === 'both')
  598. $buffer .= $r;
  599. else
  600. $output .= $r;
  601. }
  602. else
  603. {
  604. if (strlen($r) == 2)
  605. {
  606. if ($state === 'i')
  607. {
  608. $output .= '</i>';
  609. $state = '';
  610. }
  611. else
  612. if ($state === 'bi')
  613. {
  614. $output .= '</i>';
  615. $state = 'b';
  616. }
  617. else
  618. if ($state === 'ib')
  619. {
  620. $output .= '</b></i><b>';
  621. $state = 'b';
  622. }
  623. else
  624. if ($state === 'both')
  625. {
  626. $output .= '<b><i>' . $buffer . '</i>';
  627. $state = 'b';
  628. }
  629. else # $state can be 'b' or ''
  630. {
  631. $output .= '<i>';
  632. $state .= 'i';
  633. }
  634. }
  635. else
  636. if (strlen($r) == 3)
  637. {
  638. if ($state === 'b')
  639. {
  640. $output .= '</b>';
  641. $state = '';
  642. }
  643. else
  644. if ($state === 'bi')
  645. {
  646. $output .= '</i></b><i>';
  647. $state = 'i';
  648. }
  649. else
  650. if ($state === 'ib')
  651. {
  652. $output .= '</b>';
  653. $state = 'i';
  654. }
  655. else
  656. if ($state === 'both')
  657. {
  658. $output .= '<i><b>' . $buffer . '</b>';
  659. $state = 'i';
  660. }
  661. else # $state can be 'i' or ''
  662. {
  663. $output .= '<b>';
  664. $state .= 'b';
  665. }
  666. }
  667. else
  668. if (strlen($r) == 5)
  669. {
  670. if ($state === 'b')
  671. {
  672. $output .= '</b><i>';
  673. $state = 'i';
  674. }
  675. else
  676. if ($state === 'i')
  677. {
  678. $output .= '</i><b>';
  679. $state = 'b';
  680. }
  681. else
  682. if ($state === 'bi')
  683. {
  684. $output .= '</i></b>';
  685. $state = '';
  686. }
  687. else
  688. if ($state === 'ib')
  689. {
  690. $output .= '</b></i>';
  691. $state = '';
  692. }
  693. else
  694. if ($state === 'both')
  695. {
  696. $output .= '<i><b>' . $buffer . '</b></i>';
  697. $state = '';
  698. }
  699. else # ($state == '')
  700. {
  701. $buffer = '';
  702. $state = 'both';
  703. }
  704. }
  705. }
  706. $i ++;
  707. }
  708. # Now close all remaining tags. Notice that the order is important.
  709. if ($state === 'b' || $state === 'ib')
  710. $output .= '</b>';
  711. if ($state === 'i' || $state === 'bi' || $state === 'ib')
  712. $output .= '</i>';
  713. if ($state === 'bi')
  714. $output .= '</b>';
  715. # There might be lonely ''''', so make sure we have a buffer
  716. if ($state === 'both' && $buffer)
  717. $output .= '<b><i>' . $buffer . '</i></b>';
  718. return $output;
  719. }
  720. }
  721. /**
  722. * Make lists from lines starting with ':', '*', '#', etc. (DBL)
  723. *
  724. * @private
  725. * @return string the lists rendered as HTML
  726. */
  727. function doBlockLevels($text, $linestart)
  728. {
  729. # Parsing through the text line by line. The main thing
  730. # happening here is handling of block-level elements p, pre,
  731. # and making lists from lines starting with * # : etc.
  732. #
  733. $textLines = MediawikiStringUtils :: explode("\n", $text);
  734. $lastPrefix = $output = '';
  735. $this->mDTopen = $inBlockElem = false;
  736. $prefixLength = 0;
  737. $paragraphStack = false;
  738. foreach ($textLines as $oLine)
  739. {
  740. # Fix up $linestart
  741. if (! $linestart)
  742. {
  743. $output .= $oLine;
  744. $linestart = true;
  745. continue;
  746. }
  747. $lastPrefixLength = strlen($lastPrefix);
  748. $preCloseMatch = preg_match('/<\\/pre/i', $oLine);
  749. $preOpenMatch = preg_match('/<pre/i', $oLine);
  750. if (! $this->mInPre)
  751. {
  752. # Multiple prefixes may abut each other for nested lists.
  753. $prefixLength = strspn($oLine, '*#:;');
  754. $prefix = substr($oLine, 0, $prefixLength);
  755. # eh?
  756. $prefix2 = str_replace(';', ':', $prefix);
  757. $t = substr($oLine, $prefixLength);
  758. $this->mInPre = (bool) $preOpenMatch;
  759. }
  760. else
  761. {
  762. # Don't interpret any other prefixes in preformatted text
  763. $prefixLength = 0;
  764. $prefix = $prefix2 = '';
  765. $t = $oLine;
  766. }
  767. # List generation
  768. if ($prefixLength && $lastPrefix === $prefix2)
  769. {
  770. # Same as the last item, so no need to deal with nesting or opening stuff
  771. $output .= $this->nextItem(substr($prefix, - 1));
  772. $paragraphStack = false;
  773. if (substr($prefix, - 1) === ';')
  774. {
  775. # The one nasty exception: definition lists work like this:
  776. # ; title : definition text
  777. # So we check for : in the remainder text to split up the
  778. # title and definition, without b0rking links.
  779. $term = $t2 = '';
  780. if ($this->findColonNoLinks($t, $term, $t2) !== false)
  781. {
  782. $t = $t2;
  783. $output .= $term . $this->nextItem(':');
  784. }
  785. }
  786. }
  787. elseif ($prefixLength || $lastPrefixLength)
  788. {
  789. # Either open or close a level...
  790. $commonPrefixLength = $this->getCommon($prefix, $lastPrefix);
  791. $paragraphStack = false;
  792. while ($commonPrefixLength < $lastPrefixLength)
  793. {
  794. $output .= $this->closeList($lastPrefix[$lastPrefixLength - 1]);
  795. -- $lastPrefixLength;
  796. }
  797. if ($prefixLength <= $commonPrefixLength && $commonPrefixLength > 0)
  798. {
  799. $output .= $this->nextItem($prefix[$commonPrefixLength - 1]);
  800. }
  801. while ($prefixLength > $commonPrefixLength)
  802. {
  803. $char = substr($prefix, $commonPrefixLength, 1);
  804. $output .= $this->openList($char);
  805. if (';' === $char)
  806. {
  807. # FIXME: This is dupe of code above
  808. if ($this->findColonNoLinks($t, $term, $t2) !== false)
  809. {
  810. $t = $t2;
  811. $output .= $term . $this->nextItem(':');
  812. }
  813. }
  814. ++ $commonPrefixLength;
  815. }
  816. $lastPrefix = $prefix2;
  817. }
  818. if (0 == $prefixLength)
  819. {
  820. # No prefix (not in list)--go to paragraph mode
  821. // XXX: use a stack for nestable elements like span, table and div
  822. $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t);
  823. $closematch = preg_match('/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' . '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|' . $this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t);
  824. if ($openmatch or $closematch)
  825. {
  826. $paragraphStack = false;
  827. # TODO bug 5718: paragraph closed
  828. $output .= $this->closeParagraph();
  829. if ($preOpenMatch and ! $preCloseMatch)
  830. {
  831. $this->mInPre = true;
  832. }
  833. if ($closematch)
  834. {
  835. $inBlockElem = false;
  836. }
  837. else
  838. {
  839. $inBlockElem = true;
  840. }
  841. }
  842. else
  843. if (! $inBlockElem && ! $this->mInPre)
  844. {
  845. if (' ' == substr($t, 0, 1) and ($this->mLastSection === 'pre' or trim($t) != ''))
  846. {
  847. // pre
  848. if ($this->mLastSection !== 'pre')
  849. {
  850. $paragraphStack = false;
  851. $output .= $this->closeParagraph() . '<pre>';
  852. $this->mLastSection = 'pre';
  853. }
  854. $t = substr($t, 1);
  855. }
  856. else
  857. {
  858. // paragraph
  859. if ('' == trim($t))
  860. {
  861. if ($paragraphStack)
  862. {
  863. $output .= $paragraphStack . '<br />';
  864. $paragraphStack = false;
  865. $this->mLastSection = 'p';
  866. }
  867. else
  868. {
  869. if ($this->mLastSection !== 'p')
  870. {
  871. $output .= $this->closeParagraph();
  872. $this->mLastSection = '';
  873. $paragraphStack = '<p>';
  874. }
  875. else
  876. {
  877. $paragraphStack = '</p><p>';
  878. }
  879. }
  880. }
  881. else
  882. {
  883. if ($paragraphStack)
  884. {
  885. $output .= $paragraphStack;
  886. $paragraphStack = false;
  887. $this->mLastSection = 'p';
  888. }
  889. else
  890. if ($this->mLastSection !== 'p')
  891. {
  892. $output .= $this->closeParagraph() . '<p>';
  893. $this->mLastSection = 'p';
  894. }
  895. }
  896. }
  897. }
  898. }
  899. // somewhere above we forget to get out of pre block (bug 785)
  900. if ($preCloseMatch && $this->mInPre)
  901. {
  902. $this->mInPre = false;
  903. }
  904. if ($paragraphStack === false)
  905. {
  906. $output .= $t . "\n";
  907. }
  908. }
  909. while ($prefixLength)
  910. {
  911. $output .= $this->closeList($prefix2[$prefixLength - 1]);
  912. -- $prefixLength;
  913. }
  914. if ('' != $this->mLastSection)
  915. {
  916. $output .= '</' . $this->mLastSection . '>';
  917. $this->mLastSection = '';
  918. }
  919. return $output;
  920. }
  921. /* private */ function nextItem($char)
  922. {
  923. if ('*' === $char || '#' === $char)
  924. {
  925. return '</li><li>';
  926. }
  927. else
  928. if (':' === $char || ';' === $char)
  929. {
  930. $close = '</dd>';
  931. if ($this->mDTopen)
  932. {
  933. $close = '</dt>';
  934. }
  935. if (';' === $char)
  936. {
  937. $this->mDTopen = true;
  938. return $close . '<dt>';
  939. }
  940. else
  941. {
  942. $this->mDTopen = false;
  943. return $close . '<dd>';
  944. }
  945. }
  946. return '<!-- ERR 2 -->';
  947. }
  948. /**
  949. * Split up a string on ':', ignoring any occurences inside tags
  950. * to prevent illegal overlapping.
  951. * @param string $str the string to split
  952. * @param string &$before set to everything before the ':'
  953. * @param string &$after set to everything after the ':'
  954. * return string the position of the ':', or false if none found
  955. */
  956. function findColonNoLinks($str, &$before, &$after)
  957. {
  958. $pos = strpos($str, ':');
  959. if ($pos === false)
  960. {
  961. // Nothing to find!
  962. return false;
  963. }
  964. $lt = strpos($str, '<');
  965. if ($lt === false || $lt > $pos)
  966. {
  967. // Easy; no tag nesting to worry about
  968. $before = substr($str, 0, $pos);
  969. $after = substr($str, $pos + 1);
  970. return $pos;
  971. }
  972. // Ugly state machine to walk through avoiding tags.
  973. $state = self :: COLON_STATE_TEXT;
  974. $stack = 0;
  975. $len = strlen($str);
  976. for($i = 0; $i < $len; $i ++)
  977. {
  978. $c = $str{$i};
  979. switch ($state)
  980. {
  981. // (Using the number is a performance hack for common cases)
  982. case 0 : // self::COLON_STATE_TEXT:
  983. switch ($c)
  984. {
  985. case "<" :
  986. // Could be either a <start> tag or an </end> tag
  987. $state = self :: COLON_STATE_TAGSTART;
  988. break;
  989. case ":" :
  990. if ($stack == 0)
  991. {
  992. // We found it!
  993. $before = substr($str, 0, $i);
  994. $after = substr($str, $i + 1);
  995. return $i;
  996. }
  997. // Embedded in a tag; don't break it.
  998. break;
  999. default :
  1000. // Skip ahead looking for something interesting
  1001. $colon = strpos($str, ':', $i);
  1002. if ($colon === false)
  1003. {
  1004. // Nothing else interesting
  1005. return false;
  1006. }
  1007. $lt = strpos($str, '<', $i);
  1008. if ($stack === 0)
  1009. {
  1010. if ($lt === false || $colon < $lt)
  1011. {
  1012. // We found it!
  1013. $before = substr($str, 0, $colon);
  1014. $after = substr($str, $colon + 1);
  1015. return $i;
  1016. }
  1017. }
  1018. if ($lt === false)
  1019. {
  1020. // Nothing else interesting to find; abort!
  1021. // We're nested, but there's no close tags left. Abort!
  1022. break 2;
  1023. }
  1024. // Skip ahead to next tag start
  1025. $i = $lt;
  1026. $state = self :: COLON_STATE_TAGSTART;
  1027. }
  1028. break;
  1029. case 1 : // self::COLON_STATE_TAG:
  1030. // In a <tag>
  1031. switch ($c)
  1032. {
  1033. case ">" :
  1034. $stack ++;
  1035. $state = self :: COLON_STATE_TEXT;
  1036. break;
  1037. case "/" :
  1038. // Slash may be followed by >?
  1039. $state = self :: COLON_STATE_TAGSLASH;
  1040. break;
  1041. default :
  1042. // ignore
  1043. }
  1044. break;
  1045. case 2 : // self::COLON_STATE_TAGSTART:
  1046. switch ($c)
  1047. {
  1048. case "/" :
  1049. $state = self :: COLON_STATE_CLOSETAG;
  1050. break;
  1051. case "!" :
  1052. $state = self :: COLON_STATE_COMMENT;
  1053. break;
  1054. case ">" :
  1055. // Illegal early close? This shouldn't happen D:
  1056. $state = self :: COLON_STATE_TEXT;
  1057. break;
  1058. default :
  1059. $state = self :: COLON_STATE_TAG;
  1060. }
  1061. break;
  1062. case 3 : // self::COLON_STATE_CLOSETAG:
  1063. // In a </tag>
  1064. if ($c === ">")
  1065. {
  1066. $stack --;
  1067. if ($stack < 0)
  1068. {
  1069. return false;
  1070. }
  1071. $state = self :: COLON_STATE_TEXT;
  1072. }
  1073. break;
  1074. case self :: COLON_STATE_TAGSLASH :
  1075. if ($c === ">")
  1076. {
  1077. // Yes, a self-closed tag <blah/>
  1078. $state = self :: COLON_STATE_TEXT;
  1079. }
  1080. else
  1081. {
  1082. // Probably we're jumping the gun, and this is an attribute
  1083. $state = self :: COLON_STATE_TAG;
  1084. }
  1085. break;
  1086. case 5 : // self::COLON_STATE_COMMENT:
  1087. if ($c === "-")
  1088. {
  1089. $state = self :: COLON_STATE_COMMENTDASH;
  1090. }
  1091. break;
  1092. case self :: COLON_STATE_COMMENTDASH :
  1093. if ($c === "-")
  1094. {
  1095. $state = self :: COLON_STATE_COMMENTDASHDASH;
  1096. }
  1097. else
  1098. {
  1099. $state = self :: COLON_STATE_COMMENT;
  1100. }
  1101. break;
  1102. case self :: COLON_STATE_COMMENTDASHDASH :
  1103. if ($c === ">")
  1104. {
  1105. $state = self :: COLON_STATE_TEXT;
  1106. }
  1107. else
  1108. {
  1109. $state = self :: COLON_STATE_COMMENT;
  1110. }
  1111. break;
  1112. default :
  1113. throw new MWException("State machine error in " . __METHOD__);
  1114. }
  1115. }
  1116. if ($stack > 0)
  1117. {
  1118. return false;
  1119. }
  1120. return false;
  1121. }
  1122. # getCommon() returns the length of the longest common substring
  1123. # of both arguments, starting at the beginning of both.
  1124. #
  1125. function getCommon($st1, $st2)
  1126. {
  1127. $fl = strlen($st1);
  1128. $shorter = strlen($st2);
  1129. if ($fl < $shorter)
  1130. {
  1131. $shorter = $fl;
  1132. }
  1133. for($i = 0; $i < $shorter; ++ $i)
  1134. {
  1135. if ($st1{$i} != $st2{$i})
  1136. {
  1137. break;
  1138. }
  1139. }
  1140. return $i;
  1141. }
  1142. function closeList($char)
  1143. {
  1144. if ('*' === $char)
  1145. {
  1146. $text = '</li></ul>';
  1147. }
  1148. else
  1149. if ('#' === $char)
  1150. {
  1151. $text = '</li></ol>';
  1152. }
  1153. else
  1154. if (':' === $char)
  1155. {
  1156. if ($this->mDTopen)
  1157. {
  1158. $this->mDTopen = false;
  1159. $text = '</dt></dl>';
  1160. }
  1161. else
  1162. {
  1163. $text = '</dd></dl>';
  1164. }
  1165. }
  1166. else
  1167. {
  1168. return '<!-- ERR 3 -->';
  1169. }
  1170. return $text . "\n";
  1171. }
  1172. # These next three functions open, continue, and close the list
  1173. # element appropriate to the prefix character passed into them.
  1174. #
  1175. function openList($char)
  1176. {
  1177. $result = $this->closeParagraph();
  1178. if ('*' === $char)
  1179. {
  1180. $result .= '<ul><li>';
  1181. }
  1182. else
  1183. if ('#' === $char)
  1184. {
  1185. $result .= '<ol><li>';
  1186. }
  1187. else
  1188. if (':' === $char)
  1189. {
  1190. $result .= '<dl><dd>';
  1191. }
  1192. else
  1193. i

Large files are truncated, but you can click here to view the full file