PageRenderTime 70ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/library/vendors/markdown/Michelf/Markdown.php

http://github.com/vanillaforums/Garden
PHP | 3168 lines | 2073 code | 368 blank | 727 comment | 210 complexity | 9546837dcbaac2c808bfb7d1278983b2 MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-3-Clause, MIT
  1. <?php
  2. #
  3. #
  4. # DO NOT UPDATE THIS FILE
  5. # DO NOT BRING IN A NEW VERSION OF THIS LIBRARY
  6. # VANILLA CHANGES WILL BE LOST
  7. #
  8. # Please see /library/core/class.markdownvanilla.php
  9. #
  10. #
  11. #
  12. # Markdown - A text-to-HTML conversion tool for web writers
  13. #
  14. # PHP Markdown
  15. # Copyright (c) 2004-2014 Michel Fortin
  16. # <http://michelf.com/projects/php-markdown/>
  17. #
  18. # Original Markdown
  19. # Copyright (c) 2004-2006 John Gruber
  20. # <http://daringfireball.net/projects/markdown/>
  21. #
  22. namespace Michelf;
  23. #
  24. # Markdown Parser Class
  25. #
  26. class Markdown implements MarkdownInterface {
  27. ### Version ###
  28. const MARKDOWNLIB_VERSION = "1.4.1";
  29. ### Simple Function Interface ###
  public static function defaultTransform($text) {
      #
      # Simple function interface: transform $text with a parser of the
      # class this method was called on (so derived classes work too).
      # One parser per class is created on first use and reused afterwards.
      #
      static $parsers = array();

      $class = \get_called_class();
      if (empty($parsers[$class])) {
          $parsers[$class] = new $class;
      }

      return $parsers[$class]->transform($text);
  }
  46. ### Configuration Variables ###
  # Public settings a caller may change on a parser instance.
  47. # Suffix that closes empty elements (<hr>, <br>, <img>). Change to ">" for HTML output.
  48. public $empty_element_suffix = " />";
  # Width of a tab stop in spaces; block patterns allow up to
  # tab_width - 1 leading spaces before a line counts as indented.
  49. public $tab_width = 4;
  50. # Change to `true` to disallow markup or entities.
  51. public $no_markup = false;
  52. public $no_entities = false;
  53. # Predefined urls and titles for reference links and images.
  # setup() copies them into the working tables before each transformation.
  54. public $predef_urls = array();
  55. public $predef_titles = array();
  56. # Optional filter function for URLs
  57. public $url_filter_func = null;
  58. ### Parser Implementation ###
  59. # Regex to match balanced [brackets].
  60. # Needed to insert a maximum bracket depth while converting to PHP.
  # The *_re members are built by the constructor from the *_depth values.
  61. protected $nested_brackets_depth = 6;
  62. protected $nested_brackets_re;
  63. protected $nested_url_parenthesis_depth = 4;
  64. protected $nested_url_parenthesis_re;
  65. # Characters that can be backslash-escaped; the constructor turns this
  # list into the character class stored in $escape_chars_re.
  66. protected $escape_chars = '\`*_{}[]()>#+-.!';
  67. protected $escape_chars_re;
  public function __construct() {
      #
      # Constructor. Precomputes the helper regular expressions and puts
      # the gamut tables into priority order.
      #
      $this->_initDetab();
      $this->prepareItalicsAndBold();

      # Balanced [brackets], nested up to $nested_brackets_depth levels.
      $bracket_depth = $this->nested_brackets_depth;
      $bracket_open  = str_repeat('(?>[^\[\]]+|\[', $bracket_depth);
      $bracket_close = str_repeat('\])*', $bracket_depth);
      $this->nested_brackets_re = $bracket_open . $bracket_close;

      # Balanced (parentheses) inside URLs, nested up to
      # $nested_url_parenthesis_depth levels.
      $paren_depth = $this->nested_url_parenthesis_depth;
      $paren_open  = str_repeat('(?>[^()\s]+|\(', $paren_depth);
      $paren_close = str_repeat('(?>\)))*', $paren_depth);
      $this->nested_url_parenthesis_re = $paren_open . $paren_close;

      # Character class matching any escapable character.
      $this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

      # Each gamut maps method name => priority; sort by ascending priority
      # so a plain foreach runs the methods in the right order.
      asort($this->document_gamut);
      asort($this->block_gamut);
      asort($this->span_gamut);
  }
  86. # Internal hashes used during transformation.
  # $urls and $titles map a lower-cased link id to its url / title; setup()
  # seeds them from the predef_* tables. $html_hashes maps a hash token
  # produced by hashPart() to the text it stands for.
  87. protected $urls = array();
  88. protected $titles = array();
  89. protected $html_hashes = array();
  90. # Status flag to avoid invalid nesting.
  # True while doAnchors() is running; a nested doAnchors() call then
  # returns its input untouched.
  91. protected $in_anchor = false;
  protected function setup() {
      #
      # Reset the per-document parser state. Called by transform() before
      # any processing starts.
      #
      $this->in_anchor   = false;
      $this->html_hashes = array();

      # The reference tables start out as the caller-supplied predefined
      # definitions.
      $this->titles = $this->predef_titles;
      $this->urls   = $this->predef_urls;
  }
  protected function teardown() {
      #
      # Called by transform() once the document has been processed: empty
      # the working tables so they do not hold on to memory needlessly.
      #
      $this->html_hashes = array();
      $this->titles      = array();
      $this->urls        = array();
  }
  public function transform($text) {
      #
      # Main entry point. Normalises the input text, runs it through every
      # method of the document gamut and returns the resulting markup
      # followed by a newline.
      #
      $this->setup();

      # Drop a leading UTF-8 BOM and every \x1A character: \x1A is the
      # marker hashPart() puts inside its tokens.
      $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

      # DOS (\r\n) and old Mac (\r) line endings become Unix (\n).
      $text = preg_replace('{\r\n?}', "\n", $text);

      # Guarantee two trailing newlines, then expand tabs to spaces.
      $text = $this->detab($text . "\n\n");

      # Replace block-level HTML with hash tokens.
      $text = $this->hashHTMLBlocks($text);

      # Empty out lines made only of spaces, so that later patterns can
      # match consecutive blank lines with a plain /\n+/.
      $text = preg_replace('/^[ ]+$/m', '', $text);

      # Run the document gamut; the table was sorted by priority in the
      # constructor.
      foreach (array_keys($this->document_gamut) as $step) {
          $text = $this->$step($text);
      }

      $this->teardown();

      return $text . "\n";
  }
  # Document-level passes: method name => priority. The constructor
  # asort()s this table and transform() calls the methods in that order.
  141. protected $document_gamut = array(
  142. # Strip link definitions, store in hashes.
  143. "stripLinkDefinitions" => 20,
  144. "runBasicBlockGamut" => 30,
  145. );
  protected function stripLinkDefinitions($text) {
      #
      # Remove link definitions of the form
      #     [id]: url "optional title"
      # from $text. Every match is handed to _stripLinkDefinitions_callback,
      # which records the url and title and replaces the definition with
      # an empty string.
      #
      $max_indent = $this->tab_width - 1;

      $definition_re = '{
          ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
          [ ]*
          \n? # maybe *one* newline
          [ ]*
          (?:
          <(.+?)> # url = $2
          |
          (\S+?) # url = $3
          )
          [ ]*
          \n? # maybe one newline
          [ ]*
          (?:
          (?<=\s) # lookbehind for whitespace
          ["(]
          (.*?) # title = $4
          [")]
          [ ]*
          )? # title is optional
          (?:\n+|\Z)
          }xm';

      return preg_replace_callback(
          $definition_re,
          array($this, '_stripLinkDefinitions_callback'),
          $text
      );
  }
  protected function _stripLinkDefinitions_callback($matches) {
      #
      # Record one link definition matched by stripLinkDefinitions().
      # $matches: 1 = id, 2 = url written as <url>, 3 = bare url,
      # 4 = optional title. Returns the replacement for the definition,
      # which is always the empty string.
      #
      $id = strtolower($matches[1]);

      $this->urls[$id] = ($matches[2] == '') ? $matches[3] : $matches[2];

      # No title group means a null entry, which isset() reports as absent.
      $this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

      return '';
  }
  186. protected function hashHTMLBlocks($text) {
  #
  # Replace block-level HTML found in $text (block tags, <hr>, standalone
  # comments and <? ?> / <% %> processor instructions) with hash tokens so
  # later passes leave it alone. Each match goes through
  # _hashHTMLBlocks_callback. Returns the text with the blocks replaced.
  #
  # With no_markup enabled nothing is hashed and the text is returned as is.
  187. if ($this->no_markup) return $text;
  188. $less_than_tab = $this->tab_width - 1;
  189. # Hashify HTML blocks:
  190. # We only want to do this for block-level HTML tags, such as headers,
  191. # lists, and tables. That's because we still want to wrap <p>s around
  192. # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  193. # phrase emphasis, and spans. The list of tags we're looking for is
  194. # hard-coded:
  195. #
  196. # * List "a" is made of tags which can be both inline or block-level.
  197. # These will be treated block-level when the start tag is alone on
  198. # its line, otherwise they're not matched here and will be taken as
  199. # inline later.
  200. # * List "b" is made of tags which are always block-level;
  201. #
  202. $block_tags_a_re = 'ins|del';
  203. $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
  204. 'script|noscript|style|form|fieldset|iframe|math|svg|'.
  205. 'article|section|nav|aside|hgroup|header|footer|'.
  206. 'figure';
  207. # Regular expression for the content of a block tag.
  # $nested_tags_level is how many times the nesting pattern below is
  # repeated, i.e. how deep same-name tags may nest and still be matched.
  208. $nested_tags_level = 4;
  209. $attr = '
  210. (?> # optional tag attributes
  211. \s # starts with whitespace
  212. (?>
  213. [^>"/]+ # text outside quotes
  214. |
  215. /+(?!>) # slash not followed by ">"
  216. |
  217. "[^"]*" # text inside double quotes (tolerate ">")
  218. |
  219. \'[^\']*\' # text inside single quotes (tolerate ">")
  220. )*
  221. )?
  222. ';
  # $content is built in three pieces: the opening half of the nesting
  # pattern repeated $nested_tags_level times, a lazy wildcard for the
  # innermost content, then the closing half repeated the same number of
  # times. \2 refers to the tag name captured by the main pattern below.
  223. $content =
  224. str_repeat('
  225. (?>
  226. [^<]+ # content without tag
  227. |
  228. <\2 # nested opening tag
  229. '.$attr.' # attributes
  230. (?>
  231. />
  232. |
  233. >', $nested_tags_level). # end of opening tag
  234. '.*?'. # last level nested tag content
  235. str_repeat('
  236. </\2\s*> # closing nested tag
  237. )
  238. |
  239. <(?!/\2\s*> # other tags with a different name
  240. )
  241. )*',
  242. $nested_tags_level);
  # Same pattern for the list "a" alternative, whose tag name is captured
  # in group 3 rather than group 2.
  243. $content2 = str_replace('\2', '\3', $content);
  244. # First, look for nested blocks, e.g.:
  245. # <div>
  246. # <div>
  247. # tags for inner block must be indented.
  248. # </div>
  249. # </div>
  250. #
  251. # The outermost tags must start at the left margin for this to match, and
  252. # the inner nested divs must be indented.
  253. # We need to do this before the next, more liberal match, because the next
  254. # match will start at the first `<div>` and stop at the first `</div>`.
  255. $text = preg_replace_callback('{(?>
  256. (?>
  257. (?<=\n) # Starting on its own line
  258. | # or
  259. \A\n? # the at beginning of the doc
  260. )
  261. ( # save in $1
  262. # Match from `\n<tag>` to `</tag>\n`, handling nested tags
  263. # in between.
  264. [ ]{0,'.$less_than_tab.'}
  265. <('.$block_tags_b_re.')# start tag = $2
  266. '.$attr.'> # attributes followed by > and \n
  267. '.$content.' # content, support nesting
  268. </\2> # the matching end tag
  269. [ ]* # trailing spaces/tabs
  270. (?=\n+|\Z) # followed by a newline or end of document
  271. | # Special version for tags of group a.
  272. [ ]{0,'.$less_than_tab.'}
  273. <('.$block_tags_a_re.')# start tag = $3
  274. '.$attr.'>[ ]*\n # attributes followed by >
  275. '.$content2.' # content, support nesting
  276. </\3> # the matching end tag
  277. [ ]* # trailing spaces/tabs
  278. (?=\n+|\Z) # followed by a newline or end of document
  279. | # Special case just for <hr />. It was easier to make a special
  280. # case than to make the other regex more complicated.
  281. [ ]{0,'.$less_than_tab.'}
  282. <(hr) # start tag = $2
  283. '.$attr.' # attributes
  284. /?> # the matching end tag
  285. [ ]*
  286. (?=\n{2,}|\Z) # followed by a blank line or end of document
  287. | # Special case for standalone HTML comments:
  288. [ ]{0,'.$less_than_tab.'}
  289. (?s:
  290. <!-- .*? -->
  291. )
  292. [ ]*
  293. (?=\n{2,}|\Z) # followed by a blank line or end of document
  294. | # PHP and ASP-style processor instructions (<? and <%)
  295. [ ]{0,'.$less_than_tab.'}
  296. (?s:
  297. <([?%]) # $2
  298. .*?
  299. \2>
  300. )
  301. [ ]*
  302. (?=\n{2,}|\Z) # followed by a blank line or end of document
  303. )
  304. )}Sxmi',
  305. array($this, '_hashHTMLBlocks_callback'),
  306. $text);
  307. return $text;
  308. }
  protected function _hashHTMLBlocks_callback($matches) {
      #
      # Replacement callback for hashHTMLBlocks(): store the matched HTML
      # block (group 1) as a block-level hash and return its token with a
      # blank line on either side.
      #
      return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
  }
  protected function hashPart($text, $boundary = 'X') {
      #
      # Store $text in the hash table and return the unique token that
      # stands for it in the text stream; unhash() turns tokens back into
      # their text.
      #
      # $boundary is the character placed at both ends of the token. By
      # convention "B" marks block elements that must not be wrapped in
      # paragraph tags at the end, ":" marks elements that act as word
      # separators, and "X" is used for everything else.
      #
      # Expand any token already present in $text now, so that a single
      # unhash pass is enough at the end.
      $expanded = $this->unhash($text);

      # Counter kept across calls so that every token is distinct.
      static $counter = 0;
      $counter++;

      $token = $boundary . "\x1A" . $counter . $boundary;
      $this->html_hashes[$token] = $expanded;

      return $token;
  }
  protected function hashBlock($text) {
      #
      # Hash $text as a block-level element: hashPart() with the "B"
      # boundary character.
      #
      $block_boundary = 'B';
      return $this->hashPart($text, $block_boundary);
  }
  # Block-level passes: method name => priority. Sorted by the constructor
  # and run in that order by runBasicBlockGamut().
  340. protected $block_gamut = array(
  341. #
  342. # These are all the transformations that form block-level
  343. # tags like paragraphs, headers, and list items.
  344. #
  345. "doHeaders" => 10,
  346. "doHorizontalRules" => 20,
  347. "doLists" => 40,
  348. "doCodeBlocks" => 50,
  349. "doBlockQuotes" => 60,
  350. );
  protected function runBlockGamut($text) {
      #
      # Run the block gamut on $text after hashing its raw HTML blocks.
      #
      # HTML has to be hashed again for each block rather than once for the
      # whole document: a block that sits inside a list item was indented
      # in the source, so the first hashHTMLBlocks() pass would have seen
      # it as a code block instead.
      #
      return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
  }
  protected function runBasicBlockGamut($text) {
      #
      # Run the block gamut on $text without hashing HTML blocks first.
      # Meant for text whose HTML blocks are already hashed, such as the
      # first whole-document pass.
      #
      foreach (array_keys($this->block_gamut) as $step) {
          $text = $this->$step($text);
      }

      # Last step: form paragraphs and restore the hashed blocks.
      return $this->formParagraphs($text);
  }
  protected function doHorizontalRules($text) {
      #
      # Replace horizontal rule lines (three or more -, * or _ markers,
      # optionally separated by spaces) with a hashed <hr> element.
      #
      $rule_re = '{
          ^[ ]{0,3} # Leading space
          ([-*_]) # $1: First marker
          (?> # Repeated marker group
          [ ]{0,2} # Zero, one, or two spaces.
          \1 # Marker character
          ){2,} # Group repeated at least twice
          [ ]* # Tailing spaces
          $ # End of line.
          }mx';

      # A single <hr> token is created up front and shared by every match.
      $rule_token = $this->hashBlock('<hr' . $this->empty_element_suffix);

      return preg_replace($rule_re, "\n" . $rule_token . "\n", $text);
  }
  # Span-level passes: method name => priority. Sorted by the constructor
  # and run in that order by runSpanGamut().
  392. protected $span_gamut = array(
  393. #
  394. # These are all the transformations that occur *within* block-level
  395. # tags like paragraphs, headers, and list items.
  396. #
  397. # Process character escapes, code spans, and inline HTML
  398. # in one shot.
  399. "parseSpan" => -30,
  400. # Process anchor and image tags. Images must come first,
  401. # because ![foo][f] looks like an anchor.
  402. "doImages" => 10,
  403. "doAnchors" => 20,
  404. # Make links out of things like `<http://example.com/>`
  405. # Must come after doAnchors, because you can use < and >
  406. # delimiters in inline links like [this](<url>).
  407. "doAutoLinks" => 30,
  408. "encodeAmpsAndAngles" => 40,
  409. "doItalicsAndBold" => 50,
  410. "doHardBreaks" => 60
  411. );
  protected function runSpanGamut($text) {
      #
      # Apply every span-level transformation to $text, in the priority
      # order established by the constructor, and return the result.
      #
      foreach (array_keys($this->span_gamut) as $step) {
          $text = $this->$step($text);
      }

      return $text;
  }
  protected function doHardBreaks($text) {
      #
      # Turn "two or more spaces followed by a newline" into a hard line
      # break; the replacement is produced by _doHardBreaks_callback.
      #
      $on_break = array($this, '_doHardBreaks_callback');

      return preg_replace_callback('/ {2,}\n/', $on_break, $text);
  }
  protected function _doHardBreaks_callback($matches) {
      #
      # Replacement callback for doHardBreaks(): a hashed <br> element
      # followed by a newline. The matched text itself is not used.
      #
      $line_break = '<br' . $this->empty_element_suffix . "\n";

      return $this->hashPart($line_break);
  }
  protected function doAnchors($text) {
      #
      # Turn Markdown link shortcuts into XHTML <a> tags. Three syntaxes
      # are handled, in this order: reference links, inline links, and
      # reference shortcuts.
      #
      # The callbacks run the span gamut on the link text, which calls
      # back into this method; the in_anchor flag makes that nested call
      # return immediately so links are never nested.
      if ($this->in_anchor) {
          return $text;
      }
      $this->in_anchor = true;

      $on_reference = array($this, '_doAnchors_reference_callback');
      $on_inline    = array($this, '_doAnchors_inline_callback');

      #
      # First, reference-style links: [link text] [id]
      #
      $reference_re = '{
          ( # wrap whole match in $1
          \[
          ('.$this->nested_brackets_re.') # link text = $2
          \]
          [ ]? # one optional space
          (?:\n[ ]*)? # one optional newline followed by spaces
          \[
          (.*?) # id = $3
          \]
          )
          }xs';
      $text = preg_replace_callback($reference_re, $on_reference, $text);

      #
      # Next, inline-style links: [link text](url "optional title")
      #
      $inline_re = '{
          ( # wrap whole match in $1
          \[
          ('.$this->nested_brackets_re.') # link text = $2
          \]
          \( # literal paren
          [ \n]*
          (?:
          <(.+?)> # href = $3
          |
          ('.$this->nested_url_parenthesis_re.') # href = $4
          )
          [ \n]*
          ( # $5
          ([\'"]) # quote char = $6
          (.*?) # Title = $7
          \6 # matching quote
          [ \n]* # ignore any spaces/tabs between closing quote and )
          )? # title is optional
          \)
          )
          }xs';
      $text = preg_replace_callback($inline_re, $on_inline, $text);

      #
      # Last, reference-style shortcuts: [link text]
      # They must come last in case the text also holds [link text][1]
      # or [link text](/foo).
      #
      $shortcut_re = '{
          ( # wrap whole match in $1
          \[
          ([^\[\]]+) # link text = $2; can\'t contain [ or ]
          \]
          )
          }xs';
      $text = preg_replace_callback($shortcut_re, $on_reference, $text);

      $this->in_anchor = false;

      return $text;
  }
  protected function _doAnchors_reference_callback($matches) {
      #
      # Replacement callback for reference links and reference shortcuts.
      # $matches: 1 = whole match, 2 = link text, 3 = link id (missing for
      # [text], empty for [text][]). Returns a hashed <a> element when the
      # id is a known link definition, otherwise the matched text unchanged.
      #
      $label = $matches[2];

      # A missing or empty id means the link text doubles as the id.
      $id = isset($matches[3]) ? $matches[3] : '';
      if ($id == '') {
          $id = $label;
      }

      # Ids are compared lower-cased, with embedded newlines turned into
      # spaces.
      $id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

      if (!isset($this->urls[$id])) {
          return $matches[1];
      }

      $href   = $this->encodeURLAttribute($this->urls[$id]);
      $anchor = "<a href=\"$href\"";

      if (isset($this->titles[$id])) {
          $anchor .= ' title="' . $this->encodeAttribute($this->titles[$id]) . '"';
      }

      $anchor .= '>' . $this->runSpanGamut($label) . '</a>';

      return $this->hashPart($anchor);
  }
  protected function _doAnchors_inline_callback($matches) {
      #
      # Replacement callback for inline links.
      # $matches: 2 = link text, 3 = url written as <url>, 4 = bare url,
      # 7 = optional title. Returns the hashed <a> element.
      #
      $label = $this->runSpanGamut($matches[2]);
      $href  = ($matches[3] == '') ? $matches[4] : $matches[3];

      # A url of the form <s p a c e s> was caught by the HTML tag parser
      # and hashed; undo that and strip the angle brackets before use.
      $restored = $this->unhash($href);
      if ($restored != $href) {
          $href = preg_replace('/^<(.*)>$/', '\1', $restored);
      }
      $href = $this->encodeURLAttribute($href);

      $anchor = "<a href=\"$href\"";
      if (isset($matches[7])) {
          $anchor .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
      }

      # NOTE(review): the link text goes through the span gamut a second
      # time here, on top of the pass above. Kept as is to preserve
      # behaviour; confirm whether the repetition is intentional.
      $label = $this->runSpanGamut($label);

      return $this->hashPart($anchor . ">$label</a>");
  }
  protected function doImages($text) {
      #
      # Turn Markdown image shortcuts into <img> tags: reference-style
      # images first, then inline images.
      #

      #
      # Reference-style labeled images: ![alt text][id]
      #
      $reference_re = '{
          ( # wrap whole match in $1
          !\[
          ('.$this->nested_brackets_re.') # alt text = $2
          \]
          [ ]? # one optional space
          (?:\n[ ]*)? # one optional newline followed by spaces
          \[
          (.*?) # id = $3
          \]
          )
          }xs';
      $text = preg_replace_callback(
          $reference_re,
          array($this, '_doImages_reference_callback'),
          $text
      );

      #
      # Inline images: ![alt text](url "optional title")
      #
      $inline_re = '{
          ( # wrap whole match in $1
          !\[
          ('.$this->nested_brackets_re.') # alt text = $2
          \]
          \s? # One optional whitespace character
          \( # literal paren
          [ \n]*
          (?:
          <(\S*)> # src url = $3
          |
          ('.$this->nested_url_parenthesis_re.') # src url = $4
          )
          [ \n]*
          ( # $5
          ([\'"]) # quote char = $6
          (.*?) # title = $7
          \6 # matching quote
          [ \n]*
          )? # title is optional
          \)
          )
          }xs';

      return preg_replace_callback(
          $inline_re,
          array($this, '_doImages_inline_callback'),
          $text
      );
  }
  protected function _doImages_reference_callback($matches) {
      #
      # Replacement callback for reference-style images.
      # $matches: 1 = whole match, 2 = alt text, 3 = link id (empty for
      # ![alt][], in which case the alt text is the id). Returns a hashed
      # <img> element when the id is a known definition, otherwise the
      # matched text unchanged.
      #
      $alt = $matches[2];

      $id = strtolower($matches[3]);
      if ($id == '') {
          $id = strtolower($alt);
      }

      $alt = $this->encodeAttribute($alt);

      # Unknown id: leave the source text intact.
      if (!isset($this->urls[$id])) {
          return $matches[1];
      }

      $src = $this->encodeURLAttribute($this->urls[$id]);
      $img = "<img src=\"$src\" alt=\"$alt\"";

      if (isset($this->titles[$id])) {
          $img .= ' title="' . $this->encodeAttribute($this->titles[$id]) . '"';
      }

      return $this->hashPart($img . $this->empty_element_suffix);
  }
  protected function _doImages_inline_callback($matches) {
      #
      # Replacement callback for inline images.
      # $matches: 2 = alt text, 3 = url written as <url>, 4 = bare url,
      # 7 = optional title. Returns the hashed <img> element.
      #
      $src = ($matches[3] == '') ? $matches[4] : $matches[3];

      $alt = $this->encodeAttribute($matches[2]);
      $src = $this->encodeURLAttribute($src);

      $img = "<img src=\"$src\" alt=\"$alt\"";
      if (isset($matches[7])) {
          $img .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
      }

      return $this->hashPart($img . $this->empty_element_suffix);
  }
  protected function doHeaders($text) {
      #
      # Convert Setext-style and atx-style headers into hashed <hN> blocks.
      #

      # Setext-style headers:
      #     Header 1
      #     ========
      #
      #     Header 2
      #     --------
      #
      $setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
      $text = preg_replace_callback(
          $setext_re,
          array($this, '_doHeaders_callback_setext'),
          $text
      );

      # atx-style headers:
      #     # Header 1
      #     ## Header 2
      #     ## Header 2 with closing hashes ##
      #     ...
      #     ###### Header 6
      #
      $atx_re = '{
          ^(\#{1,6}) # $1 = string of #\'s
          [ ]*
          (.+?) # $2 = Header text
          [ ]*
          \#* # optional closing #\'s (not counted)
          \n+
          }xm';

      return preg_replace_callback(
          $atx_re,
          array($this, '_doHeaders_callback_atx'),
          $text
      );
  }
  protected function _doHeaders_callback_setext($matches) {
      #
      # Replacement callback for Setext-style headers.
      # $matches: 0 = whole match, 1 = header text, 2 = underline made of
      # "=" (level 1) or "-" (level 2). Returns the hashed <h1>/<h2> block
      # surrounded by newlines, or the matched text unchanged when the
      # match is really an empty list item.
      #
      # Terrible hack to check we haven't found an empty list item.
      if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
          return $matches[0];

      # Square brackets for the string offset: the curly-brace form
      # ($str{0}) is deprecated in PHP 7.4 and a parse error in PHP 8.
      $level = $matches[2][0] == '=' ? 1 : 2;
      $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
      return "\n" . $this->hashBlock($block) . "\n\n";
  }
  protected function _doHeaders_callback_atx($matches) {
      #
      # Replacement callback for atx-style headers.
      # $matches: 1 = run of leading "#" (its length is the header level),
      # 2 = header text. Returns the hashed <hN> block surrounded by
      # newlines.
      #
      $depth   = strlen($matches[1]);
      $content = $this->runSpanGamut($matches[2]);
      $header  = '<h' . $depth . '>' . $content . '</h' . $depth . '>';

      return "\n" . $this->hashBlock($header) . "\n\n";
  }
  673. protected function doLists($text) {
  674. #
  675. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  # Whole lists are located here, one marker family at a time, and handed
  # to _doLists_callback for conversion. Returns the transformed text.
  676. #
  677. $less_than_tab = $this->tab_width - 1;
  678. # Re-usable patterns to match list item bullets and number markers:
  679. $marker_ul_re = '[*+-]';
  680. $marker_ol_re = '\d+[\.]';
  # Maps each marker pattern to the pattern of the other list kind, so the
  # loop below processes unordered lists first and ordered lists second.
  681. $markers_relist = array(
  682. $marker_ul_re => $marker_ol_re,
  683. $marker_ol_re => $marker_ul_re,
  684. );
  685. foreach ($markers_relist as $marker_re => $other_marker_re) {
  686. # Re-usable pattern to match any entire ul or ol list:
  687. $whole_list_re = '
  688. ( # $1 = whole list
  689. ( # $2
  690. ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
  691. ('.$marker_re.') # $4 = first list item marker
  692. [ ]+
  693. )
  694. (?s:.+?)
  695. ( # $5
  696. \z
  697. |
  698. \n{2,}
  699. (?=\S)
  700. (?! # Negative lookahead for another list item marker
  701. [ ]*
  702. '.$marker_re.'[ ]+
  703. )
  704. |
  705. (?= # Lookahead for another kind of list
  706. \n
  707. \3 # Must have the same indentation
  708. '.$other_marker_re.'[ ]+
  709. )
  710. )
  711. )
  712. '; // mx
  713. # We use a different prefix before nested lists than top-level lists.
  714. # See extended comment in processListItems().
  # Inside a list (list_level > 0) a list may start at the beginning of
  # any line; at top level it must follow a blank line or start the text.
  715. if ($this->list_level) {
  716. $text = preg_replace_callback('{
  717. ^
  718. '.$whole_list_re.'
  719. }mx',
  720. array($this, '_doLists_callback'), $text);
  721. }
  722. else {
  723. $text = preg_replace_callback('{
  724. (?:(?<=\n)\n|\A\n?) # Must eat the newline
  725. '.$whole_list_re.'
  726. }mx',
  727. array($this, '_doLists_callback'), $text);
  728. }
  729. }
  730. return $text;
  731. }
  protected function _doLists_callback($matches) {
      #
      # Replacement callback for doLists().
      # $matches: 1 = whole list text, 4 = first list item marker.
      # Returns the list as a hashed <ul> or <ol> block surrounded by
      # newlines.
      #
      # Re-usable patterns to match list item bullets and number markers:
      $marker_ul_re = '[*+-]';
      $marker_ol_re = '\d+[\.]';

      # The first marker decides the list type; only markers of that type
      # are then used to split the list into items. (The earlier combined
      # "any marker" pattern was overwritten before use and is gone.)
      $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
      $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

      $list = $matches[1] . "\n";
      $result = $this->processListItems($list, $marker_any_re);
      $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");

      return "\n". $result ."\n\n";
  }
  # List nesting depth: incremented by processListItems() on entry and
  # decremented on exit; zero means "not inside a list". doLists() picks
  # its list-start pattern based on it.
  745. protected $list_level = 0;
  protected function processListItems($list_str, $marker_any_re) {
      #
      # Split the text of one ordered or unordered list into its items and
      # convert each of them through _processListItems_callback.
      # $marker_any_re is the pattern matching this list's item markers.
      # Returns the converted items.
      #
      # $this->list_level records how deep inside lists we are: it goes up
      # by one for the duration of this call and back down afterwards, so
      # zero means "not in a list". The distinction matters because outside
      # of a list, text such as
      #
      #     I recommend upgrading to version
      #     8. Oops, now this line is treated
      #     as a sub-list.
      #
      # must remain a single paragraph even though its second line begins
      # with digit-period-space, whereas inside a list (or sub-list) such a
      # line starts a sub-list. It is a kludge: this part of Markdown's
      # syntax cannot be parsed perfectly without mind-reading. Requiring
      # sub-lists to begin with a starting cardinal number ("1." or "a.")
      # might be the way out.
      $this->list_level++;

      # Collapse trailing blank lines into a single newline.
      $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

      $item_re = '{
          (\n)? # leading line = $1
          (^[ ]*) # leading whitespace = $2
          ('.$marker_any_re.' # list marker and space = $3
          (?:[ ]+|(?=\n)) # space only required if item is not empty
          )
          ((?s:.*?)) # list item text = $4
          (?:(\n+(?=\n))|\n) # tailing blank line = $5
          (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
          }xm';
      $list_str = preg_replace_callback(
          $item_re,
          array($this, '_processListItems_callback'),
          $list_str
      );

      $this->list_level--;

      return $list_str;
  }
  protected function _processListItems_callback($matches) {
      #
      # Replacement callback for processListItems(): convert one list item.
      # $matches: 1 = leading blank line, 2 = leading whitespace,
      # 3 = marker and following space, 4 = item text, 5 = trailing blank
      # line (1, 2 and 5 may be missing). Returns the <li> element.
      #
      $body = $matches[4];

      $has_leading_line  = !empty($matches[1]);
      $has_trailing_line = !empty($matches[5]);

      $is_block_item = $has_leading_line
          || $has_trailing_line
          || preg_match('/\n{2,}/', $body);

      if ($is_block_item) {
          # Blank lines around or inside the item: treat its content as
          # block-level text. The marker is replaced by spaces of the same
          # width so the item keeps its indentation.
          $indent = isset($matches[2]) ? $matches[2] : '';
          $body = $indent . str_repeat(' ', strlen($matches[3])) . $body;
          $body = $this->runBlockGamut($this->outdent($body) . "\n");
      } else {
          # Tight item: recurse for sub-lists, then apply span-level
          # formatting only.
          $body = $this->doLists($this->outdent($body));
          $body = preg_replace('/\n+$/', '', $body);
          $body = $this->runSpanGamut($body);
      }

      return '<li>' . $body . "</li>\n";
  }
  protected function doCodeBlocks($text) {
      #
      # Convert indented code blocks into `<pre><code>` blocks. Each block
      # found is converted by _doCodeBlocks_callback.
      #
      $code_block_re = '{
          (?:\n\n|\A\n?)
          ( # $1 = the code block -- one or more lines, starting with a space/tab
          (?>
          [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
          .*\n+
          )+
          )
          ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
          }xm';

      return preg_replace_callback(
          $code_block_re,
          array($this, '_doCodeBlocks_callback'),
          $text
      );
  }
  protected function _doCodeBlocks_callback($matches) {
      #
      # Replacement callback for doCodeBlocks(): outdent the matched block
      # (group 1), escape its HTML special characters (quotes are left
      # alone) and return it as a hashed <pre><code> element with a blank
      # line on either side.
      #
      $code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);

      # Trim leading and trailing newlines.
      $code = preg_replace('/\A\n+|\n+\z/', '', $code);

      $token = $this->hashBlock("<pre><code>$code\n</code></pre>");

      return "\n\n" . $token . "\n\n";
  }
  protected function makeCodeSpan($code) {
      #
      # Create the markup for a code span holding $code and return its
      # hash token. Surrounding whitespace is trimmed and HTML special
      # characters are escaped (quotes are left alone).
      #
      $code = htmlspecialchars(trim($code), ENT_NOQUOTES);

      # Vanilla: code that contains a newline becomes <pre><code> instead
      # of a plain <code> span.
      # strpos() is compared against false explicitly rather than tested
      # for truthiness, so the check does not depend on the newline never
      # being at offset 0.
      if (strpos($code, "\n") !== false)
          return $this->hashPart("<pre><code>$code</code></pre>");

      return $this->hashPart("<code>$code</code>");
  }
  # Emphasis token patterns, combined by prepareItalicsAndBold().
  # Each table is keyed by a marker string ('' or the marker itself);
  # presumably '' is the pattern for an opening token and the other keys
  # are the closing pattern while that marker is open -- confirm against
  # doItalicsAndBold().
  # $em_relist: single-character markers (* or _).
  845. protected $em_relist = array(
  846. '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
  847. '*' => '(?<![\s*])\*(?!\*)',
  848. '_' => '(?<![\s_])_(?!_)',
  849. );
  # $strong_relist: double-character markers (** or __).
  850. protected $strong_relist = array(
  851. '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
  852. '**' => '(?<![\s*])\*\*(?!\*)',
  853. '__' => '(?<![\s_])__(?!_)',
  854. );
  # $em_strong_relist: triple-character markers (*** or ___).
  855. protected $em_strong_relist = array(
  856. '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
  857. '***' => '(?<![\s*])\*\*\*(?!\*)',
  858. '___' => '(?<![\s_])___(?!_)',
  859. );
  # Filled by prepareItalicsAndBold(): "$em$strong" key => complete token
  # regular expression for that combination.
  860. protected $em_strong_prepared_relist;
  protected function prepareItalicsAndBold() {
      #
      # Build, for every combination of an $em_relist key and a
      # $strong_relist key, the regular expression used to search for
      # emphasis tokens, and store it in $em_strong_prepared_relist under
      # the concatenated key.
      #
      foreach ($this->em_relist as $em => $em_re) {
          foreach ($this->strong_relist as $strong => $strong_re) {
              $state = "$em$strong";

              # Allowed token expressions: em and strong always, preceded
              # by the three-character form when one exists for this key.
              $alternatives = array($em_re, $strong_re);
              if (isset($this->em_strong_relist[$state])) {
                  array_unshift($alternatives, $this->em_strong_relist[$state]);
              }

              # Master expression: one capturing group with every
              # alternative.
              $this->em_strong_prepared_relist[$state] =
                  '{(' . implode('|', $alternatives) . ')}';
          }
      }
  }
  protected function doItalicsAndBold($text) {
      #
      # Convert emphasis markers (*em*, **strong**, ***both*** and their
      # underscore forms) found in $text into <em> / <strong> elements.
      #
      # $token_stack holds the markers that are currently open and
      # $text_stack the text collected for each of them; index 0 is always
      # the innermost level. $em and $strong hold the open marker of each
      # kind ('' when none is open) and select the expression prepared by
      # prepareItalicsAndBold.
      #
      # Returns the text with every completed span replaced by the value
      # returned from hashPart.
      #
      $token_stack = array('');
      $text_stack = array('');
      $em = '';
      $strong = '';
      $tree_char_em = false;

      while (1) {
          #
          # Get prepared regular expression for searching emphasis tokens
          # in current context.
          #
          $token_re = $this->em_strong_prepared_relist["$em$strong"];

          #
          # Each loop iteration searches for the next emphasis token.
          #
          $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
          $text_stack[0] .= $parts[0];
          # References avoid undefined-offset notices when nothing matched.
          $token =& $parts[1];
          $text =& $parts[2];

          if (empty($token)) {
              # Reached end of text span: empty stack without emitting
              # any more emphasis.
              while ($token_stack[0]) {
                  $text_stack[1] .= array_shift($token_stack);
                  $text_stack[0] .= array_shift($text_stack);
              }
              break;
          }

          $token_len = strlen($token);
          if ($tree_char_em) {
              # Reached closing marker while inside a three-char emphasis.
              if ($token_len == 3) {
                  # Three-char closing marker, close em and strong.
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong><em>$span</em></strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $em = '';
                  $strong = '';
              } else {
                  # Other closing marker: close one em or strong and
                  # change current token state to match the other.
                  # ($token[0] replaces the $token{0} offset syntax, which
                  # is a parse error on PHP 8.)
                  $token_stack[0] = str_repeat($token[0], 3-$token_len);
                  $tag = $token_len == 2 ? "strong" : "em";
                  $span = $text_stack[0];
                  $span = $this->runSpanGamut($span);
                  $span = "<$tag>$span</$tag>";
                  $text_stack[0] = $this->hashPart($span);
                  $$tag = ''; # $$tag stands for $em or $strong
              }
              $tree_char_em = false;
          } else if ($token_len == 3) {
              if ($em) {
                  # Reached closing marker for both em and strong.
                  # Close the two innermost levels, one per iteration.
                  for ($i = 0; $i < 2; ++$i) {
                      $shifted_token = array_shift($token_stack);
                      $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<$tag>$span</$tag>";
                      $text_stack[0] .= $this->hashPart($span);
                      $$tag = ''; # $$tag stands for $em or $strong
                  }
              } else {
                  # Reached opening three-char emphasis marker. Push on token
                  # stack; will be handled by the special condition above.
                  $em = $token[0];
                  $strong = "$em$em";
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $tree_char_em = true;
              }
          } else if ($token_len == 2) {
              if ($strong) {
                  # Unwind any dangling emphasis marker:
                  if (strlen($token_stack[0]) == 1) {
                      $text_stack[1] .= array_shift($token_stack);
                      $text_stack[0] .= array_shift($text_stack);
                  }
                  # Closing strong marker:
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong>$span</strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $strong = '';
              } else {
                  # Opening strong marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $strong = $token;
              }
          } else {
              # Here $token_len == 1
              if ($em) {
                  if (strlen($token_stack[0]) == 1) {
                      # Closing emphasis marker:
                      array_shift($token_stack);
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<em>$span</em>";
                      $text_stack[0] .= $this->hashPart($span);
                      $em = '';
                  } else {
                      # Innermost open marker is not an em marker: keep the
                      # token as literal text.
                      $text_stack[0] .= $token;
                  }
              } else {
                  # Opening emphasis marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $em = $token;
              }
          }
      }
      return $text_stack[0];
  }
  protected function doBlockQuotes($text) {
      #
      # Locate runs of ">"-prefixed lines in $text and hand each run to
      # _doBlockQuotes_callback for conversion.
      #
      # Vanilla: `(.+\n)* # subsequent consecutive lines` was deleted from
      # the pattern.
      #
      $blockquote_re = '/
          (               # Wrap whole match in $1
              (?>
                  ^[ ]*>[ ]? # ">" at the start of a line
                  .+\n # rest of the first line
                  \n* # blanks
              )+
          )
          /xm';

      return preg_replace_callback(
          $blockquote_re,
          array($this, '_doBlockQuotes_callback'),
          $text
      );
  }
  protected function _doBlockQuotes_callback($matches) {
      #
      # Turn one matched blockquote run ($matches[1]) into hashed
      # <blockquote> markup.
      #
      # Remove one level of ">" quoting and empty out whitespace-only lines.
      $quoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

      # Recurse: the quote body is itself block-level Markdown.
      $inner = $this->runBlockGamut($quoted);

      # Prefix every line with a space...
      $inner = preg_replace('/^/m', " ", $inner);
      # ...except inside <pre> blocks, where the added space would alter
      # the content, so it is removed again there.
      $inner = preg_replace_callback(
          '{(\s*<pre>.+?</pre>)}sx',
          array($this, '_doBlockQuotes_callback2'),
          $inner
      );

      # Vanilla: the quote gets the UserQuote class and a QuoteText wrapper.
      $html = "<blockquote class=\"UserQuote\"><div class=\"QuoteText\">\n$inner\n</div></blockquote>";

      return "\n". $this->hashBlock($html)."\n\n";
  }
  protected function _doBlockQuotes_callback2($matches) {
      #
      # Undo, inside a <pre> block ($matches[1]), the per-line space that
      # _doBlockQuotes_callback added to the quote body.
      #
      return preg_replace('/^ /m', '', $matches[1]);
  }
  protected function formParagraphs($text) {
      #
      # Split $text on blank lines, wrap every ordinary chunk in <p> tags
      # and swap hashed blocks back in.
      #
      # Params:
      #   $text - string to process with html <p> tags
      #
      # Note: the special handling of <div markdown="1"> blocks that used to
      # live in the block branch is disabled in this copy of the library;
      # a hashed block is simply replaced by its stored HTML.
      #
      # Strip leading and trailing lines:
      $text = preg_replace('/\A\n+|\n+\z/', '', $text);

      $chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

      foreach ($chunks as $index => $chunk) {
          if (preg_match('/^B\x1A[0-9]+B$/', $chunk)) {
              # The chunk is exactly one block hash key: restore the block.
              $chunks[$index] = $this->html_hashes[$chunk];
              continue;
          }

          # Ordinary paragraph: run span-level transformations, replace the
          # leading spaces with the opening tag, close it, then unhash.
          $paragraph = $this->runSpanGamut($chunk);
          $paragraph = preg_replace('/^([ ]*)/', "<p>", $paragraph);
          $chunks[$index] = $this->unhash($paragraph . "</p>");
      }

      return implode("\n\n", $chunks);
  }
  protected function encodeAttribute($text) {
      #
      # Make $text safe inside a double-quoted HTML attribute: ampersands
      # and "<" are handled by encodeAmpsAndAngles, then double quotes are
      # turned into &quot;.
      #
      # Not suitable for attributes enclosed in single quotes.
      #
      return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
  }
  protected function encodeURLAttribute($url, &$text = null) {
      #
      # Encode $url for a double-quoted HTML attribute, running it through
      # the URL filter first when one is set.
      #
      # $text receives the textual representation of the URL: the encoded
      # URL itself, minus the leading "mailto:" or "tel:" when present.
      #
      # Returns the encoded URL. Not suitable for attributes enclosed in
      # single quotes.
      #
      if ($this->url_filter_func) {
          $url = call_user_func($this->url_filter_func, $url);
      }

      # mailto: URLs are entity-obfuscated; 7 is the length of "mailto:".
      if (preg_match('{^mailto:}i', $url)) {
          return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
      }

      $is_tel = preg_match('{^tel:}i', $url);
      $url = $this->encodeAttribute($url);

      # 4 is the length of "tel:".
      $text = $is_tel ? substr($url, 4) : $url;

      return $url;
  }
  protected function encodeAmpsAndAngles($text) {
      #
      # Encode the ampersands and "<" characters of $text.
      #
      # In no-entities mode every "&" is encoded. Otherwise an "&" that
      # starts something shaped like a character entity is left alone
      # (approach based on Nat Irons's Amputator MT plugin:
      # <http://bumppo.net/projects/amputator/>).
      #
      $text = $this->no_entities
          ? str_replace('&', '&amp;', $text)
          : preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&amp;', $text);

      # Encode remaining <'s
      return str_replace('<', '&lt;', $text);
  }
  protected function doAutoLinks($text) {
      #
      # Convert <scheme:...> and <address@domain> automatic links found in
      # $text, delegating the markup to the two callbacks.
      #
      # URLs: <http://...>, <https://...>, <ftp:...>, <dict:...>, <tel:...>
      $url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';

      # Email addresses: <address@domain.foo>
      $email_re = '{
          <
          (?:mailto:)?
          (
              (?:
                  [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
              |
                  ".*?"
              )
              \@
              (?:
                  [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
              |
                  \[[\d.a-fA-F:]+\] # IPv4 & IPv6
              )
          )
          >
          }xi';

      $text = preg_replace_callback(
          $url_re, array($this, '_doAutoLinks_url_callback'), $text);

      return preg_replace_callback(
          $email_re, array($this, '_doAutoLinks_email_callback'), $text);
  }
  protected function _doAutoLinks_url_callback($matches) {
      #
      # Build the hashed <a> element for an automatic URL link;
      # $matches[1] is the URL found between the angle brackets.
      #
      $label = null; # filled in by encodeURLAttribute
      $href = $this->encodeURLAttribute($matches[1], $label);

      return $this->hashPart("<a href=\"$href\">$label</a>");
  }
  protected function _doAutoLinks_email_callback($matches) {
      #
      # Build the hashed <a> element for an automatic email link;
      # $matches[1] is the address without any "mailto:" prefix.
      #
      $label = null; # filled in by encodeURLAttribute
      $href = $this->encodeURLAttribute("mailto:" . $matches[1], $label);

      return $this->hashPart("<a href=\"$href\">$label</a>");
  }
  protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
      #
      # Input: some text to obfuscate, e.g. "mailto:foo@example.com"
      #
      # Output: the same text with most ASCII characters written as either a
      # decimal or a hexadecimal entity, in the hope of foiling address
      # harvesting spam bots. E.g.:
      #
      #   &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
      #   &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
      #   &#x6d;
      #
      # $tail receives the same output minus its first $head_length
      # characters (the whole output when $head_length is 0).
      #
      # Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
      # With some optimizations by Milian Wolff. Forced encoding of HTML
      # attribute special characters by Allan Odgaard.
      #
      if ($text == "") {
          $tail = "";
          return $tail;
      }

      $pieces = preg_split('/(?<!^)(?!$)/', $text);

      # Deterministic seed: the same input always gives the same output.
      $seed = (int)abs(crc32($text) / strlen($text));

      foreach ($pieces as $position => $piece) {
          $code = ord($piece);

          # Non-ASCII bytes are passed through untouched.
          if ($code >= 128) {
              continue;
          }

          # Pseudo-random value: roughly 10% raw, 45% hex, 45% dec.
          $roll = ($seed * (1 + $position)) % 100;

          # '@' *must* be encoded; '"', '&' and '>' have to be encoded
          # inside the attribute. Everything else may stay raw.
          $may_stay_raw = strpos('@"&>', $piece) === false;
          if ($roll > 90 && $may_stay_raw) {
              continue;
          }

          $pieces[$position] = $roll < 45
              ? '&#x'.dechex($code).';'
              : '&#'.$code.';';
      }

      $encoded = implode('', $pieces);
      $tail = $head_length
          ? implode('', array_slice($pieces, $head_length))
          : $encoded;

      return $encoded;
  }
  protected function parseSpan($str) {
      #
      # Walk through $str token by token, hashing embedded HTML and escaped
      # characters and handling code spans, and return the result.
      #
      # Inline HTML is only tokenized when markup is allowed.
      $markup_re = $this->no_markup ? '' : '
              |
                  <!-- .*? --> # comment
              |
                  <\?.*?\?> | <%.*?%> # processing instruction
              |
                  <[!$]?[-a-zA-Z0-9:_]+ # regular tags
                  (?>
                      \s
                      (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                  )?
                  >
              |
                  <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
              |
                  </[-a-zA-Z0-9:_]+\s*> # closing tag
              ';

      $span_re = '{
              (
                  \\\\'.$this->escape_chars_re.'
              |
                  (?<![`\\\\])
                  `+ # code span marker
              '.$markup_re.'
              )
              }xs';

      $output = '';
      do {
          #
          # Each iteration looks for the next tag, the next opening code
          # span marker, or the next escaped character.
          #
          $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

          # Text preceding the token goes out unchanged.
          $output .= $parts[0];

          # No token means the end of the string was reached.
          $found_token = isset($parts[1]);
          if ($found_token) {
              # handleSpanToken may consume part of the remaining text
              # (it receives $parts[2] by reference).
              $output .= $this->handleSpanToken($parts[1], $parts[2]);
              $str = $parts[2];
          }
      } while ($found_token);

      return $output;
  }
  protected function handleSpanToken($token, &$str) {
      #
      # Handle $token provided by parseSpan by determining its nature and
      # returning the corresponding value that should replace it.
      #
      # $str is the text following the token; it is passed by reference
      # and is shortened when a code span consumes part of it.
      #
      # Note: string offsets use $token[n]; the former $token{n} syntax is
      # a parse error on PHP 8.
      #
      switch ($token[0]) {
          case "\\":
              # Backslash escape: emit the escaped character as a numeric
              # entity.
              return $this->hashPart("&#". ord($token[1]). ";");
          case "`":
              # Search for end marker in remaining text.
              if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                  $str, $matches))
              {
                  $str = $matches[2];
                  $codespan = $this->makeCodeSpan($matches[1]);
                  return $this->hashPart($codespan);
              }
              return $token; // return as text since no ending marker found.
          default:
              # Anything else matched by parseSpan is hashed as is.
              return $this->hashPart($token);
      }
  }
  protected function outdent($text) {
      #
      # Strip one indentation level from the start of every line of $text:
      # either a single tab or up to tab_width spaces.
      #
      $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';

      return preg_replace($indent_re, '', $text);
  }
  1301. # String length function for detab. `_initDetab` will create a function to
  1302. # handle UTF-8 if the default function does not exist.
  1303. protected $utf8_strlen = 'mb_strlen';
  protected function detab($text) {
      #
      # Replace tabs with the appropriate amount of spaces.
      #
      # Only lines that actually contain a tab are touched; each of them is
      # rebuilt by _detab_callback.
      #
      $line_with_tab_re = '/^.*\t.*$/m';

      return preg_replace_callback(
          $line_with_tab_re, array($this, '_detab_callback'), $text);
  }
  protected function _detab_callback($matches) {
      #
      # Expand the tabs of one line ($matches[0]): the line is cut at every
      # tab and glued back together with enough spaces to reach the next
      # multiple of tab_width.
      #
      $measure = $this->utf8_strlen; # strlen function for UTF-8.

      $segments = explode("\t", $matches[0]);

      # Start from the text before the first tab; the remaining segments
      # each follow one tab.
      $rebuilt = array_shift($segments);

      foreach ($segments as $segment) {
          $padding = $this->tab_width -
              $measure($rebuilt, 'UTF-8') % $this->tab_width;
          $rebuilt .= str_repeat(" ", $padding) . $segment;
      }

      return $rebuilt;
  }
  protected function _initDetab() {
      #
      # Check for the availability of the function named in the
      # `utf8_strlen` property (initially `mb_strlen`). If it is not
      # available, install a closure that loosely counts the number of
      # UTF-8 characters with a regular expression.
      #
      # The fallback used to be built with create_function(), which no
      # longer exists on PHP 8; a closure is used instead.
      #
      # A closure installed by a previous call counts as available; it must
      # not be handed to function_exists(), which expects a function name.
      if ($this->utf8_strlen instanceof \Closure) return;

      if (function_exists($this->utf8_strlen)) return;

      # _detab_callback calls this with a second 'UTF-8' argument, which the
      # closure simply ignores.
      $this->utf8_strlen = function ($text) {
          return preg_match_all(
              '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
              $text, $m);
      };
  }
  protected function unhash($text) {
      #
      # Swap the original content back in for every hash key found in
      # $text. A key is a boundary character, \x1A, a number, then the same
      # boundary character again.
      #
      $hash_key_re = '/(.)\x1A[0-9]+\1/';

      return preg_replace_callback(
          $hash_key_re, array($this, '_unhash_callback'), $text);
  }
  protected function _unhash_callback($matches) {
      #
      # Look up the content stored under the matched hash key.
      #
      $hash_key = $matches[0];

      return $this->html_hashes[$hash_key];
  }
  1353. }
  1354. #
  1355. # Temporary Markdown Extra Parser Implementation Class
  1356. #
  1357. # NOTE: DON'T USE THIS CLASS
  1358. # Currently the implementation of Extra resides here in this temporary class.
  1359. # This makes it easier to propagate the changes between the three different
  1360. # packaging styles of PHP Markdown. When this issue is resolved, this
  1361. # MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
  1362. # will contain the code. So please use \Michelf\MarkdownExtra and ignore this
  1363. # one.
  1364. #
  1365. abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
  1366. ### Configuration Variables ###
  1367. # Prefix for footnote ids.
  1368. public $fn_id_prefix = "";
  1369. # Optional title attribute for footnote links and backlinks.
  1370. public $fn_link_title = "";
  1371. public $fn_backlink_title = "";
  1372. # Optional class attribute for footnote links and backlinks.
  1373. public $fn_link_class = "footnote-ref";
  1374. public $fn_backlink_class = "footnote-backref";
  1375. # Class name for table cell alignment (%% replaced left/center/right)
  1376. # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
  1377. # If empty, the align attribute is used instead of a class name.
  1378. public $table_align_class_tmpl = '';
  1379. # Optional class prefix for fenced code block.
  1380. public $code_class_prefix = "";
  1381. # Class attribute for code blocks goes on the `code` tag;
  1382. # setting this to true will put attributes on the `pre` tag instead.
  1383. public $code_attr_on_pre = false;
  1384. # Predefined abbreviations.
  1385. public $predef_abbr = array();
  1386. ### Parser Implementation ###
  public function __construct() {
      #
      # Constructor function. Register the Extra-specific escapable
      # characters and transformation steps, then hand over to the parent
      # constructor.
      #
      # The extra escapable characters have to be added before the parent
      # constructor initializes the escape table.
      $this->escape_chars .= ':|';

      # Extra document, block and span transformations, each with its
      # priority. The parent constructor does the sorting.
      $extra_document_steps = array(
          "doFencedCodeBlocks" => 5,
          "stripFootnotes"     => 15,
          "stripAbbreviations" => 25,
          "appendFootnotes"    => 50,
      );
      $extra_block_steps = array(
          "doFencedCodeBlocks" => 5,
          "doTables"           => 15,
          "doDefLists"         => 45,
      );
      $extra_span_steps = array(
          "doFootnotes"     => 5,
          "doAbbreviations" => 70,
      );

      # Array union: steps already present in a gamut keep their priority.
      $this->document_gamut += $extra_document_steps;
      $this->block_gamut += $extra_block_steps;
      $this->span_gamut += $extra_span_steps;

      parent::__construct();
  }
  1413. # Extra variables used during extra transformations.
  1414. protected $footnotes = array();
  1415. protected $footnotes_ordered = array();
  1416. protected $footnotes_ref_count = array();
  1417. protected $footnotes_numbers = array();
  1418. protected $abbr_desciptions = array();
  1419. protected $abbr_word_re = '';
  1420. # Give the current footnote number.
  1421. protected $footnote_counter = 1;
  protected function setup() {
      #
      # Reset the Extra-specific state, then seed the abbreviation table
      # from the predefined abbreviations.
      #
      parent::setup();

      $this->footnotes =
      $this->footnotes_ordered =
      $this->footnotes_ref_count =
      $this->footnotes_numbers =
      $this->abbr_desciptions = array();

      $this->abbr_word_re = '';
      $this->footnote_counter = 1;

      foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
          # Words are joined into one alternation: "word1|word2|...".
          $separator = $this->abbr_word_re ? '|' : '';
          $this->abbr_word_re .= $separator . preg_quote($abbr_word);

          $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
      }
  }
  protected function teardown() {
      #
      # Drop the Extra-specific state collected during the transformation,
      # then let the parent clean up its own.
      #
      $this->footnotes =
      $this->footnotes_ordered =
      $this->footnotes_ref_count =
      $this->footnotes_numbers =
      $this->abbr_desciptions = array();

      $this->abbr_word_re = '';

      parent::teardown();
  }
  1453. ### Extra Attribute Parser ###
  1454. # Expression to use to catch attributes (includes the braces)
  1455. protected $id_class_attr_catch_re = '\{((?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
  1456. # Expression to use when parsing in a context when no capture is desired
  1457. protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
  protected function doExtraAttributes($tag_name, $attr) {
      #
      # Parse attributes caught by the $this->id_class_attr_catch_re
      # expression and return the HTML-formatted list of attributes.
      #
      # Supported components are .class, #id and key=value. Only the first
      # #id is kept, and key=value pairs are dropped when no_markup is set.
      #
      # $tag_name is not used by this implementation.
      #
      # Returns the attribute string, each attribute preceded by a space,
      # or "" when $attr is empty.
      #
      if (empty($attr)) return "";

      # Split on components
      preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
      $elements = $matches[0];

      # handle classes and ids (only first id taken into account)
      $classes = array();
      $attributes = array();
      $id = false;
      foreach ($elements as $element) {
          # $element[0] replaces the $element{0} offset syntax, which is a
          # parse error on PHP 8.
          if ($element[0] == '.') {
              $classes[] = substr($element, 1);
          } else if ($element[0] == '#') {
              if ($id === false) $id = substr($element, 1);
          } else if (strpos($element, '=') > 0) {
              $parts = explode('=', $element, 2);
              $attributes[] = $parts[0] . '="' . $parts[1] . '"';
          }
      }

      # compose attributes as string
      $attr_str = "";
      if (!empty($id)) {
          $attr_str .= ' id="'.$id.'"';
      }
      if (!empty($classes)) {
          $attr_str .= ' class="'.implode(" ", $classes).'"';
      }
      if (!$this->no_markup && !empty($attributes)) {
          $attr_str .= ' '.implode(" ", $attributes);
      }
      return $attr_str;
  }
  protected function stripLinkDefinitions($text) {
      #
      # Remove link definitions from $text; the callback stores their URLs,
      # titles and extra attributes for later reference.
      #
      # Link defs are in the form: ^[id]: url "optional title"
      #
      $max_indent = $this->tab_width - 1;

      $definition_re = '{
          ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
            [ ]*
            \n? # maybe *one* newline
            [ ]*
          (?:
            <(.+?)> # url = $2
          |
            (\S+?) # url = $3
          )
            [ ]*
            \n? # maybe one newline
            [ ]*
          (?:
              (?<=\s) # lookbehind for whitespace
              ["(]
              (.*?) # title = $4
              [")]
              [ ]*
          )? # title is optional
          (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr
          (?:\n+|\Z)
          }xm';

      return preg_replace_callback(
          $definition_re,
          array($this, '_stripLinkDefinitions_callback'),
          $text
      );
  }
  protected function _stripLinkDefinitions_callback($matches) {
      #
      # Record one link definition under its lowercased id and remove it
      # from the text.
      #
      $link_id = strtolower($matches[1]);

      # The URL is in $2 when written <like this>, otherwise in $3.
      $this->urls[$link_id] = $matches[2] == '' ? $matches[3] : $matches[2];

      # Title ($4) and extra attributes ($5) are optional groups and may be
      # missing from $matches altogether.
      $this->titles[$link_id] = isset($matches[4]) ? $matches[4] : null;

      $extra_attr = isset($matches[5]) ? $matches[5] : null;
      $this->ref_attr[$link_id] = $this->doExtraAttributes("", $extra_attr);

      return ''; # String that will replace the block
  }
  1538. ### HTML Block Parser ###
  1539. # Tags that are always treated as block tags:
  1540. protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
  1541. # Tags treated as block tags only if the opening tag is alone on its line:
  1542. protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
  1543. # Tags where markdown="1" default to span mode:
  1544. protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
  1545. # Tags which must not have their contents modified, no matter where
  1546. # they appear:
  1547. protected $clean_tags_re = 'script|style|math|svg';
  1548. # Tags that do not need to be closed.
  1549. protected $auto_close_tags_re = 'hr|img|param|source|track';
  protected function hashHTMLBlocks($text) {
      #
      # Hashify HTML Blocks and "clean tags".
      #
      # Only block-level HTML tags (headers, lists, tables...) are hashed,
      # because "paragraphs" wrapped in non-block-level tags such as
      # anchors, phrase emphasis and spans must still get their <p>s. The
      # list of tags is hard-coded.
      #
      # The work is done by _hashHTMLBlocks_inMarkdown, which calls
      # _hashHTMLBlocks_inHTML when it encounters block tags. When the
      # markdown="1" attribute is found within a tag, _hashHTMLBlocks_inHTML
      # calls back _hashHTMLBlocks_inMarkdown to handle the Markdown syntax
      # within the tag. The two functions call each other recursively.
      #
      # Nothing to hash when markup is disallowed.
      if ($this->no_markup) {
          return $text;
      }

      # The HTML-in-Markdown hasher returns
      # ( processed text , remaining text ); only the first is wanted.
      $result = $this->_hashHTMLBlocks_inMarkdown($text);

      return $result[0];
  }
  1573. protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
  1574. $enclosing_tag_re = '', $span = false)
  1575. {
  1576. #
  1577. # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
  1578. #
  1579. # * $indent is the number of space to be ignored when checking for code
  1580. # blocks. This is important because if we don't take the indent into
  1581. # account, something like this (which looks right) won't work as expected:
  1582. #
  1583. # <div>
  1584. # <div markdown="1">
  1585. # Hello World. <-- Is this a Markdown code block or text?
  1586. # </div> <-- Is this a Markdown code block or a real tag?
  1587. # <div>
  1588. #
  1589. # If you don't like this, just don't indent the tag on which
  1590. # you apply the markdown="1" attribute.
  1591. #
  1592. # * If $enclosing_tag_re is not empty, stops at the first unmatched closing
  1593. # tag with that name. Nested tags supported.
  1594. #
  1595. # * If $span is true, text inside must treated as span. So any double
  1596. # newline will be replaced by a single newline so that it does not create
  1597. # paragraphs.
  1598. #
  1599. # Returns an array of that form: ( processed text , remaining text )
  1600. #
  1601. if ($text === '') return array('', '');
  1602. # Regex to check for the presense of newlines around a block tag.
  1603. $newline_before_re = '/(?:^\n?|\n\n)*$/';
  1604. $newline_after_re =
  1605. '{
  1606. ^ # Start of text following the tag.
  1607. (?>[ ]*<!--.*?-->)? # Optional comment.
  1608. [ ]*\n # Must be followed by newline.
  1609. }xs';
  1610. # Regex to match any tag.
  1611. $block_tag_re =
  1612. '{
  1613. ( # $2: Capture whole tag.
  1614. </? # Any opening or closing tag.
  1615. (?> # Tag name.
  1616. '.$this->block_tags_re.' |
  1617. '.$this->context_block_tags_re.' |
  1618. '.$this->clean_tags_re.' |
  1619. (?!\s)'.$enclosing_tag_re.'
  1620. )
  1621. (?:
  1622. (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
  1623. (?>
  1624. ".*?" | # Double quotes (can contain `>`)
  1625. \'.*?\' | # Single quotes (can contain `>`)
  1626. .+? # Anything but quotes and `>`.
  1627. )*?
  1628. )?
  1629. > # End of tag.
  1630. |
  1631. <!-- .*? --> # HTML Comment
  1632. |
  1633. <\?.*?\?> | <%.*?%> # Processing instruction
  1634. |
  1635. <!\[CDATA\[.*?\]\]> # CData Block
  1636. '. ( !$span ? ' # If not in span.
  1637. |
  1638. # Indented code block
  1639. (?: ^[ ]*\n | ^ | \n[ ]*\n )
  1640. [ ]{'.($indent+4).'}[^\n]* \n
  1641. (?>
  1642. (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
  1643. )*
  1644. |
  1645. # Fenced code block marker
  1646. (?<= ^ | \n )
  1647. [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
  1648. [ ]*
  1649. (?:
  1650. \.?[-_:a-zA-Z0-9]+ # standalone class name
  1651. |
  1652. '.$this->id_class_attr_nocatch_re.' # extra attributes
  1653. )?
  1654. [ ]*
  1655. (?= \n )
  1656. ' : '' ). ' # End (if not is span).
  1657. |
  1658. # Code span marker
  1659. # Note, this regex needs to go after backtick fenced
  1660. # code blocks but it should also be kept outside of the
  1661. # "if not in span" condition adding backticks to the parser
  1662. `+
  1663. )
  1664. }xs';
  1665. $depth = 0; # Current depth inside the tag tree.
  1666. $parsed = ""; # Parsed text that will be returned.
  1667. #
  1668. # Loop through every tag until we find the closing tag of the parent
  1669. # or loop until reaching the end of text if no parent tag specified.
  1670. #
  1671. do {
  1672. #
  1673. # Split the text using the first $tag_match pattern found.
  1674. # Text before pattern will be first in the array, text after
  1675. # pattern will be at the end, and between will be any catches made
  1676. # by the pattern.
  1677. #
  1678. $parts = preg_split($block_tag_re, $text, 2,
  1679. PREG_SPLIT_DELIM_CAPTURE);
  1680. # If in Markdown span mode, add a empty-string span-level hash
  1681. # after each newline to prevent triggering any block element.
  1682. if ($span) {
  1683. $void = $this->hashPart("", ':');
  1684. $newline = "$void\n";
  1685. $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
  1686. }
  1687. $parsed .= $parts[0]; # Text before current tag.
  1688. # If end of $text has been reached. Stop loop.
  1689. if (count($parts) < 3) {
  1690. $text = "";
  1691. break;
  1692. }
  1693. $tag = $parts[1]; # Tag to handle.
  1694. $text = $parts[2]; # Remaining text after current tag.
  1695. $tag_re = preg_quote($tag); # For use in a regular expression.
  1696. #
  1697. # Check for: Fenced code block marker.
  1698. # Note: need to recheck the whole tag to disambiguate backtick
  1699. # fences from code spans
  1700. #
  1701. if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
  1702. # Fenced code block marker: find matching end marker.
  1703. $fence_indent = strlen($capture[1]); # use captured indent in re
  1704. $fence_re = $capture[2]; # use captured fence in re
  1705. if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
  1706. $matches))
  1707. {
  1708. # End marker found: pass text unchanged until marker.
  1709. $parsed .= $tag . $matches[0];
  1710. $text = substr($text, strlen($matches[0]));
  1711. }
  1712. else {
  1713. # No end marker: just skip it.
  1714. $parsed .= $tag;
  1715. }
  1716. }
  1717. #
  1718. # Check for: Indented code block.
  1719. #
  1720. else if ($tag{0} == "\n" || $tag{0} == " ") {
  1721. # Indented code block: pass it unchanged, will be handled
  1722. # later.
  1723. $parsed .= $tag;
  1724. }
  1725. #
  1726. # Check for: Code span marker
  1727. # Note: need to check this after backtick fenced code blocks
  1728. #
  1729. else if ($tag{0} == "`") {
  1730. # Find corresponding end marker.
  1731. $tag_re = preg_quote($tag);
  1732. if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
  1733. $text, $matches))
  1734. {
  1735. # End marker found: pass text unchanged until marker.
  1736. $parsed .= $tag . $matches[0];
  1737. $text = substr($text, strlen($matches[0]));
  1738. }
  1739. else {
  1740. # Unmatched marker: just skip it.
  1741. $parsed .= $tag;
  1742. }
  1743. }
  1744. #
  1745. # Check for: Opening Block level tag or
  1746. # Opening Context Block tag (like ins and del)
  1747. # used as a block tag (tag is alone on it's line).
  1748. #
  1749. else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
  1750. ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
  1751. preg_match($newline_before_re, $parsed) &&
  1752. preg_match($newline_after_re, $text) )
  1753. )
  1754. {
  1755. # Need to parse tag and following text using the HTML parser.
  1756. list($block_text, $text) =
  1757. $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
  1758. # Make sure it stays outside of any paragraph by adding newlines.
  1759. $parsed .= "\n\n$block_text\n\n";
  1760. }
  1761. #
  1762. # Check for: Clean tag (like script, math)
  1763. # HTML Comments, processing instructions.
  1764. #
  1765. else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
  1766. $tag{1} == '!' || $tag{1} == '?')
  1767. {
  1768. # Need to parse tag and following text using the HTML parser.
  1769. # (don't check for markdown attribute)
  1770. list($block_text, $text) =
  1771. $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
  1772. $parsed .= $block_text;
  1773. }
  1774. #
  1775. # Check for: Tag with same name as enclosing tag.
  1776. #
  1777. else if ($enclosing_tag_re !== '' &&
  1778. # Same name as enclosing tag.
  1779. preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
  1780. {
  1781. #
  1782. # Increase/decrease nested tag count.
  1783. #
  1784. if ($tag{1} == '/') $depth--;
  1785. else if ($tag{strlen($tag)-2} != '/') $depth++;
  1786. if ($depth < 0) {
  1787. #
  1788. # Going out of parent element. Clean up and break so we
  1789. # return to the calling function.
  1790. #
  1791. $text = $tag . $text;
  1792. break;
  1793. }
  1794. $parsed .= $tag;
  1795. }
  1796. else {
  1797. $parsed .= $tag;
  1798. }
  1799. } while ($depth >= 0);
  1800. return array($parsed, $text);
  1801. }
  protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
    #
    # Parse HTML, calling _hashHTMLBlocks_inMarkdown for the content of any
    # tag that carries a valid `markdown` attribute.
    #
    # *   $text is expected to begin with the tag (or comment / processing
    #     instruction) that opens the HTML block.
    # *   $hash_method is the name of the method on this object used to
    #     convert each finished chunk of HTML into a hash token.
    # *   Parsing stops when the first opening tag closes.
    # *   $md_attr indicates if the use of the `markdown="1"` attribute is
    #     allowed (it is not inside clean tags).
    #
    # Returns an array of that form: ( processed text , remaining text )
    #
    if ($text === '') return array('', '');

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
{
\s* # Eat whitespace before the `markdown` attribute
markdown
\s*=\s*
(?>
(["\']) # $1: quote delimiter
(.*?) # $2: attribute value
\1 # matching delimiter
|
([^\s>]*) # $3: unquoted attribute value
)
() # $4: make $3 always defined (avoid warnings)
}xs';

    # Regex to match any tag.
    $tag_re = '{
( # $2: Capture whole tag.
</? # Any opening or closing tag.
[\w:$]+ # Tag name.
(?:
(?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
(?>
".*?" | # Double quotes (can contain `>`)
\'.*?\' | # Single quotes (can contain `>`)
.+? # Anything but quotes and `>`.
)*?
)?
> # End of tag.
|
<!-- .*? --> # HTML Comment
|
<\?.*?\?> | <%.*?%> # Processing instruction
|
<!\[CDATA\[.*?\]\]> # CData Block
)
}xs';

    $original_text = $text; # Save original text in case of failure.
    $depth = 0;             # Current depth inside the tag tree.
    $block_text = "";       # Temporary text holder for current text.
    $parsed = "";           # Parsed text that will be returned.

    #
    # Get the name of the starting tag.
    # (This pattern makes $base_tag_name_re safe without quoting.)
    # The name stays empty when $text opens with a comment or processing
    # instruction, so the variable is always defined before it is
    # interpolated into a pattern below.
    #
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
      $base_tag_name_re = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
      #
      # Split the text using the first $tag_re match found.
      # Text before the match will be first in the array, text after
      # the match will be at the end, and between will be the captured tag.
      #
      $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

      if (count($parts) < 3) {
        #
        # End of $text reached with unbalanced tag(s).
        # In that case, we return original text unchanged and pass the
        # first character as filtered to prevent an infinite loop in the
        # parent function.
        #
        return array($original_text[0], substr($original_text, 1));
      }

      $block_text .= $parts[0]; # Text before current tag.
      $tag = $parts[1];         # Tag to handle.
      $text = $parts[2];        # Remaining text after current tag.

      #
      # Check for: Auto-close tag (like <hr/>)
      #            Comments and Processing Instructions.
      #
      if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
        $tag[1] == '!' || $tag[1] == '?')
      {
        # Just add the tag to the block as if it was text.
        $block_text .= $tag;
      }
      else {
        #
        # Increase/decrease nested tag count. Only do so if
        # the tag's name match base tag's. A self-closing tag (one
        # ending in "/>") leaves the depth untouched.
        #
        if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
          if ($tag[1] == '/') $depth--;
          else if ($tag[strlen($tag)-2] != '/') $depth++;
        }

        #
        # Check for `markdown="1"` attribute and handle it.
        # The alternation is grouped so that the anchors apply to every
        # alternative: only the exact values `1`, `block` and `span` are
        # accepted.
        #
        if ($md_attr &&
          preg_match($markdown_attr_re, $tag, $attr_m) &&
          preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
        {
          # Remove `markdown` attribute from opening tag.
          $tag = preg_replace($markdown_attr_re, '', $tag);

          # Check if text inside this tag must be parsed in span mode.
          $this->mode = $attr_m[2] . $attr_m[3];
          $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
            preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);

          # Calculate indent before tag.
          if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
            $strlen = $this->utf8_strlen;
            $indent = $strlen($matches[1], 'UTF-8');
          } else {
            $indent = 0;
          }

          # End preceding block with this tag.
          $block_text .= $tag;
          $parsed .= $this->$hash_method($block_text);

          # Get enclosing tag name for the ParseMarkdown function.
          # (This pattern makes $tag_name_re safe without quoting.)
          preg_match('/^<([\w:$]*)\b/', $tag, $matches);
          $tag_name_re = $matches[1];

          # Parse the content using the HTML-in-Markdown parser.
          list ($block_text, $text)
            = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
              $tag_name_re, $span_mode);

          # Outdent markdown text.
          if ($indent > 0) {
            $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
              $block_text);
          }

          # Append tag content to parsed text.
          if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
          else $parsed .= "$block_text";

          # Start over with a new block.
          $block_text = "";
        }
        else $block_text .= $tag;
      }

    } while ($depth > 0);

    #
    # Hash last block text that wasn't processed inside the loop.
    #
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
  }
  protected function hashClean($text) {
    #
    # Hash a "clean" tag: the text is handed to hashPart() with the 'C'
    # boundary character, so the token that replaces it in the document is
    # distinguishable from tokens made with other boundary characters.
    # Used as a $hash_method by the HTML block parser.
    #
    $clean_hash = $this->hashPart($text, 'C');
    return $clean_hash;
  }
  protected function doAnchors($text) {
    #
    # Turn Markdown link shortcuts into XHTML <a> tags.
    #
    # The in_anchor flag is raised for the duration of the call; when it is
    # already raised on entry the text is returned untouched, so link syntax
    # found while processing a link's own text is not converted again.
    #
    if ($this->in_anchor) return $text;
    $this->in_anchor = true;

    $reference_callback = array($this, '_doAnchors_reference_callback');
    $inline_callback = array($this, '_doAnchors_inline_callback');

    #
    # First, handle reference-style links: [link text] [id]
    #
    $reference_re = '{
( # wrap whole match in $1
\[
('.$this->nested_brackets_re.') # link text = $2
\]
[ ]? # one optional space
(?:\n[ ]*)? # one optional newline followed by spaces
\[
(.*?) # id = $3
\]
)
}xs';
    $text = preg_replace_callback($reference_re, $reference_callback, $text);

    #
    # Next, inline-style links: [link text](url "optional title")
    #
    $inline_re = '{
( # wrap whole match in $1
\[
('.$this->nested_brackets_re.') # link text = $2
\]
\( # literal paren
[ \n]*
(?:
<(.+?)> # href = $3
|
('.$this->nested_url_parenthesis_re.') # href = $4
)
[ \n]*
( # $5
([\'"]) # quote char = $6
(.*?) # Title = $7
\6 # matching quote
[ \n]* # ignore any spaces/tabs between closing quote and )
)? # title is optional
\)
(?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes
)
}xs';
    $text = preg_replace_callback($inline_re, $inline_callback, $text);

    #
    # Last, handle reference-style shortcuts: [link text]
    # These must come last in case you've also got [link text][1]
    # or [link text](/foo)
    #
    $shortcut_re = '{
( # wrap whole match in $1
\[
([^\[\]]+) # link text = $2; can\'t contain [ or ]
\]
)
}xs';
    $text = preg_replace_callback($shortcut_re, $reference_callback, $text);

    $this->in_anchor = false;
    return $text;
  }
  protected function _doAnchors_reference_callback($matches) {
    #
    # Replacement callback for reference-style links ([text][id], [text][]
    # and the [text] shortcut).
    #
    # $matches[1] is the whole match, $matches[2] the link text and
    # $matches[3] the reference id (absent for the [text] shortcut).
    # Returns the hashed <a> element when the id is a known key of
    # $this->urls, otherwise the original text unchanged.
    #
    $link_text = $matches[2];

    # For shortcut links like [this][] or [this] the link text is the id.
    $link_id = isset($matches[3]) ? $matches[3] : '';
    if ($link_id == "") {
      $link_id = $link_text;
    }

    # lower-case and turn embedded newlines into spaces
    $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

    # Unknown reference: leave the source text intact.
    if (!isset($this->urls[$link_id])) {
      return $matches[1];
    }

    $href = $this->encodeURLAttribute($this->urls[$link_id]);
    $anchor = "<a href=\"$href\"";

    if (isset($this->titles[$link_id])) {
      $title = $this->encodeAttribute($this->titles[$link_id]);
      $anchor .= " title=\"$title\"";
    }

    # Extra attributes stored with the reference definition, if any.
    if (isset($this->ref_attr[$link_id])) {
      $anchor .= $this->ref_attr[$link_id];
    }

    $link_text = $this->runSpanGamut($link_text);
    $anchor .= ">$link_text</a>";

    return $this->hashPart($anchor);
  }
  protected function _doAnchors_inline_callback($matches) {
    #
    # Replacement callback for inline links: [text](url "title"){attrs}
    #
    # $matches[2] is the link text, $matches[3] / $matches[4] the URL
    # (angle-bracketed or bare form), $matches[7] the optional title and
    # $matches[8] the optional id/class attribute block.
    # Returns the hashed <a> element.
    #
    $link_text = $this->runSpanGamut($matches[2]);

    if ($matches[3] == '') {
      $url = $matches[4];
    } else {
      $url = $matches[3];
    }

    $attr_source = isset($matches[8]) ? $matches[8] : null;
    $attr = $this->doExtraAttributes("a", $attr_source);

    // if the URL was of the form <s p a c e s> it got caught by the HTML
    // tag parser and hashed. Need to reverse the process before using the URL.
    $unhashed = $this->unhash($url);
    if ($unhashed != $url) {
      $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
    }

    $url = $this->encodeURLAttribute($url);
    $anchor = "<a href=\"$url\"";

    if (isset($matches[7])) {
      $title = $this->encodeAttribute($matches[7]);
      $anchor .= " title=\"$title\"";
    }

    $anchor .= $attr;

    # NOTE(review): the link text already went through runSpanGamut() above;
    # this second pass is kept because it is part of the existing behaviour.
    # Confirm whether it is intentional before removing it.
    $link_text = $this->runSpanGamut($link_text);
    $anchor .= ">$link_text</a>";

    return $this->hashPart($anchor);
  }
  protected function doImages($text) {
    #
    # Turn Markdown image shortcuts into <img> tags.
    #
    # Two passes are made over $text: reference-style images first, then
    # inline images. Each match is replaced by the matching callback's
    # return value.
    #

    #
    # First, handle reference-style labeled images: ![alt text][id]
    #
    $reference_re = '{
( # wrap whole match in $1
!\[
('.$this->nested_brackets_re.') # alt text = $2
\]
[ ]? # one optional space
(?:\n[ ]*)? # one optional newline followed by spaces
\[
(.*?) # id = $3
\]
)
}xs';
    $text = preg_replace_callback($reference_re,
      array($this, '_doImages_reference_callback'), $text);

    #
    # Next, handle inline images: ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    $inline_re = '{
( # wrap whole match in $1
!\[
('.$this->nested_brackets_re.') # alt text = $2
\]
\s? # One optional whitespace character
\( # literal paren
[ \n]*
(?:
<(\S*)> # src url = $3
|
('.$this->nested_url_parenthesis_re.') # src url = $4
)
[ \n]*
( # $5
([\'"]) # quote char = $6
(.*?) # title = $7
\6 # matching quote
[ \n]*
)? # title is optional
\)
(?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes
)
}xs';
    $text = preg_replace_callback($inline_re,
      array($this, '_doImages_inline_callback'), $text);

    return $text;
  }
  protected function _doImages_reference_callback($matches) {
    #
    # Replacement callback for reference-style images: ![alt text][id]
    #
    # $matches[1] is the whole match, $matches[2] the alt text and
    # $matches[3] the reference id. Returns the hashed <img> element when
    # the id is a known key of $this->urls, otherwise the original text.
    #
    $alt_text = $matches[2];

    $link_id = strtolower($matches[3]);
    if ($link_id == "") {
      # for shortcut links like ![this][].
      $link_id = strtolower($alt_text);
    }

    $alt_text = $this->encodeAttribute($alt_text);

    # If there's no such link ID, leave intact:
    if (!isset($this->urls[$link_id])) {
      return $matches[1];
    }

    $src = $this->encodeURLAttribute($this->urls[$link_id]);
    $img = "<img src=\"$src\" alt=\"$alt_text\"";

    if (isset($this->titles[$link_id])) {
      $title = $this->encodeAttribute($this->titles[$link_id]);
      $img .= " title=\"$title\"";
    }

    # Extra attributes stored with the reference definition, if any.
    if (isset($this->ref_attr[$link_id])) {
      $img .= $this->ref_attr[$link_id];
    }

    $img .= $this->empty_element_suffix;

    return $this->hashPart($img);
  }
  protected function _doImages_inline_callback($matches) {
    #
    # Replacement callback for inline images: ![alt](url "title"){attrs}
    #
    # $matches[2] is the alt text, $matches[3] / $matches[4] the source URL
    # (angle-bracketed or bare form), $matches[7] the optional title and
    # $matches[8] the optional id/class attribute block.
    # Returns the hashed <img> element.
    #
    if ($matches[3] == '') {
      $src = $matches[4];
    } else {
      $src = $matches[3];
    }

    $attr_source = isset($matches[8]) ? $matches[8] : null;
    $attr = $this->doExtraAttributes("img", $attr_source);

    $alt_text = $this->encodeAttribute($matches[2]);
    $src = $this->encodeURLAttribute($src);
    $img = "<img src=\"$src\" alt=\"$alt_text\"";

    if (isset($matches[7])) {
      $title = $this->encodeAttribute($matches[7]);
      $img .= " title=\"$title\"";
    }

    $img .= $attr;
    $img .= $this->empty_element_suffix;

    return $this->hashPart($img);
  }
  protected function doHeaders($text) {
    #
    # Redefined to add id and class attribute support.
    #
    # Setext-style headers are converted first, then atx-style headers.
    #
    # Setext-style headers:
    #     Header 1  {#header1}
    #     ========
    #
    #     Header 2  {#header2 .class1 .class2}
    #     --------
    #
    # Note on group numbers: _doHeaders_callback_setext reads the
    # attribute block from $matches[2] and the underline from $matches[3];
    # the "$3 = id/class attributes" remark inside the pattern is off by one.
    #
    $setext_re = '{
(^.+?) # $1: Header text
(?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes
[ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
}mx';
    $text = preg_replace_callback($setext_re,
      array($this, '_doHeaders_callback_setext'), $text);

    # atx-style headers:
    #     # Header 1        {#header1}
    #     ## Header 2       {#header2}
    #     ## Header 2 with closing hashes ##  {#header3.class1.class2}
    #     ...
    #     ###### Header 6   {.class2}
    #
    $atx_re = '{
^(\#{1,6}) # $1 = string of #\'s
[ ]*
(.+?) # $2 = Header text
[ ]*
\#* # optional closing #\'s (not counted)
(?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes
[ ]*
\n+
}xm';
    $text = preg_replace_callback($atx_re,
      array($this, '_doHeaders_callback_atx'), $text);

    return $text;
  }
  protected function _doHeaders_callback_setext($matches) {
    #
    # Replacement callback for Setext-style (underlined) headers.
    #
    # $matches[1] is the header text, $matches[2] the optional id/class
    # attribute block and $matches[3] the underline (a run of `=` or `-`).
    # Returns the hashed <h1>/<h2> block surrounded by newlines.
    #

    # A line starting with "- " above a single-dash underline is returned
    # untouched (presumably so that list items are not turned into headers).
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
      return $matches[0];

    # `=` underline gives level 1, `-` gives level 2. Square-bracket string
    # offset: the `$str{0}` form is a parse error on PHP 8.
    $level = $matches[3][0] == '=' ? 1 : 2;

    $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2]);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
  }
  protected function _doHeaders_callback_atx($matches) {
    #
    # Replacement callback for atx-style (`#`-prefixed) headers.
    #
    # The header level is the number of leading `#` characters in
    # $matches[1]; $matches[2] is the header text and $matches[3] the
    # optional id/class attribute block.
    # Returns the hashed <hN> block surrounded by newlines.
    #
    $level = strlen($matches[1]);
    $tag_name = "h$level";

    $attr_source = isset($matches[3]) ? $matches[3] : null;
    $attr = $this->doExtraAttributes($tag_name, $attr_source);

    $inner = $this->runSpanGamut($matches[2]);
    $block = "<" . $tag_name . $attr . ">" . $inner . "</" . $tag_name . ">";

    return "\n" . $this->hashBlock($block) . "\n\n";
  }
  protected function doTables($text) {
    #
    # Form HTML tables.
    #
    # Two passes are made over $text: tables whose rows start with a pipe,
    # then tables without a leading pipe. Both end up in
    # _doTable_callback(), the first one through
    # _doTable_leadingPipe_callback() which strips the leading pipes.
    #
    $less_than_tab = $this->tab_width - 1;

    #
    # Find tables with leading pipe.
    #
    #    | Header 1 | Header 2
    #    | -------- | --------
    #    | Cell 1   | Cell 2
    #    | Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
{
^ # Start of a line
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
[|] # Optional leading pipe (present)
(.+) \n # $1: Header row (at least one pipe)
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
[|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
( # $3: Cells
(?>
[ ]* # Allowed whitespace.
[|] .* \n # Row content.
)*
)
(?=\n|\Z) # Stop at final double newline.
}xm',
      array($this, '_doTable_leadingPipe_callback'), $text);

    #
    # Find tables without leading pipe.
    #
    #    Header 1 | Header 2
    #    -------- | --------
    #    Cell 1   | Cell 2
    #    Cell 3   | Cell 4
    #
    # The callback name is spelled exactly like the method declaration
    # (_doTable_callback) instead of relying on PHP's case-insensitive
    # method lookup.
    #
    $text = preg_replace_callback('
{
^ # Start of a line
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
(\S.*[|].*) \n # $1: Header row (at least one pipe)
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
( # $3: Cells
(?>
.* [|] .* \n # Row content
)*
)
(?=\n|\Z) # Stop at final double newline.
}xm',
      array($this, '_doTable_callback'), $text);

    return $text;
  }
  protected function _doTable_leadingPipe_callback($matches) {
    #
    # Replacement callback for tables written with a leading pipe on every
    # row. Strips the leading pipe (and any spaces before it) from each
    # body row, then hands the table over to _doTable_callback().
    #
    list($whole_match, $head, $underline, $content) = $matches;

    # Remove leading pipe for each row.
    $content = preg_replace('/^ *[|]/m', '', $content);

    $table_matches = array($whole_match, $head, $underline, $content);
    return $this->_doTable_callback($table_matches);
  }
  protected function _doTable_makeAlignAttr($alignname)
  {
    #
    # Build the attribute string that aligns a table cell.
    #
    # When $this->table_align_class_tmpl is non-empty, a `class` attribute
    # is produced from that template with every `%%` replaced by
    # $alignname; otherwise a plain `align` attribute is produced.
    # The returned string starts with a space.
    #
    if (!empty($this->table_align_class_tmpl)) {
      $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
      return " class=\"$classname\"";
    }

    return " align=\"$alignname\"";
  }
  protected function _doTable_callback($matches) {
    #
    # Build the HTML for one table.
    #
    # $matches[1] is the header row, $matches[2] the header underline and
    # $matches[3] the body rows (one per line), all without leading pipes.
    # Returns the hashed <table> block followed by a newline.
    #
    $trailing_pipe_re = '/[|] *$/m';
    $cell_split_re = '/ *[|] */';

    # Remove any trailing pipes for each line.
    $head = preg_replace($trailing_pipe_re, '', $matches[1]);
    $underline = preg_replace($trailing_pipe_re, '', $matches[2]);
    $content = preg_replace($trailing_pipe_re, '', $matches[3]);

    # Reading alignment from header underline: a colon on the right,
    # on both sides, or on the left of the dashes.
    $attr = array();
    foreach (preg_split($cell_split_re, $underline) as $n => $s) {
      if (preg_match('/^ *-+: *$/', $s)) {
        $attr[$n] = $this->_doTable_makeAlignAttr('right');
      } else if (preg_match('/^ *:-+: *$/', $s)) {
        $attr[$n] = $this->_doTable_makeAlignAttr('center');
      } else if (preg_match('/^ *:-+ *$/', $s)) {
        $attr[$n] = $this->_doTable_makeAlignAttr('left');
      } else {
        $attr[$n] = '';
      }
    }

    # Parsing span elements, including code spans, character escapes,
    # and inline HTML tags, so that pipes inside those get ignored.
    $headers = preg_split($cell_split_re, $this->parseSpan($head));
    $col_count = count($headers);

    # Columns without an underline entry get no alignment attribute.
    $attr = array_pad($attr, $col_count, '');

    # Write column headers.
    $html = "<table>\n" . "<thead>\n" . "<tr>\n";
    foreach ($headers as $n => $header) {
      $html .= " <th" . $attr[$n] . ">" . $this->runSpanGamut(trim($header)) . "</th>\n";
    }
    $html .= "</tr>\n" . "</thead>\n";

    # Split content by row.
    $html .= "<tbody>\n";
    foreach (explode("\n", trim($content, "\n")) as $row) {
      # Same span parsing as for the header, then split the row by cell.
      # The split is capped at the header's column count and short rows
      # are padded with empty cells.
      $row_cells = preg_split($cell_split_re, $this->parseSpan($row), $col_count);
      $row_cells = array_pad($row_cells, $col_count, '');

      $html .= "<tr>\n";
      foreach ($row_cells as $n => $cell) {
        $html .= " <td" . $attr[$n] . ">" . $this->runSpanGamut(trim($cell)) . "</td>\n";
      }
      $html .= "</tr>\n";
    }
    $html .= "</tbody>\n" . "</table>";

    return $this->hashBlock($html) . "\n";
  }
  protected function doDefLists($text) {
    #
    # Form HTML definition lists.
    #
    # A list is looked for at the very start of $text or right after a
    # blank line; each one found is replaced by the return value of
    # _doDefLists_callback().
    #

    # Up to (tab_width - 1) spaces of indentation.
    $indent_re = '[ ]{0,' . ($this->tab_width - 1) . '}';

    # Re-usable pattern to match any entire dl list:
    $whole_list_re = '(?>
( # $1 = whole list
( # $2
'.$indent_re.'
((?>.*\S.*\n)+) # $3 = defined term
\n?
'.$indent_re.':[ ]+ # colon starting definition
)
(?s:.+?)
( # $4
\z
|
\n{2,}
(?=\S)
(?! # Negative lookahead for another term
'.$indent_re.'
(?: \S.*\n )+? # defined term
\n?
'.$indent_re.':[ ]+ # colon starting definition
)
(?! # Negative lookahead for another definition
'.$indent_re.':[ ]+ # colon starting definition
)
)
)
)'; // mx

    $def_list_re = '{
(?>\A\n?|(?<=\n\n))
'.$whole_list_re.'
}mx';

    return preg_replace_callback($def_list_re,
      array($this, '_doDefLists_callback'), $text);
  }
  protected function _doDefLists_callback($matches) {
    #
    # Replacement callback for a whole definition list ($matches[1]).
    #
    # The list source is converted by processDefListItems(), trimmed and
    # wrapped in <dl>...</dl>. Returns the hashed block followed by a
    # blank line.
    #
    $items = trim($this->processDefListItems($matches[1]));
    $html = "<dl>\n" . $items . "\n</dl>";

    return $this->hashBlock($html) . "\n\n";
  }
  protected function processDefListItems($list_str) {
    #
    # Process the contents of a single definition list, splitting it
    # into individual term and definition list items.
    #
    # Terms are converted first (to <dt> elements), then definitions (to
    # <dd> elements); the second pattern relies on the `<dt>` markers
    # produced by the first pass to know where a definition stops.
    #
    $less_than_tab = $this->tab_width - 1;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Process definition terms.
    # The lookahead for the definition mark uses the same configurable
    # indent limit as every other pattern here (it used to be a literal 3,
    # which only agreed with them for the default tab width of 4).
    $list_str = preg_replace_callback('{
(?>\A\n?|\n\n+) # leading line
( # definition terms = $1
[ ]{0,'.$less_than_tab.'} # leading whitespace
(?!\:[ ]|[ ]) # negative lookahead for a definition
# mark (colon) or more whitespace.
(?> \S.* \n)+? # actual term (not whitespace).
)
(?=\n?[ ]{0,'.$less_than_tab.'}:[ ]) # lookahead for following line feed
# with a definition mark.
}xm',
      array($this, '_processDefListItems_callback_dt'), $list_str);

    # Process actual definitions.
    $list_str = preg_replace_callback('{
\n(\n+)? # leading line = $1
( # marker space = $2
[ ]{0,'.$less_than_tab.'} # whitespace before colon
\:[ ]+ # definition mark (colon)
)
((?s:.+?)) # definition text = $3
(?= \n+ # stop at next definition mark,
(?: # next term or end of text
[ ]{0,'.$less_than_tab.'} \:[ ] |
<dt> | \z
)
)
}xm',
      array($this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
  }
  protected function _processDefListItems_callback_dt($matches) {
    #
    # Replacement callback for the term lines of a definition list.
    #
    # $matches[1] holds one term per line; each line is trimmed, run
    # through the span gamut and wrapped in its own <dt> element.
    #
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $term) {
      $html .= "\n<dt>" . $this->runSpanGamut(trim($term)) . "</dt>";
    }

    return $html . "\n";
  }
  protected function _processDefListItems_callback_dd($matches) {
    #
    # Replacement callback for one definition of a definition list.
    #
    # $matches[1] is the blank line(s) preceding the definition (if any),
    # $matches[2] the marker (indent, colon and following spaces) and
    # $matches[3] the definition text.
    #
    # A definition preceded by a blank line, or containing one, is
    # processed as block-level content; any other definition is processed
    # as span-level content.
    #
    $leading_line = $matches[1];
    $marker_space = $matches[2];
    $def = $matches[3];

    $is_block = $leading_line || preg_match('/\n{2,}/', $def);

    if (!$is_block) {
      $inline = $this->runSpanGamut($this->outdent(rtrim($def)));
      return "\n<dd>" . $inline . "</dd>\n";
    }

    # Replace marker with the appropriate whitespace indentation
    $padding = str_repeat(' ', strlen($marker_space));
    $blocks = $this->runBlockGamut($this->outdent($padding . $def . "\n\n"));

    return "\n<dd>" . "\n" . $blocks . "\n" . "</dd>\n";
  }
  protected function doFencedCodeBlocks($text) {
    #
    # Adding the fenced code block syntax to regular Markdown:
    #
    # ~~~
    # Code block
    # ~~~
    #
    # The fence is three or more tildes or backticks at the start of a
    # line, optionally followed by a class name or an attribute block; the
    # block ends at a line holding the same marker. Each block found is
    # replaced by the return value of _doFencedCodeBlocks_callback().
    #
    # (The pattern does not depend on the tab width, so the unused
    # $less_than_tab local that used to be computed here is gone.)
    #
    $text = preg_replace_callback('{
(?:\n|\A)
# 1: Opening marker
(
(?:~{3,}|`{3,}) # 3 or more tildes/backticks.
)
[ ]*
(?:
\.?([-_:a-zA-Z0-9]+) # 2: standalone class name
|
'.$this->id_class_attr_catch_re.' # 3: Extra attributes
)?
[ ]* \n # Whitespace and newline following marker.
# 4: Content
(
(?>
(?!\1 [ ]* \n) # Not a closing marker.
.*\n+
)+
)
# Closing marker.
\1 [ ]* (?= \n )
}xm',
      array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
  }
  protected function _doFencedCodeBlocks_callback($matches) {
    #
    # Replacement callback for one fenced code block.
    #
    # $matches[2] is the standalone class name (if given), $matches[3] the
    # extra attribute block (if given) and $matches[4] the code itself.
    # Returns the hashed <pre><code> block surrounded by blank lines.
    #
    $classname =& $matches[2];
    $attrs     =& $matches[3];
    $codeblock = $matches[4];

    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);

    # Leading newlines of the code are turned into <br> elements.
    $codeblock = preg_replace_callback('/^\n+/',
      array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

    if ($classname != "") {
      # Drop a leading dot from the class name. Square-bracket string
      # offset: the `$str{0}` form is a parse error on PHP 8.
      if ($classname[0] == '.')
        $classname = substr($classname, 1);
      $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
    } else {
      $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
    }

    # The attributes go on exactly one of the two elements, selected by
    # the code_attr_on_pre setting.
    $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
    $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
    $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

    return "\n\n".$this->hashBlock($codeblock)."\n\n";
  }
  protected function _doFencedCodeBlocks_newlines($matches) {
    #
    # Replacement callback for the run of newlines at the start of a
    # fenced code block: returns one <br> element (closed with the
    # configured empty-element suffix) per character matched.
    #
    $line_break = "<br" . $this->empty_element_suffix;
    $count = strlen($matches[0]);

    return str_repeat($line_break, $count);
  }
  #
  # Redefining emphasis markers so that emphasis by underscore does not
  # work in the middle of a word.
  #
  # Each table below maps a key to a regular-expression fragment:
  #   - the '' entry matches either marker form (asterisks or underscores).
  #     The underscore form must not directly follow a letter, digit or
  #     underscore, and the marker must not be followed by whitespace
  #     (optionally after one of . , : ;);
  #   - the entry keyed by a marker string matches that same marker. It
  #     must not directly follow whitespace or the marker character; the
  #     underscore form must also not be followed by a letter, digit or
  #     underscore.
  # NOTE(review): presumably the key is the marker that is currently open
  # ('' when none is), so '' holds the opening pattern and the other keys
  # the closing patterns -- confirm against the code that consumes these
  # tables.
  #
  # Single marker: `*` or `_`.
  protected $em_relist = array(
  '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
  '*' => '(?<![\s*])\*(?!\*)',
  '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
  );
  # Double marker: `**` or `__`.
  protected $strong_relist = array(
  '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
  '**' => '(?<![\s*])\*\*(?!\*)',
  '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
  );
  # Triple marker: `***` or `___`.
  protected $em_strong_relist = array(
  '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
  '***' => '(?<![\s*])\*\*\*(?!\*)',
  '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
  );
  protected function formParagraphs($text) {
    #
    # Params:
    # $text - string to process with html <p> tags
    #
    # The text is split on runs of two or more newlines; every chunk is
    # run through the span gamut and wrapped in <p>...</p> unless it is a
    # hash token. The chunks are then joined with blank lines and all
    # hash tokens are expanded.
    #

    # Strip leading and trailing lines:
    $stripped = preg_replace('/\A\n+|\n+\z/', '', $text);

    $paragraphs = array();
    foreach (preg_split('/\n{2,}/', $stripped, -1, PREG_SPLIT_NO_EMPTY) as $key => $graf) {
      $graf = trim($this->runSpanGamut($graf));

      # Check if this should be enclosed in a paragraph.
      # Clean tag hashes & block tag hashes are left alone: a chunk that
      # starts with a `B` token, or that consists of a single `C` token.
      $is_hash = preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $graf);
      if (!$is_hash) {
        $graf = "<p>$graf</p>";
      }

      $paragraphs[$key] = $graf;
    }

    # Join grafs in one text, then finish by removing any tag hashes
    # still present.
    return $this->unhash(implode("\n\n", $paragraphs));
  }
  2566. ### Footnotes
  protected function stripFootnotes($text) {
    #
    # Strips footnote definitions from text. Every definition found is
    # passed to _stripFootnotes_callback(), which stores it and returns
    # the string that replaces it in $text.
    #
    $less_than_tab = $this->tab_width - 1;

    # Footnote definitions are in the form: [^id]: footnote text
    # The text may continue on following lines, but stops at a blank line
    # followed by non-indented content or at another definition marker.
    $footnote_def_re = '{
^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1
[ ]*
\n? # maybe *one* newline
( # text = $2 (no blank lines allowed)
(?:
.+ # actual text
|
\n # newlines but
(?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
# by non-indented content
)*
)
}xm';

    return preg_replace_callback($footnote_def_re,
      array($this, '_stripFootnotes_callback'), $text);
  }
  protected function _stripFootnotes_callback($matches) {
    #
    # Replacement callback for one footnote definition.
    #
    # Stores the outdented footnote text ($matches[2]) in $this->footnotes
    # under the footnote id ($matches[1]) prefixed with fn_id_prefix, and
    # removes the definition from the document.
    #
    $key = $this->fn_id_prefix . $matches[1];
    $body = $this->outdent($matches[2]);
    $this->footnotes[$key] = $body;

    # String that will replace the block
    return '';
  }
  protected function doFootnotes($text) {
    #
    # Replace footnote references in $text [^id] with a special text-token
    # which will be replaced by the actual footnote marker in appendFootnotes.
    #
    # Nothing is replaced while a link is being processed (in_anchor set).
    #
    if ($this->in_anchor) {
      return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
  }
  protected function appendFootnotes($text) {
      #
      # Replaces the footnote tokens left by doFootnotes with their markers
      # and appends the footnote list to $text.
      #
      # Returns $text unchanged apart from the marker substitution when no
      # footnote was actually referenced.
      #
      $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
          array($this, '_appendFootnotes_callback'), $text);

      if (!empty($this->footnotes_ordered)) {
          $text .= "\n\n";
          $text .= "<div class=\"footnotes\">\n";
          $text .= "<hr". $this->empty_element_suffix ."\n";
          $text .= "<ol>\n\n";

          # Backlink attribute template. Any "%%" it contains stands for the
          # footnote number and is filled in separately for each footnote.
          $attr = "";
          if ($this->fn_backlink_class != "") {
              $class = $this->fn_backlink_class;
              $class = $this->encodeAttribute($class);
              $attr .= " class=\"$class\"";
          }
          if ($this->fn_backlink_title != "") {
              $title = $this->fn_backlink_title;
              $title = $this->encodeAttribute($title);
              $attr .= " title=\"$title\"";
          }
          $num = 0;

          # The list is consumed from the front rather than iterated with
          # foreach, because parsing a footnote's body can reference further
          # footnotes and thereby append new entries to footnotes_ordered.
          while (!empty($this->footnotes_ordered)) {
              $footnote = reset($this->footnotes_ordered);
              $note_id = key($this->footnotes_ordered);
              unset($this->footnotes_ordered[$note_id]);
              $ref_count = $this->footnotes_ref_count[$note_id];
              unset($this->footnotes_ref_count[$note_id]);
              unset($this->footnotes[$note_id]);

              $footnote .= "\n"; # Need to append newline before parsing.
              $footnote = $this->runBlockGamut("$footnote\n");
              $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                  array($this, '_appendFootnotes_callback'), $footnote);

              # Substitute the number into a per-footnote copy. Writing the
              # result back into $attr would consume the "%%" placeholder on
              # the first footnote and leave every later backlink showing 1.
              ++$num;
              $note_attr = str_replace("%%", (string) $num, $attr);
              $note_id = $this->encodeAttribute($note_id);

              # Prepare backlink, multiple backlinks if multiple references
              $backlink = "<a href=\"#fnref:$note_id\"$note_attr>&#8617;</a>";
              for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                  $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$note_attr>&#8617;</a>";
              }

              # Add backlink to last paragraph; create new paragraph if needed.
              if (preg_match('{</p>$}', $footnote)) {
                  $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
              } else {
                  $footnote .= "\n\n<p>$backlink</p>";
              }

              $text .= "<li id=\"fn:$note_id\">\n";
              $text .= $footnote . "\n";
              $text .= "</li>\n\n";
          }

          $text .= "</ol>\n";
          $text .= "</div>";
      }
      return $text;
  }
  protected function _appendFootnotes_callback($matches) {
      #
      # preg_replace_callback handler that turns one footnote token into its
      # marker. $matches[1] is the footnote id as written in the document.
      #
      # Returns the <sup> marker when a footnote with that id is defined;
      # otherwise returns the literal "[^id]" text.
      #
      $node_id = $this->fn_id_prefix . $matches[1];

      # No matching footnote definition: restore the original text.
      if (!isset($this->footnotes[$node_id])) {
          return "[^".$matches[1]."]";
      }

      if (isset($this->footnotes_numbers[$node_id])) {
          # Already numbered: count one more reference. The new count
          # becomes the suffix of this marker's id (fnref2, fnref3, ...).
          $num = $this->footnotes_numbers[$node_id];
          $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
      } else {
          # First reference: move the content into the ordered list and
          # assign the footnote its number.
          $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
          $this->footnotes_ref_count[$node_id] = 1;
          $num = $this->footnote_counter++;
          $this->footnotes_numbers[$node_id] = $num;
          $ref_count_mark = '';
      }

      # Optional class/title attributes; "%%" is replaced by the number.
      $attr = "";
      if ($this->fn_link_class != "") {
          $attr .= " class=\"" . $this->encodeAttribute($this->fn_link_class) . "\"";
      }
      if ($this->fn_link_title != "") {
          $attr .= " title=\"" . $this->encodeAttribute($this->fn_link_title) . "\"";
      }
      $attr = str_replace("%%", $num, $attr);

      $node_id = $this->encodeAttribute($node_id);

      $marker = "<sup id=\"fnref{$ref_count_mark}:{$node_id}\">";
      $marker .= "<a href=\"#fn:{$node_id}\"{$attr}>{$num}</a>";
      $marker .= "</sup>";
      return $marker;
  }
  2700. ### Abbreviations ###
  protected function stripAbbreviations($text) {
      #
      # Removes abbreviation definitions from $text. Every definition
      # matched is passed to _stripAbbreviations_callback and replaced by
      # whatever that callback returns.
      #
      # An abbreviation definition has the form:
      #
      #     *[abbr]: description
      #
      # The marker may be indented by at most tab_width - 1 spaces, and the
      # description is the remainder of that line.
      #
      $max_indent = $this->tab_width - 1;

      $definition_re = '{
          ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?: # abbr_id = $1
          (.*) # text = $2 (no blank lines allowed)
          }xm';

      return preg_replace_callback(
          $definition_re,
          array($this, '_stripAbbreviations_callback'),
          $text);
  }
  protected function _stripAbbreviations_callback($matches) {
      #
      # preg_replace_callback handler for stripAbbreviations.
      # $matches[1] is the abbreviation, $matches[2] its description.
      # Adds the regex-quoted abbreviation to the abbr_word_re alternation
      # and stores the trimmed description in abbr_desciptions.
      #
      $abbr_word = $matches[1];
      $abbr_desc = $matches[2];

      # Compare against '' instead of relying on truthiness: the string "0"
      # is falsy in PHP, so after an abbreviation named "0" the "|" separator
      # would be skipped and the next word glued onto it.
      if ((string) $this->abbr_word_re !== '')
          $this->abbr_word_re .= '|';
      $this->abbr_word_re .= preg_quote($abbr_word);
      $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
      return ''; # String that will replace the block
  }
  protected function doAbbreviations($text) {
      #
      # Finds defined abbreviations in $text and passes each occurrence to
      # _doAbbreviations_callback. An occurrence only matches when it is not
      # adjacent to a word character or to the \x1A byte.
      #
      # Compare against '' instead of relying on truthiness: an alternation
      # consisting only of "0" is falsy in PHP and would be skipped.
      if ((string) $this->abbr_word_re !== '') {
          # cannot use the /x modifier because abbr_word_re may
          # contain significant spaces:
          $text = preg_replace_callback('{'.
              '(?<![\w\x1A])'.
              '(?:'.$this->abbr_word_re.')'.
              '(?![\w\x1A])'.
              '}',
              array($this, '_doAbbreviations_callback'), $text);
      }
      return $text;
  }
  protected function _doAbbreviations_callback($matches) {
      #
      # preg_replace_callback handler for doAbbreviations.
      # $matches[0] is the abbreviation as found in the text. When a
      # description is registered for it, returns the hashed <abbr> element
      # (with a title attribute if the description is non-empty); otherwise
      # returns the matched text unchanged.
      #
      $abbr = $matches[0];
      if (!isset($this->abbr_desciptions[$abbr])) {
          return $abbr;
      }

      $desc = $this->abbr_desciptions[$abbr];

      # Test for the empty string explicitly: empty() is also true for "0",
      # which would silently drop a description consisting of "0".
      if ((string) $desc === '') {
          return $this->hashPart("<abbr>$abbr</abbr>");
      }

      $desc = $this->encodeAttribute($desc);
      return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
  }
  2754. }