PageRenderTime 58ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/library/vendors/markdown/Michelf/Markdown.php

http://github.com/vanillaforums/Garden
PHP | 3168 lines | 2073 code | 368 blank | 727 comment | 210 complexity | 9546837dcbaac2c808bfb7d1278983b2 MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-3-Clause, MIT

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. #
  3. #
  4. # DO NOT UPDATE THIS FILE
  5. # DO NOT BRING IN A NEW VERSION OF THIS LIBRARY
  6. # VANILLA CHANGES WILL BE LOST
  7. #
  8. # Please see /library/core/class.markdownvanilla.php
  9. #
  10. #
  11. #
  12. # Markdown - A text-to-HTML conversion tool for web writers
  13. #
  14. # PHP Markdown
  15. # Copyright (c) 2004-2014 Michel Fortin
  16. # <http://michelf.com/projects/php-markdown/>
  17. #
  18. # Original Markdown
  19. # Copyright (c) 2004-2006 John Gruber
  20. # <http://daringfireball.net/projects/markdown/>
  21. #
  22. namespace Michelf;
  23. #
  24. # Markdown Parser Class
  25. #
  26. class Markdown implements MarkdownInterface {
  ### Version ###

  # Version string of this copy of the PHP Markdown library.
  const MARKDOWNLIB_VERSION = "1.4.1";
  29. ### Simple Function Interface ###
  public static function defaultTransform($text) {
      #
      # Simple function interface: transform $text with a parser instance
      # that is created on first use and then reused on later calls.
      # One instance is kept per called class, so derived classes work too.
      #

      # Class on which this static method was invoked.
      $class = \get_called_class();

      # Parsers created so far, indexed by class name.
      static $parser_list;

      # Create the parser if it is not already set.
      if (empty($parser_list[$class])) {
          $parser_list[$class] = new $class;
      }

      # Transform text using the parser.
      return $parser_list[$class]->transform($text);
  }
  ### Configuration Variables ###

  # Suffix appended to empty elements (<hr, <br, <img) when they are
  # emitted. Change to ">" for HTML output.
  public $empty_element_suffix = " />";
  # Tab width in spaces; the maximum indent of block constructs is derived
  # from it (tab_width - 1) and code blocks require tab_width leading spaces.
  public $tab_width = 4;

  # Change to `true` to disallow markup or entities.
  # When $no_markup is true, hashHTMLBlocks() returns its input untouched.
  public $no_markup = false;
  public $no_entities = false;

  # Predefined urls and titles for reference links and images.
  # setup() copies them into the working url/title hashes before each run.
  public $predef_urls = array();
  public $predef_titles = array();

  # Optional filter function for URLs
  public $url_filter_func = null;
  ### Parser Implementation ###

  # Regex to match balanced [brackets].
  # Needed to insert a maximum bracket depth while converting to PHP.
  # The *_re patterns are built in the constructor by repeating the
  # opening/closing fragments *_depth times.
  protected $nested_brackets_depth = 6;
  protected $nested_brackets_re;

  protected $nested_url_parenthesis_depth = 4;
  protected $nested_url_parenthesis_re;

  # Table of hash values for escaped characters:
  # $escape_chars_re is a character class built from $escape_chars
  # (via preg_quote) in the constructor.
  protected $escape_chars = '\`*_{}[]()>#+-.!';
  protected $escape_chars_re;
  public function __construct() {
      #
      # Constructor. Initializes the detab helper and the emphasis token
      # expressions, builds the bounded-depth nesting patterns and the
      # escape character class, then orders the gamuts by priority.
      #
      $this->_initDetab();
      $this->prepareItalicsAndBold();

      # Nesting is unrolled to a fixed depth: the opening fragment is
      # repeated N times, followed by the closing fragment repeated N times.
      $bracket_depth = $this->nested_brackets_depth;
      $bracket_open  = str_repeat('(?>[^\[\]]+|\[', $bracket_depth);
      $bracket_close = str_repeat('\])*', $bracket_depth);
      $this->nested_brackets_re = $bracket_open . $bracket_close;

      $paren_depth = $this->nested_url_parenthesis_depth;
      $paren_open  = str_repeat('(?>[^()\s]+|\(', $paren_depth);
      $paren_close = str_repeat('(?>\)))*', $paren_depth);
      $this->nested_url_parenthesis_re = $paren_open . $paren_close;

      # Character class matching any one of the escapable characters.
      $quoted_chars = preg_quote($this->escape_chars);
      $this->escape_chars_re = '[' . $quoted_chars . ']';

      # Sort document, block, and span gamut in ascending priority order
      # (asort keeps the method names as keys).
      asort($this->document_gamut);
      asort($this->block_gamut);
      asort($this->span_gamut);
  }
  # Internal hashes used during transformation.
  # $urls / $titles map lower-cased link ids to their URL and title;
  # $html_hashes maps hash tokens (see hashPart) to the text they replace.
  protected $urls = array();
  protected $titles = array();
  protected $html_hashes = array();

  # Status flag to avoid invalid nesting.
  # Set by doAnchors() while it runs so that nested calls return early.
  protected $in_anchor = false;
  protected function setup() {
      #
      # Called before the transformation process starts, to reset the
      # parser state.
      #

      # Start with no hashed fragments and outside of any anchor.
      $this->html_hashes = array();
      $this->in_anchor = false;

      # Seed the link tables with the predefined urls and titles.
      $this->urls = $this->predef_urls;
      $this->titles = $this->predef_titles;
  }
  protected function teardown() {
      #
      # Called after the transformation process to clear any variable
      # which may be taking up memory unnecessarily.
      #
      $this->urls = $this->titles = $this->html_hashes = array();
  }
  public function transform($text) {
      #
      # Main function. Normalizes the input text, then passes it through
      # every method of the document gamut and returns the result followed
      # by a newline.
      #
      $this->setup();

      # Remove UTF-8 BOM and marker character in input, if present.
      # (\x1A is the character hashPart uses inside its tokens.)
      $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

      # Standardize line endings (DOS to Unix and Mac to Unix) and make
      # sure the text ends with a couple of newlines.
      $text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

      # Convert all tabs to spaces, then turn block-level HTML blocks
      # into hash entries.
      $text = $this->hashHTMLBlocks($this->detab($text));

      # Empty out any line consisting only of spaces. This makes
      # subsequent regexen easier to write, because consecutive blank
      # lines can be matched with /\n+/ instead of /[ ]*\n+/ .
      $text = preg_replace('/^[ ]+$/m', '', $text);

      # Run document gamut methods in their sorted order.
      foreach (array_keys($this->document_gamut) as $method) {
          $text = $this->$method($text);
      }

      $this->teardown();

      return $text . "\n";
  }
  # Document-level transformations: method name => priority.
  # The constructor sorts this list by ascending priority and transform()
  # calls each method in turn.
  protected $document_gamut = array(
      # Strip link definitions, store in hashes.
      "stripLinkDefinitions" => 20,

      "runBasicBlockGamut"   => 30,
      );
  protected function stripLinkDefinitions($text) {
      #
      # Removes link definitions from $text; the callback records each
      # URL and title in the parser's url/title hashes.
      # Returns the text without the definitions.
      #

      # A definition may be indented by less than one tab width.
      $max_indent = $this->tab_width - 1;

      # Link defs are in the form: ^[id]: url "optional title"
      return preg_replace_callback('{
                          ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
                            [ ]*
                            \n? # maybe *one* newline
                            [ ]*
                          (?:
                            <(.+?)> # url = $2
                          |
                            (\S+?) # url = $3
                          )
                            [ ]*
                            \n? # maybe one newline
                            [ ]*
                          (?:
                              (?<=\s) # lookbehind for whitespace
                              ["(]
                              (.*?) # title = $4
                              [")]
                              [ ]*
                          )? # title is optional
                          (?:\n+|\Z)
          }xm',
          array($this, '_stripLinkDefinitions_callback'),
          $text);
  }
  protected function _stripLinkDefinitions_callback($matches) {
      #
      # Records one link definition: $matches[1] is the id, $matches[2]
      # (angle-bracketed form) or $matches[3] (bare form) the URL, and
      # $matches[4] the optional title.
      #
      $id = strtolower($matches[1]);

      # Prefer the <url> form; fall back to the bare url when it is empty.
      if ($matches[2] == '') {
          $this->urls[$id] = $matches[3];
      } else {
          $this->urls[$id] = $matches[2];
      }

      # The title group is absent from $matches when no title was given;
      # null is stored in that case so isset() on the title stays false.
      $this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

      return ''; # String that will replace the block
  }
  protected function hashHTMLBlocks($text) {
      #
      # Replaces block-level HTML found in $text with hash tokens (see
      # _hashHTMLBlocks_callback) and returns the resulting text.
      # Does nothing when markup is disabled.
      #
      if ($this->no_markup) {
          return $text;
      }

      # Tags may be indented by less than one tab width.
      $max_indent = $this->tab_width - 1;

      # Hashify HTML blocks:
      # We only want to do this for block-level HTML tags, such as headers,
      # lists, and tables. That's because we still want to wrap <p>s around
      # "paragraphs" that are wrapped in non-block-level tags, such as
      # anchors, phrase emphasis, and spans. The list of tags we're looking
      # for is hard-coded:
      #
      # *  List "a" is made of tags which can be both inline or block-level.
      #    These will be treated block-level when the start tag is alone on
      #    its line, otherwise they're not matched here and will be taken as
      #    inline later.
      # *  List "b" is made of tags which are always block-level;
      #
      $block_tags_a_re = 'ins|del';
      $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                         'script|noscript|style|form|fieldset|iframe|math|svg|'.
                         'article|section|nav|aside|hgroup|header|footer|'.
                         'figure';

      # Number of nested same-name tag levels the content pattern is
      # unrolled to.
      $depth = 4;

      # Regular expression for the attributes of a tag.
      $attr = '
          (?> # optional tag attributes
            \s # starts with whitespace
            (?>
              [^>"/]+ # text outside quotes
            |
              /+(?!>) # slash not followed by ">"
            |
              "[^"]*" # text inside double quotes (tolerate ">")
            |
              \'[^\']*\' # text inside single quotes (tolerate ">")
            )*
          )?
          ';

      # Regular expression for the content of a block tag ("\2" refers to
      # the tag name captured by the enclosing pattern).
      $content =
          str_repeat('
              (?>
                [^<]+ # content without tag
              |
                <\2 # nested opening tag
                  '.$attr.' # attributes
                  (?>
                    />
                  |
                    >', $depth). # end of opening tag
                    '.*?'. # last level nested tag content
          str_repeat('
                    </\2\s*> # closing nested tag
                  )
                |
                  <(?!/\2\s*> # other tags with a different name
                )
              )*',
              $depth);

      # Same pattern for the group "a" alternative, whose tag name is
      # captured in $3 instead of $2.
      $content2 = str_replace('\2', '\3', $content);

      # First, look for nested blocks, e.g.:
      #     <div>
      #         <div>
      #         tags for inner block must be indented.
      #         </div>
      #     </div>
      #
      # The outermost tags must start at the left margin for this to match,
      # and the inner nested divs must be indented.
      # We need to do this before the next, more liberal match, because the
      # next match will start at the first `<div>` and stop at the first
      # `</div>`.
      return preg_replace_callback('{(?>
          (?>
              (?<=\n) # Starting on its own line
              | # or
              \A\n? # the at beginning of the doc
          )
          ( # save in $1
            # Match from `\n<tag>` to `</tag>\n`, handling nested tags
            # in between.
                      [ ]{0,'.$max_indent.'}
                      <('.$block_tags_b_re.')# start tag = $2
                      '.$attr.'> # attributes followed by > and \n
                      '.$content.' # content, support nesting
                      </\2> # the matching end tag
                      [ ]* # trailing spaces/tabs
                      (?=\n+|\Z) # followed by a newline or end of document
          | # Special version for tags of group a.
                      [ ]{0,'.$max_indent.'}
                      <('.$block_tags_a_re.')# start tag = $3
                      '.$attr.'>[ ]*\n # attributes followed by >
                      '.$content2.' # content, support nesting
                      </\3> # the matching end tag
                      [ ]* # trailing spaces/tabs
                      (?=\n+|\Z) # followed by a newline or end of document
          | # Special case just for <hr />. It was easier to make a special
            # case than to make the other regex more complicated.
                      [ ]{0,'.$max_indent.'}
                      <(hr) # start tag = $2
                      '.$attr.' # attributes
                      /?> # the matching end tag
                      [ ]*
                      (?=\n{2,}|\Z) # followed by a blank line or end of document
          | # Special case for standalone HTML comments:
                  [ ]{0,'.$max_indent.'}
                  (?s:
                      <!-- .*? -->
                  )
                  [ ]*
                  (?=\n{2,}|\Z) # followed by a blank line or end of document
          | # PHP and ASP-style processor instructions (<? and <%)
                  [ ]{0,'.$max_indent.'}
                  (?s:
                      <([?%]) # $2
                      .*?
                      \2>
                  )
                  [ ]*
                  (?=\n{2,}|\Z) # followed by a blank line or end of document
          )
          )}Sxmi',
          array($this, '_hashHTMLBlocks_callback'),
          $text);
  }
  protected function _hashHTMLBlocks_callback($matches) {
      #
      # Replaces the matched HTML block ($matches[1]) with its block-level
      # hash token, set apart by blank lines.
      #
      return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
  }
  protected function hashPart($text, $boundary = 'X') {
      #
      # Called whenever a function inserts an atomic element in the text
      # stream. Stores $text under a newly generated token and returns
      # that token; the text is swapped back when unhash is called.
      #
      # The $boundary argument specifies what character should surround
      # the token. By convention, "B" is used for block elements that need
      # not be wrapped into paragraph tags at the end, ":" is used for
      # elements that are word separators and "X" is used in the general
      # case.
      #

      # Swap back any tag hash found in $text so we do not have to
      # `unhash` multiple times at the end.
      $plain = $this->unhash($text);

      # Tokens are numbered with a counter that persists across calls.
      static $i = 0;
      $i++;

      $token = $boundary . "\x1A" . $i . $boundary;
      $this->html_hashes[$token] = $plain;

      return $token; # String that will replace the tag.
  }
  protected function hashBlock($text) {
      #
      # Convenience wrapper: hashPart using the block-level boundary "B".
      #
      $block_boundary = 'B';
      return $this->hashPart($text, $block_boundary);
  }
  # Method name => priority. Sorted by ascending priority in the
  # constructor and applied in that order by runBasicBlockGamut().
  protected $block_gamut = array(
  #
  # These are all the transformations that form block-level
  # tags like paragraphs, headers, and list items.
  #
      "doHeaders"         => 10,
      "doHorizontalRules" => 20,

      "doLists"           => 40,
      "doCodeBlocks"      => 50,
      "doBlockQuotes"     => 60,
      );
  protected function runBlockGamut($text) {
      #
      # Run block gamut transformations, hashing HTML blocks first.
      #
      # We need to escape raw HTML in Markdown source before doing
      # anything else. This needs to be done for each block, and not only
      # at the beginning in the Markdown function, since hashed blocks can
      # be part of list items and could have been indented. Indented
      # blocks would have been seen as a code block in a previous pass of
      # hashHTMLBlocks.
      #
      return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
  }
  protected function runBasicBlockGamut($text) {
      #
      # Run block gamut transformations, without hashing HTML blocks.
      # This is useful when HTML blocks are known to be already hashed,
      # like in the first whole-document pass.
      #
      foreach (array_keys($this->block_gamut) as $method) {
          $text = $this->$method($text);
      }

      # Finally form paragraphs and restore hashed blocks.
      return $this->formParagraphs($text);
  }
  protected function doHorizontalRules($text) {
      #
      # Replaces horizontal rule lines (three or more -, * or _ markers,
      # optionally separated by spaces) with a hashed <hr> element.
      #

      # The replacement token is produced once, before matching.
      $hr_token = $this->hashBlock("<hr$this->empty_element_suffix");

      return preg_replace(
          '{
              ^[ ]{0,3} # Leading space
              ([-*_]) # $1: First marker
              (?> # Repeated marker group
                  [ ]{0,2} # Zero, one, or two spaces.
                  \1 # Marker character
              ){2,} # Group repeated at least twice
              [ ]* # Tailing spaces
              $ # End of line.
          }mx',
          "\n" . $hr_token . "\n",
          $text);
  }
  # Method name => priority. Sorted by ascending priority in the
  # constructor and applied in that order by runSpanGamut().
  protected $span_gamut = array(
  #
  # These are all the transformations that occur *within* block-level
  # tags like paragraphs, headers, and list items.
  #
      # Process character escapes, code spans, and inline HTML
      # in one shot.
      "parseSpan"           => -30,

      # Process anchor and image tags. Images must come first,
      # because ![foo][f] looks like an anchor.
      "doImages"            =>  10,
      "doAnchors"           =>  20,

      # Make links out of things like `<http://example.com/>`
      # Must come after doAnchors, because you can use < and >
      # delimiters in inline links like [this](<url>).
      "doAutoLinks"         =>  30,
      "encodeAmpsAndAngles" =>  40,

      "doItalicsAndBold"    =>  50,
      "doHardBreaks"        =>  60
      );
  protected function runSpanGamut($text) {
      #
      # Run span gamut transformations: every method listed in the span
      # gamut is applied to $text in the gamut's order.
      #
      foreach (array_keys($this->span_gamut) as $method) {
          $text = $this->$method($text);
      }

      return $text;
  }
  protected function doHardBreaks($text) {
      #
      # Do hard breaks: two or more spaces before a newline become a
      # line break element (see _doHardBreaks_callback).
      #
      $callback = array($this, '_doHardBreaks_callback');
      return preg_replace_callback('/ {2,}\n/', $callback, $text);
  }
  protected function _doHardBreaks_callback($matches) {
      #
      # Returns the hash token for a <br> element followed by a newline.
      #
      $break = "<br$this->empty_element_suffix\n";
      return $this->hashPart($break);
  }
  protected function doAnchors($text) {
      #
      # Turn Markdown link shortcuts into XHTML <a> tags.
      #

      # Links are not processed inside the text of another link.
      if ($this->in_anchor) {
          return $text;
      }
      $this->in_anchor = true;

      $reference_callback = array($this, '_doAnchors_reference_callback');
      $inline_callback    = array($this, '_doAnchors_inline_callback');

      #
      # First, handle reference-style links: [link text] [id]
      #
      $text = preg_replace_callback('{
          ( # wrap whole match in $1
            \[
              ('.$this->nested_brackets_re.') # link text = $2
            \]
            [ ]? # one optional space
            (?:\n[ ]*)? # one optional newline followed by spaces
            \[
              (.*?) # id = $3
            \]
          )
          }xs',
          $reference_callback, $text);

      #
      # Next, inline-style links: [link text](url "optional title")
      #
      $text = preg_replace_callback('{
          ( # wrap whole match in $1
            \[
              ('.$this->nested_brackets_re.') # link text = $2
            \]
            \( # literal paren
              [ \n]*
              (?:
                  <(.+?)> # href = $3
              |
                  ('.$this->nested_url_parenthesis_re.') # href = $4
              )
              [ \n]*
              ( # $5
                ([\'"]) # quote char = $6
                (.*?) # Title = $7
                \6 # matching quote
                [ \n]* # ignore any spaces/tabs between closing quote and )
              )? # title is optional
            \)
          )
          }xs',
          $inline_callback, $text);

      #
      # Last, handle reference-style shortcuts: [link text]
      # These must come last in case you've also got [link text][1]
      # or [link text](/foo)
      #
      $text = preg_replace_callback('{
          ( # wrap whole match in $1
            \[
              ([^\[\]]+) # link text = $2; can\'t contain [ or ]
            \]
          )
          }xs',
          $reference_callback, $text);

      $this->in_anchor = false;

      return $text;
  }
  protected function _doAnchors_reference_callback($matches) {
      #
      # Builds the <a> element for a reference-style link.
      # $matches[1] is the whole match, $matches[2] the link text and
      # $matches[3] the link id (absent for the [text] shortcut form).
      # Returns the hashed anchor, or the whole match untouched when the
      # id has no known URL.
      #
      $link_text = $matches[2];
      $id = isset($matches[3]) ? $matches[3] : '';

      if ($id == "") {
          # for shortcut links like [this][] or [this].
          $id = $link_text;
      }

      # lower-case and turn embedded newlines into spaces
      $id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

      # Unknown reference: leave the source text as it was.
      if (!isset($this->urls[$id])) {
          return $matches[1];
      }

      $href = $this->encodeURLAttribute($this->urls[$id]);
      $anchor = "<a href=\"$href\"";

      if (isset($this->titles[$id])) {
          $title_attr = $this->encodeAttribute($this->titles[$id]);
          $anchor .= " title=\"$title_attr\"";
      }

      $anchor .= ">" . $this->runSpanGamut($link_text) . "</a>";

      return $this->hashPart($anchor);
  }
  protected function _doAnchors_inline_callback($matches) {
      #
      # Builds the <a> element for an inline link.
      # $matches[2] is the link text, $matches[3] (angle-bracketed form)
      # or $matches[4] the URL, and $matches[7] the optional title.
      # Returns the hashed anchor.
      #
      $link_text = $this->runSpanGamut($matches[2]);

      if ($matches[3] == '') {
          $href = $matches[4];
      } else {
          $href = $matches[3];
      }

      # If the URL was of the form <s p a c e s> it got caught by the HTML
      # tag parser and hashed. Need to reverse the process before using
      # the URL.
      $unhashed = $this->unhash($href);
      if ($unhashed != $href) {
          $href = preg_replace('/^<(.*)>$/', '\1', $unhashed);
      }

      $href = $this->encodeURLAttribute($href);
      $anchor = "<a href=\"$href\"";

      # The title group is absent from $matches when no title was given.
      if (isset($matches[7])) {
          $title_attr = $this->encodeAttribute($matches[7]);
          $anchor .= " title=\"$title_attr\"";
      }

      # NOTE(review): the link text goes through the span gamut a second
      # time here; this mirrors the existing behaviour and is kept as is.
      $link_text = $this->runSpanGamut($link_text);
      $anchor .= ">$link_text</a>";

      return $this->hashPart($anchor);
  }
  protected function doImages($text) {
      #
      # Turn Markdown image shortcuts into <img> tags.
      #
      $reference_callback = array($this, '_doImages_reference_callback');
      $inline_callback    = array($this, '_doImages_inline_callback');

      #
      # First, handle reference-style labeled images: ![alt text][id]
      #
      $text = preg_replace_callback('{
          ( # wrap whole match in $1
            !\[
              ('.$this->nested_brackets_re.') # alt text = $2
            \]
            [ ]? # one optional space
            (?:\n[ ]*)? # one optional newline followed by spaces
            \[
              (.*?) # id = $3
            \]
          )
          }xs',
          $reference_callback, $text);

      #
      # Next, handle inline images: ![alt text](url "optional title")
      # Don't forget: encode * and _
      #
      return preg_replace_callback('{
          ( # wrap whole match in $1
            !\[
              ('.$this->nested_brackets_re.') # alt text = $2
            \]
            \s? # One optional whitespace character
            \( # literal paren
              [ \n]*
              (?:
                  <(\S*)> # src url = $3
              |
                  ('.$this->nested_url_parenthesis_re.') # src url = $4
              )
              [ \n]*
              ( # $5
                ([\'"]) # quote char = $6
                (.*?) # title = $7
                \6 # matching quote
                [ \n]*
              )? # title is optional
            \)
          )
          }xs',
          $inline_callback, $text);
  }
  protected function _doImages_reference_callback($matches) {
      #
      # Builds the <img> element for a reference-style image.
      # $matches[1] is the whole match, $matches[2] the alt text and
      # $matches[3] the link id. Returns the hashed image tag, or the
      # whole match untouched when the id has no known URL.
      #
      $alt = $matches[2];
      $id  = strtolower($matches[3]);

      if ($id == "") {
          $id = strtolower($alt); # for shortcut links like ![this][].
      }

      $alt = $this->encodeAttribute($alt);

      # If there's no such link ID, leave intact:
      if (!isset($this->urls[$id])) {
          return $matches[1];
      }

      $src = $this->encodeURLAttribute($this->urls[$id]);
      $img = "<img src=\"$src\" alt=\"$alt\"";

      if (isset($this->titles[$id])) {
          $title_attr = $this->encodeAttribute($this->titles[$id]);
          $img .= " title=\"$title_attr\"";
      }

      $img .= $this->empty_element_suffix;

      return $this->hashPart($img);
  }
  protected function _doImages_inline_callback($matches) {
      #
      # Builds the <img> element for an inline image.
      # $matches[2] is the alt text, $matches[3] (angle-bracketed form) or
      # $matches[4] the URL, and $matches[7] the optional title.
      # Returns the hashed image tag.
      #
      if ($matches[3] == '') {
          $src = $matches[4];
      } else {
          $src = $matches[3];
      }

      $alt = $this->encodeAttribute($matches[2]);
      $src = $this->encodeURLAttribute($src);
      $img = "<img src=\"$src\" alt=\"$alt\"";

      # The title group is absent from $matches when no title was given.
      if (isset($matches[7])) {
          $title_attr = $this->encodeAttribute($matches[7]);
          $img .= " title=\"$title_attr\"";
      }

      $img .= $this->empty_element_suffix;

      return $this->hashPart($img);
  }
  protected function doHeaders($text) {
      #
      # Converts setext-style and atx-style headers; the callbacks build
      # the header elements.
      #
      $setext_callback = array($this, '_doHeaders_callback_setext');
      $atx_callback    = array($this, '_doHeaders_callback_atx');

      # Setext-style headers:
      #     Header 1
      #     ========
      #
      #     Header 2
      #     --------
      #
      $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
          $setext_callback, $text);

      # atx-style headers:
      #   # Header 1
      #   ## Header 2
      #   ## Header 2 with closing hashes ##
      #   ...
      #   ###### Header 6
      #
      return preg_replace_callback('{
              ^(\#{1,6}) # $1 = string of #\'s
              [ ]*
              (.+?) # $2 = Header text
              [ ]*
              \#* # optional closing #\'s (not counted)
              \n+
          }xm',
          $atx_callback, $text);
  }
  protected function _doHeaders_callback_setext($matches) {
      #
      # Builds the header element for a setext-style header.
      # $matches[1] is the header text and $matches[2] the underline
      # (a run of "=" for level 1, a run of "-" for level 2).
      # Returns the hashed <h1>/<h2> block surrounded by newlines, or the
      # whole match untouched when it looks like an empty list item.
      #

      # Terrible hack to check we haven't found an empty list item.
      if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
          return $matches[0];

      # Read the first underline character with square brackets: the
      # curly-brace offset syntax is deprecated in PHP 7.4 and removed
      # in PHP 8.0.
      $level = $matches[2][0] == '=' ? 1 : 2;
      $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
      return "\n" . $this->hashBlock($block) . "\n\n";
  }
  protected function _doHeaders_callback_atx($matches) {
      #
      # Builds the header element for an atx-style header. The level is
      # the number of leading "#" characters ($matches[1]); $matches[2]
      # is the header text.
      #
      $level = strlen($matches[1]);
      $body  = $this->runSpanGamut($matches[2]);
      $token = $this->hashBlock("<h$level>" . $body . "</h$level>");

      return "\n" . $token . "\n\n";
  }
  protected function doLists($text) {
      #
      # Form HTML ordered (numbered) and unordered (bulleted) lists.
      #
      $max_indent = $this->tab_width - 1;

      # Re-usable patterns to match list item bullets and number markers:
      $marker_ul_re = '[*+-]';
      $marker_ol_re = '\d+[\.]';

      # Each marker pattern is paired with the pattern of the other kind
      # of list, which ends the current list when met at the same indent.
      $markers_relist = array(
          $marker_ul_re => $marker_ol_re,
          $marker_ol_re => $marker_ul_re,
          );

      $callback = array($this, '_doLists_callback');

      foreach ($markers_relist as $marker_re => $other_marker_re) {
          # Re-usable pattern to match any entire ul or ol list:
          $whole_list_re = '
              ( # $1 = whole list
                ( # $2
                  ([ ]{0,'.$max_indent.'}) # $3 = number of spaces
                  ('.$marker_re.') # $4 = first list item marker
                  [ ]+
                )
                (?s:.+?)
                ( # $5
                    \z
                  |
                    \n{2,}
                    (?=\S)
                    (?! # Negative lookahead for another list item marker
                      [ ]*
                      '.$marker_re.'[ ]+
                    )
                  |
                    (?= # Lookahead for another kind of list
                      \n
                      \3 # Must have the same indentation
                      '.$other_marker_re.'[ ]+
                    )
                )
              )
          '; // mx

          # We use a different prefix before nested lists than top-level
          # lists. See extended comment in processListItems().
          if ($this->list_level) {
              $pattern = '{
                      ^
                      '.$whole_list_re.'
                  }mx';
          }
          else {
              $pattern = '{
                      (?:(?<=\n)\n|\A\n?) # Must eat the newline
                      '.$whole_list_re.'
                  }mx';
          }

          $text = preg_replace_callback($pattern, $callback, $text);
      }

      return $text;
  }
  protected function _doLists_callback($matches) {
      #
      # Builds the <ul>/<ol> element for one whole list.
      # $matches[1] is the whole list and $matches[4] its first item
      # marker, which decides the list type. Returns the hashed list
      # block surrounded by newlines.
      #

      # Re-usable patterns to match list item bullets and number markers:
      $marker_ul_re = '[*+-]';
      $marker_ol_re = '\d+[\.]';

      $list = $matches[1];
      $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";

      # Only markers of the list's own kind delimit its items. (The
      # combined ul|ol pattern formerly assigned here was overwritten
      # before use and has been dropped.)
      $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

      $list .= "\n";
      $result = $this->processListItems($list, $marker_any_re);

      $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
      return "\n". $result ."\n\n";
  }
  # List nesting depth: incremented and decremented by processListItems();
  # doLists() uses a different list prefix pattern when it is non-zero.
  protected $list_level = 0;
  protected function processListItems($list_str, $marker_any_re) {
      #
      # Process the contents of a single ordered or unordered list,
      # splitting it into individual list items. $marker_any_re is the
      # pattern matching the item markers of this list.
      #
      # $this->list_level keeps track of when we're inside a list: it is
      # incremented on entry and decremented on exit, so zero means we
      # are not in a list anymore.
      #
      # We do this because when we're not inside a list, we want to treat
      # something like this:
      #
      #     I recommend upgrading to version
      #     8. Oops, now this line is treated
      #     as a sub-list.
      #
      # as a single paragraph, despite the fact that the second line
      # starts with a digit-period-space sequence.
      #
      # Whereas when we're inside a list (or sub-list), that line will be
      # treated as the start of a sub-list. What a kludge, huh? This is
      # an aspect of Markdown's syntax that's hard to parse perfectly
      # without resorting to mind-reading. Perhaps the solution is to
      # change the syntax rules such that sub-lists must start with a
      # starting cardinal number; e.g. "1." or "a.".
      #
      $this->list_level++;

      # trim trailing blank lines:
      $trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

      $items = preg_replace_callback('{
          (\n)? # leading line = $1
          (^[ ]*) # leading whitespace = $2
          ('.$marker_any_re.' # list marker and space = $3
              (?:[ ]+|(?=\n)) # space only required if item is not empty
          )
          ((?s:.*?)) # list item text = $4
          (?:(\n+(?=\n))|\n) # tailing blank line = $5
          (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
          }xm',
          array($this, '_processListItems_callback'), $trimmed);

      $this->list_level--;

      return $items;
  }
  protected function _processListItems_callback($matches) {
      #
      # Builds the <li> element for one list item.
      # $matches[1] is the leading blank line, $matches[2] the leading
      # whitespace, $matches[3] the marker with its spaces, $matches[4]
      # the item text and $matches[5] the tailing blank line.
      #
      $item = $matches[4];

      # An item set apart by blank lines, or containing one, gets
      # block-level processing; otherwise it is processed as a span.
      $is_block_item = !empty($matches[1])
          || !empty($matches[5])
          || preg_match('/\n{2,}/', $item);

      if ($is_block_item) {
          # Replace marker with the appropriate whitespace indentation
          $marker_padding = str_repeat(' ', strlen($matches[3]));
          $item = $matches[2] . $marker_padding . $item;
          $item = $this->runBlockGamut($this->outdent($item)."\n");
      }
      else {
          # Recursion for sub-lists:
          $item = $this->doLists($this->outdent($item));
          $item = $this->runSpanGamut(preg_replace('/\n+$/', '', $item));
      }

      return "<li>" . $item . "</li>\n";
  }
  protected function doCodeBlocks($text) {
      #
      # Process Markdown `<pre><code>` blocks: runs of lines indented by
      # at least one tab width of spaces are handed to
      # _doCodeBlocks_callback.
      #
      $indent = $this->tab_width;

      return preg_replace_callback('{
              (?:\n\n|\A\n?)
              ( # $1 = the code block -- one or more lines, starting with a space/tab
                (?>
                  [ ]{'.$indent.'} # Lines must start with a tab or a tab-width of spaces
                  .*\n+
                )+
              )
              ((?=^[ ]{0,'.$indent.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
          }xm',
          array($this, '_doCodeBlocks_callback'), $text);
  }
  protected function _doCodeBlocks_callback($matches) {
      #
      # Builds the <pre><code> element for one code block ($matches[1])
      # and returns its hash token surrounded by blank lines.
      #
      $code = $this->outdent($matches[1]);
      $code = htmlspecialchars($code, ENT_NOQUOTES);

      # trim leading newlines and trailing newlines
      $code = preg_replace('/\A\n+|\n+\z/', '', $code);

      $token = $this->hashBlock("<pre><code>$code\n</code></pre>");

      return "\n\n" . $token . "\n\n";
  }
  protected function makeCodeSpan($code) {
      #
      # Create a code span markup for $code and return its hash token.
      # (Per the original note, this is called from handleSpanToken.)
      #
      $code = htmlspecialchars(trim($code), ENT_NOQUOTES);

      # Vanilla: do <pre><code> if there are newlines in the code.
      # The strpos result is compared with false explicitly rather than
      # tested for truthiness, so the check does not depend on the
      # newline never sitting at offset 0.
      if (strpos($code, "\n") !== false)
          return $this->hashPart("<pre><code>$code</code></pre>");

      return $this->hashPart("<code>$code</code>");
  }
  # Token expressions for emphasis markers. prepareItalicsAndBold()
  # combines them into one expression per "$em$strong" key pair.
  # NOTE(review): the keys appear to be the currently open marker, with
  # '' meaning none is open -- confirm against doItalicsAndBold().
  protected $em_relist = array(
      ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
      '*' => '(?<![\s*])\*(?!\*)',
      '_' => '(?<![\s_])_(?!_)',
      );
  protected $strong_relist = array(
      ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
      '**' => '(?<![\s*])\*\*(?!\*)',
      '__' => '(?<![\s_])__(?!_)',
      );
  protected $em_strong_relist = array(
      ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
      '***' => '(?<![\s*])\*\*\*(?!\*)',
      '___' => '(?<![\s_])___(?!_)',
      );
  # Filled by prepareItalicsAndBold(): "$em$strong" key => token regex.
  protected $em_strong_prepared_relist;
  protected function prepareItalicsAndBold() {
      #
      # Prepare regular expressions for searching emphasis tokens in any
      # context: one expression is stored for every combination of an
      # em key and a strong key, indexed by their concatenation.
      #
      foreach ($this->em_relist as $em => $em_re) {
          foreach ($this->strong_relist as $strong => $strong_re) {
              $context = "$em$strong";

              # Construct list of allowed token expressions; the combined
              # em+strong expression, when one exists, comes first.
              $alternatives = array($em_re, $strong_re);
              if (isset($this->em_strong_relist[$context])) {
                  array_unshift($alternatives, $this->em_strong_relist[$context]);
              }

              # Construct master expression from list.
              $this->em_strong_prepared_relist[$context] =
                  '{(' . implode('|', $alternatives) . ')}';
          }
      }
  }
  protected function doItalicsAndBold($text) {
      #
      # Convert emphasis markers in $text into <em>, <strong> and
      # <strong><em> spans and return the resulting text.
      #
      # The text is consumed token by token. $token_stack holds the emphasis
      # tokens currently open (innermost first, with '' as the bottom
      # sentinel) and $text_stack holds the text accumulated at each of those
      # levels. $em and $strong hold the open em / strong token ('' when
      # none) and select the token expression for the current context.
      # Every completed span is run through runSpanGamut() and stored with
      # hashPart().
      #
      $token_stack = array('');
      $text_stack = array('');
      $em = '';
      $strong = '';
      $tree_char_em = false;

      while (1) {
          #
          # Get prepared regular expression for searching emphasis tokens
          # in current context.
          #
          $token_re = $this->em_strong_prepared_relist["$em$strong"];

          #
          # Each loop iteration searches for the next emphasis token and
          # handles it in the branches below.
          #
          $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
          $text_stack[0] .= $parts[0];
          # References: the entries do not exist when no token was found.
          $token =& $parts[1];
          $text =& $parts[2];

          if (empty($token)) {
              # Reached end of text span: empty stack without emitting
              # any more emphasis. Unmatched tokens are put back as text.
              while ($token_stack[0]) {
                  $text_stack[1] .= array_shift($token_stack);
                  $text_stack[0] .= array_shift($text_stack);
              }
              break;
          }

          $token_len = strlen($token);
          if ($tree_char_em) {
              # Reached closing marker while inside a three-char emphasis.
              if ($token_len == 3) {
                  # Three-char closing marker, close em and strong.
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong><em>$span</em></strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $em = '';
                  $strong = '';
              } else {
                  # Other closing marker: close one em or strong and
                  # change current token state to match the other.
                  # ($token[0] rather than $token{0}: curly-brace offsets
                  # are a parse error as of PHP 8.)
                  $token_stack[0] = str_repeat($token[0], 3-$token_len);
                  $tag = $token_len == 2 ? "strong" : "em";
                  $span = $text_stack[0];
                  $span = $this->runSpanGamut($span);
                  $span = "<$tag>$span</$tag>";
                  $text_stack[0] = $this->hashPart($span);
                  $$tag = ''; # $$tag stands for $em or $strong
              }
              $tree_char_em = false;
          } else if ($token_len == 3) {
              if ($em) {
                  # Reached closing marker for both em and strong: close
                  # the two innermost spans, whichever order they are in.
                  for ($i = 0; $i < 2; ++$i) {
                      $shifted_token = array_shift($token_stack);
                      $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<$tag>$span</$tag>";
                      $text_stack[0] .= $this->hashPart($span);
                      $$tag = ''; # $$tag stands for $em or $strong
                  }
              } else {
                  # Reached opening three-char emphasis marker. Push on token
                  # stack; will be handled by the special condition above.
                  $em = $token[0];
                  $strong = "$em$em";
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $tree_char_em = true;
              }
          } else if ($token_len == 2) {
              if ($strong) {
                  # Unwind any dangling emphasis marker:
                  if (strlen($token_stack[0]) == 1) {
                      $text_stack[1] .= array_shift($token_stack);
                      $text_stack[0] .= array_shift($text_stack);
                  }
                  # Closing strong marker:
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong>$span</strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $strong = '';
              } else {
                  # Opening strong marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $strong = $token;
              }
          } else {
              # Here $token_len == 1
              if ($em) {
                  if (strlen($token_stack[0]) == 1) {
                      # Closing emphasis marker:
                      array_shift($token_stack);
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<em>$span</em>";
                      $text_stack[0] .= $this->hashPart($span);
                      $em = '';
                  } else {
                      # Innermost open token is not the em one: keep the
                      # marker as literal text.
                      $text_stack[0] .= $token;
                  }
              } else {
                  # Opening emphasis marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $em = $token;
              }
          }
      }
      return $text_stack[0];
  }
  protected function doBlockQuotes($text) {
      #
      # Find each run of ">"-prefixed lines in $text and replace it with the
      # result of _doBlockQuotes_callback.
      #
      # Vanilla: delete `(.+\n)* # subsequent consecutive lines` from pattern,
      # so every line of a quote has to carry its own ">" marker.
      #
      $quote_run_re = '/
          (                   # Wrap whole match in $1
            (?>
              ^[ ]*>[ ]?      # ">" at the start of a line
                .+\n          # rest of the first line
              \n*             # blanks
            )+
          )
          /xm';

      return preg_replace_callback(
          $quote_run_re,
          array($this, '_doBlockQuotes_callback'),
          $text
      );
  }
  protected function _doBlockQuotes_callback($matches) {
      #
      # Turn one matched run of quoted lines ($matches[1]) into a hashed
      # <blockquote> block.
      #
      # Trim one level of quoting and empty out whitespace-only lines.
      $inner = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

      # Recurse: the quote body is itself block-level text.
      $inner = $this->runBlockGamut($inner);

      # Indent every line of the result...
      $inner = preg_replace('/^/m', " ", $inner);

      # ...then take that indentation back out of <pre> content, where
      # leading spaces cause problems.
      $inner = preg_replace_callback(
          '{(\s*<pre>.+?</pre>)}sx',
          array($this, '_doBlockQuotes_callback2'),
          $inner
      );

      # Vanilla: wrap the quote in the UserQuote / QuoteText markup.
      $html = "<blockquote class=\"UserQuote\"><div class=\"QuoteText\">\n$inner\n</div></blockquote>";

      return "\n" . $this->hashBlock($html) . "\n\n";
  }
  protected function _doBlockQuotes_callback2($matches) {
      # Remove one leading space from every line of the <pre> block captured
      # in $matches[1] and return the result.
      return preg_replace('/^ /m', '', $matches[1]);
  }
  protected function formParagraphs($text) {
      #
      # Params:
      #   $text - string to process with html <p> tags
      #
      # Splits $text on blank lines. A chunk consisting solely of a block
      # placeholder is replaced by its stored HTML; any other chunk is run
      # through the span gamut, wrapped in <p>...</p> and unhashed. The
      # chunks are joined back together with a blank line between them.
      #
      # (The commented-out special case for <div markdown="1"> blocks that
      # used to sit in the block branch was dead code and has been dropped.)
      #
      # Strip leading and trailing lines:
      $text = preg_replace('/\A\n+|\n+\z/', '', $text);

      $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

      foreach ($grafs as $key => $value) {
          if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
              # Is a block: swap the stored HTML back in.
              $grafs[$key] = $this->html_hashes[$value];
              continue;
          }

          # Is a paragraph: leading spaces are replaced by the opening tag.
          $paragraph = $this->runSpanGamut($value);
          $paragraph = preg_replace('/^([ ]*)/', "<p>", $paragraph) . "</p>";
          $grafs[$key] = $this->unhash($paragraph);
      }

      return implode("\n\n", $grafs);
  }
  protected function encodeAttribute($text) {
      #
      # Escape $text for use inside a double-quoted HTML attribute:
      # ampersands and "<" are handled by encodeAmpsAndAngles, then every
      # double quote becomes &quot;. Single quotes are left alone, so the
      # result is *not* suitable for attributes enclosed in single quotes.
      #
      return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
  }
  protected function encodeURLAttribute($url, &$text = null) {
      #
      # Encode $url for a double-quoted HTML attribute, applying the URL
      # filter first when one is set, and return the encoded URL.
      #
      # $text receives the textual representation of the URL: the encoded
      # URL itself, with a leading "mailto:" or "tel:" removed.
      # This function is *not* suitable for attributes enclosed in single
      # quotes.
      #
      if ($this->url_filter_func) {
          $url = call_user_func($this->url_filter_func, $url);
      }

      if (preg_match('{^mailto:}i', $url)) {
          # Entity-obfuscated; 7 is the length of "mailto:".
          return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
      }

      # Scheme is checked on the filtered, not yet encoded URL.
      $is_tel = (bool) preg_match('{^tel:}i', $url);

      $url = $this->encodeAttribute($url);
      # 4 is the length of "tel:".
      $text = $is_tel ? substr($url, 4) : $url;

      return $url;
  }
  protected function encodeAmpsAndAngles($text) {
      #
      # Encode the ampersands and "<" characters of $text that need it.
      # When no-entities mode is set every "&" is encoded; otherwise an "&"
      # that starts something shaped like a character entity is left alone.
      #
      if (!$this->no_entities) {
          # Ampersand-encoding based entirely on Nat Irons's Amputator
          # MT plugin: <http://bumppo.net/projects/amputator/>
          $text = preg_replace(
              '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
              '&amp;',
              $text
          );
      } else {
          $text = str_replace('&', '&amp;', $text);
      }

      # Encode remaining <'s
      return str_replace('<', '&lt;', $text);
  }
  protected function doAutoLinks($text) {
      #
      # Turn <url> and <email address> autolinks into anchors. URLs are
      # handled by _doAutoLinks_url_callback, e-mail addresses by
      # _doAutoLinks_email_callback.
      #
      $url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';

      # Email addresses: <address@domain.foo>
      $email_re = '{
          <
          (?:mailto:)?
          (
              (?:
                  [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
              |
                  ".*?"
              )
              \@
              (?:
                  [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
              |
                  \[[\d.a-fA-F:]+\]   # IPv4 & IPv6
              )
          )
          >
          }xi';

      $text = preg_replace_callback(
          $url_re, array($this, '_doAutoLinks_url_callback'), $text);

      return preg_replace_callback(
          $email_re, array($this, '_doAutoLinks_email_callback'), $text);
  }
  protected function _doAutoLinks_url_callback($matches) {
      # Build the hashed anchor for a URL autolink; $matches[1] is the URL.
      # encodeURLAttribute fills $label with the link text.
      $label = null;
      $href = $this->encodeURLAttribute($matches[1], $label);

      return $this->hashPart("<a href=\"$href\">$label</a>");
  }
  protected function _doAutoLinks_email_callback($matches) {
      # Build the hashed anchor for an e-mail autolink; $matches[1] is the
      # address. encodeURLAttribute fills $label with the link text.
      $label = null;
      $href = $this->encodeURLAttribute('mailto:' . $matches[1], $label);

      return $this->hashPart("<a href=\"$href\">$label</a>");
  }
  protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
      #
      # Input: some text to obfuscate, e.g. "mailto:foo@example.com"
      #
      # Output: the same text but with most characters encoded as either a
      # decimal or hex entity, in the hopes of foiling most address
      # harvesting spam bots. E.g.:
      #
      #   &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
      #   &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
      #   &#x6d;
      #
      # Note: the additional output $tail is assigned the same value as the
      # output, minus the number of characters specified by $head_length.
      #
      # Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
      # With some optimizations by Milian Wolff. Forced encoding of HTML
      # attribute special characters by Allan Odgaard.
      #
      if ($text == "") return $tail = "";

      # One array entry per byte. str_split is used instead of a regex split
      # on '/(?<!^)(?!$)/', which did not split before a trailing newline
      # and could therefore lose that newline when the final chunk was
      # replaced by a single entity.
      $chars = str_split($text);
      $seed = (int)abs(crc32($text) / strlen($text)); # Deterministic seed.

      foreach ($chars as $key => $char) {
          $ord = ord($char);
          # Ignore non-ascii chars.
          if ($ord >= 128) continue;

          $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
          # roughly 10% raw, 45% hex, 45% dec
          # '@' *must* be encoded. I insist.
          # '"' and '>' have to be encoded inside the attribute
          if ($r > 90 && strpos('@"&>', $char) === false) continue; # raw

          $chars[$key] = $r < 45
              ? '&#x'.dechex($ord).';'
              : '&#'.$ord.';';
      }

      $text = implode('', $chars);
      $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;

      return $text;
  }
  protected function parseSpan($str) {
      #
      # Parse $str into tokens, hashing embedded HTML and escaped
      # characters and handling code spans, and return the resulting text.
      #
      # The tag alternatives are only part of the expression when markup is
      # allowed.
      $markup_alternatives = $this->no_markup ? '' : '
          |
              <!--    .*?     -->         # comment
          |
              <\?.*?\?> | <%.*?%>         # processing instruction
          |
              <[!$]?[-a-zA-Z0-9:_]+       # regular tags
              (?>
                  \s
                  (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
              )?
              >
          |
              <[-a-zA-Z0-9:_]+\s*/>       # xml-style empty tag
          |
              </[-a-zA-Z0-9:_]+\s*>       # closing tag
          ';

      $span_re = '{
              (
                  \\\\'.$this->escape_chars_re.'
              |
                  (?<![`\\\\])
                  `+                      # code span marker
              '.$markup_alternatives.'
              )
              }xs';

      $output = '';
      do {
          #
          # Each iteration looks for either the next tag, the next opening
          # code span marker, or the next escaped character.
          #
          $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

          # Text preceding the token is copied through unchanged.
          $output .= $parts[0];

          $found_token = isset($parts[1]);
          if ($found_token) {
              # handleSpanToken receives the remaining text by reference
              # and may consume part of it.
              $output .= $this->handleSpanToken($parts[1], $parts[2]);
              $str = $parts[2];
          }
      } while ($found_token);

      return $output;
  }
  protected function handleSpanToken($token, &$str) {
      #
      # Handle $token provided by parseSpan by determining its nature and
      # returning the corresponding value that should replace it.
      #
      # $str is the text following the token; it is passed by reference and
      # is shortened when a code span consumes part of it.
      #
      # String offsets use $token[n]: the $token{n} form is deprecated in
      # PHP 7.4 and a parse error as of PHP 8.
      #
      switch ($token[0]) {
          case "\\":
              # Escaped character: replace by its numeric entity.
              return $this->hashPart("&#". ord($token[1]). ";");
          case "`":
              # Search for end marker in remaining text.
              if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                  $str, $matches))
              {
                  $str = $matches[2];
                  $codespan = $this->makeCodeSpan($matches[1]);
                  return $this->hashPart($codespan);
              }
              return $token; // return as text since no ending marker found.
          default:
              # Anything else is hashed as is.
              return $this->hashPart($token);
      }
  }
  protected function outdent($text) {
      #
      # Remove one level of line-leading indentation from every line of
      # $text: either one tab or up to $this->tab_width spaces.
      #
      $indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';

      return preg_replace($indent_re, '', $text);
  }
  # String length function for detab. `_initDetab` will create a function to
  # handle UTF-8 if the default function does not exist.
  protected $utf8_strlen = 'mb_strlen';
  protected function detab($text) {
      #
      # Replace tabs with the appropriate amount of space.
      #
      # Only lines that contain a tab are touched; each such line is
      # rebuilt by _detab_callback.
      #
      return preg_replace_callback(
          '/^.*\t.*$/m',
          array($this, '_detab_callback'),
          $text
      );
  }
  protected function _detab_callback($matches) {
      #
      # Rebuild one line ($matches[0]), replacing every tab by the number of
      # spaces needed to reach the next multiple of $this->tab_width.
      #
      $strlen = $this->utf8_strlen; # strlen function for UTF-8.

      # Split in blocks; the first one starts the rebuilt line.
      $blocks = explode("\t", $matches[0]);
      $line = array_shift($blocks);

      foreach ($blocks as $block) {
          # Pad up to the next tab stop, then add the block.
          $padding = $this->tab_width -
              $strlen($line, 'UTF-8') % $this->tab_width;
          $line .= str_repeat(" ", $padding) . $block;
      }

      return $line;
  }
  protected function _initDetab() {
      #
      # Check for the availability of the function in the `utf8_strlen`
      # property (initially `mb_strlen`). If the function is not available,
      # replace it by a closure that loosely counts the number of UTF-8
      # characters with a regular expression.
      #
      # A closure is used instead of create_function(), which is deprecated
      # in PHP 7.2 and removed in PHP 8.0. The instanceof check keeps a
      # repeated call from handing the closure to function_exists().
      #
      if ($this->utf8_strlen instanceof \Closure) return;
      if (function_exists($this->utf8_strlen)) return;

      # Callers pass an extra encoding argument; it is simply ignored here.
      $this->utf8_strlen = function ($text) {
          return preg_match_all(
              '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
              $text, $m);
      };
  }
  protected function unhash($text) {
      #
      # Swap back in everything that was replaced by a hash placeholder:
      # each placeholder found in $text is resolved by _unhash_callback.
      #
      $placeholder_re = '/(.)\x1A[0-9]+\1/';

      return preg_replace_callback(
          $placeholder_re, array($this, '_unhash_callback'), $text);
  }
  protected function _unhash_callback($matches) {
      # The whole matched placeholder is the key into $this->html_hashes.
      $placeholder = $matches[0];
      return $this->html_hashes[$placeholder];
  }
  1353. }
  1354. #
  1355. # Temporary Markdown Extra Parser Implementation Class
  1356. #
  1357. # NOTE: DON'T USE THIS CLASS
# Currently the implementation of Extra resides here in this temporary class.
  1359. # This makes it easier to propagate the changes between the three different
  1360. # packaging styles of PHP Markdown. When this issue is resolved, this
  1361. # MarkdownExtra_TmpImpl class here will disappear and \Michelf\MarkdownExtra
  1362. # will contain the code. So please use \Michelf\MarkdownExtra and ignore this
  1363. # one.
  1364. #
  1365. abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
  ### Configuration Variables ###

  # Prefix for footnote ids.
  public $fn_id_prefix = "";

  # Optional title attribute for footnote links and backlinks.
  public $fn_link_title = "";
  public $fn_backlink_title = "";

  # Optional class attribute for footnote links and backlinks.
  public $fn_link_class = "footnote-ref";
  public $fn_backlink_class = "footnote-backref";

  # Class name for table cell alignment (%% replaced left/center/right)
  # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
  # If empty, the align attribute is used instead of a class name.
  public $table_align_class_tmpl = '';

  # Optional class prefix for fenced code block.
  public $code_class_prefix = "";

  # Class attribute for code blocks goes on the `code` tag;
  # setting this to true will put attributes on the `pre` tag instead.
  public $code_attr_on_pre = false;

  # Predefined abbreviations, as an array of word => description pairs.
  # setup() loads them before each transformation.
  public $predef_abbr = array();
  1386. ### Parser Implementation ###
  public function __construct() {
      #
      # Constructor function. Initialize the parser object.
      #
      # Everything below has to happen before the parent constructor runs:
      # the parent builds the escape table from $escape_chars and sorts the
      # gamuts.
      #
      # Extra escapable characters.
      $this->escape_chars = $this->escape_chars . ':|';

      # Extra document, block, and span transformations. The array union
      # keeps any entry that is already present under the same name.
      $this->document_gamut = $this->document_gamut + array(
          "doFencedCodeBlocks" => 5,
          "stripFootnotes" => 15,
          "stripAbbreviations" => 25,
          "appendFootnotes" => 50,
      );
      $this->block_gamut = $this->block_gamut + array(
          "doFencedCodeBlocks" => 5,
          "doTables" => 15,
          "doDefLists" => 45,
      );
      $this->span_gamut = $this->span_gamut + array(
          "doFootnotes" => 5,
          "doAbbreviations" => 70,
      );

      parent::__construct();
  }
  # Extra variables used during extra transformations. All of them are
  # reset by setup(); teardown() clears them again, except for
  # $footnote_counter.
  protected $footnotes = array();
  protected $footnotes_ordered = array();
  protected $footnotes_ref_count = array();
  protected $footnotes_numbers = array();
  # Abbreviation word => description (the property name is misspelled, but
  # it is referenced under this name elsewhere).
  protected $abbr_desciptions = array();
  # Alternation of the preg_quote()d abbreviation words, '' when none.
  protected $abbr_word_re = '';

  # Give the current footnote number.
  protected $footnote_counter = 1;
  protected function setup() {
      #
      # Setting up Extra-specific variables: reset the footnote and
      # abbreviation state, then load the predefined abbreviations.
      #
      parent::setup();

      $this->footnotes =
      $this->footnotes_ordered =
      $this->footnotes_ref_count =
      $this->footnotes_numbers =
      $this->abbr_desciptions = array();

      $this->abbr_word_re = '';
      $this->footnote_counter = 1;

      foreach ($this->predef_abbr as $word => $description) {
          # Words are separated by "|" in the expression.
          if ($this->abbr_word_re) {
              $this->abbr_word_re .= '|';
          }
          $this->abbr_word_re .= preg_quote($word);
          $this->abbr_desciptions[$word] = trim($description);
      }
  }
  protected function teardown() {
      #
      # Clearing Extra-specific variables, then letting the parent class
      # clear its own.
      #
      $this->footnotes =
      $this->footnotes_ordered =
      $this->footnotes_ref_count =
      $this->footnotes_numbers =
      $this->abbr_desciptions = array();

      $this->abbr_word_re = '';

      parent::teardown();
  }
  ### Extra Attribute Parser ###

  # Expression to use to catch attributes (includes the braces). The
  # captured group is the list between the braces, in the form parsed by
  # doExtraAttributes().
  protected $id_class_attr_catch_re = '\{((?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
  # Expression to use when parsing in a context when no capture is desired
  protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
  protected function doExtraAttributes($tag_name, $attr) {
      #
      # Parse attributes caught by the $this->id_class_attr_catch_re
      # expression and return the HTML-formatted list of attributes (each
      # one preceded by a space), or "" when $attr is empty.
      #
      # Supported elements are .class, #id and key=value. key=value pairs
      # are only emitted when markup is allowed. $tag_name is not used.
      #
      # String offsets use $element[0]: the $element{0} form is deprecated
      # in PHP 7.4 and a parse error as of PHP 8.
      #
      if (empty($attr)) return "";

      # Split on components
      preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
      $elements = $matches[0];

      # handle classes and ids (only first id taken into account)
      $classes = array();
      $attributes = array();
      $id = false;
      foreach ($elements as $element) {
          if ($element[0] == '.') {
              $classes[] = substr($element, 1);
          } else if ($element[0] == '#') {
              if ($id === false) $id = substr($element, 1);
          } else if (strpos($element, '=') > 0) {
              $parts = explode('=', $element, 2);
              $attributes[] = $parts[0] . '="' . $parts[1] . '"';
          }
      }

      # compose attributes as string
      $attr_str = "";
      if (!empty($id)) {
          $attr_str .= ' id="'.$id.'"';
      }
      if (!empty($classes)) {
          $attr_str .= ' class="'.implode(" ", $classes).'"';
      }
      if (!$this->no_markup && !empty($attributes)) {
          $attr_str .= ' '.implode(" ", $attributes);
      }
      return $attr_str;
  }
  protected function stripLinkDefinitions($text) {
      #
      # Strips link definitions from text; the callback stores the URLs,
      # titles and extra attributes in hash references.
      #
      # A definition may be indented by at most tab_width - 1 spaces.
      $max_indent = $this->tab_width - 1;

      # Link defs are in the form: ^[id]: url "optional title"
      $link_def_re = '{
          ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:    # id = $1
            [ ]*
            \n?               # maybe *one* newline
            [ ]*
          (?:
            <(.+?)>           # url = $2
          |
            (\S+?)            # url = $3
          )
            [ ]*
            \n?               # maybe one newline
            [ ]*
          (?:
              (?<=\s)         # lookbehind for whitespace
              ["(]
              (.*?)           # title = $4
              [")]
              [ ]*
          )?  # title is optional
          (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
          (?:\n+|\Z)
          }xm';

      return preg_replace_callback(
          $link_def_re,
          array($this, '_stripLinkDefinitions_callback'),
          $text
      );
  }
  protected function _stripLinkDefinitions_callback($matches) {
      #
      # Record one link definition under its lower-cased id and remove it
      # from the text.
      #
      # $matches: 1 = id, 2 = url given in angle brackets, 3 = bare url,
      # 4 = title, 5 = extra attributes. Groups 4 and 5 are optional and
      # may be missing from the array.
      #
      $link_id = strtolower($matches[1]);
      $title = isset($matches[4]) ? $matches[4] : null;
      $extra_attr = isset($matches[5]) ? $matches[5] : null;

      $this->urls[$link_id] = ($matches[2] == '') ? $matches[3] : $matches[2];
      $this->titles[$link_id] = $title;
      $this->ref_attr[$link_id] = $this->doExtraAttributes("", $extra_attr);

      return ''; # String that will replace the block
  }
  ### HTML Block Parser ###

  # Tags that are always treated as block tags, written as a regular
  # expression alternation of tag names.
  protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
  1541. # Tags tre

Large files files are truncated, but you can click here to view the full file