PageRenderTime 57ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/kirby/parsers/markdown.extra.php

https://github.com/jordanstephens/kirbycms
PHP | 3072 lines | 1964 code | 345 blank | 763 comment | 198 complexity | 7c1c9d1c52ca240661b8593482cfe694 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. // direct access protection
  3. if(!defined('KIRBY')) die('Direct access is not allowed');
  /**
   * Kirby shorthand for converting Markdown source to HTML.
   *
   * Simply forwards to Markdown(); PHP resolves function names
   * case-insensitively, so `markdown` and `Markdown` are the same function.
   *
   * @param string $text Markdown source.
   * @return string Whatever Markdown() returns for $text.
   */
  function md($text) {
      $html = markdown($text);
      return $html;
  }
  10. #
  11. # Markdown Extra - A text-to-HTML conversion tool for web writers
  12. #
  13. # PHP Markdown & Extra
  14. # Copyright (c) 2004-2013 Michel Fortin
  15. # <http://michelf.ca/projects/php-markdown/>
  16. #
  17. # Original Markdown
  18. # Copyright (c) 2004-2006 John Gruber
  19. # <http://daringfireball.net/projects/markdown/>
  20. #
  # Library version identifiers (always defined by this file).
  define( 'MARKDOWN_VERSION', "1.0.1p" ); # Sun 13 Jan 2013
  define( 'MARKDOWNEXTRA_VERSION', "1.2.6" ); # Sun 13 Jan 2013
  #
  # Global default settings:
  #
  # Each setting below is only a default: a value defined before this file
  # is loaded wins. The explicit defined() guard replaces the former
  # `@define(...)` form, which relied on the error-suppression operator to
  # hide the "already defined" diagnostic (and would hide any other one too).
  #
  # Change to ">" for HTML output
  defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX') || define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
  # Define the width of a tab for code blocks.
  defined('MARKDOWN_TAB_WIDTH') || define( 'MARKDOWN_TAB_WIDTH', 4 );
  # Optional title attribute for footnote links and backlinks.
  defined('MARKDOWN_FN_LINK_TITLE') || define( 'MARKDOWN_FN_LINK_TITLE', "" );
  defined('MARKDOWN_FN_BACKLINK_TITLE') || define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
  # Optional class attribute for footnote links and backlinks.
  defined('MARKDOWN_FN_LINK_CLASS') || define( 'MARKDOWN_FN_LINK_CLASS', "" );
  defined('MARKDOWN_FN_BACKLINK_CLASS') || define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
  # Optional class prefix for fenced code block.
  defined('MARKDOWN_CODE_CLASS_PREFIX') || define( 'MARKDOWN_CODE_CLASS_PREFIX', "" );
  # Class attribute for code blocks goes on the `code` tag;
  # setting this to true will put attributes on the `pre` tag instead.
  defined('MARKDOWN_CODE_ATTR_ON_PRE') || define( 'MARKDOWN_CODE_ATTR_ON_PRE', false );
  #
  # WordPress settings:
  #
  # Change to false to remove Markdown from posts and/or comments.
  defined('MARKDOWN_WP_POSTS') || define( 'MARKDOWN_WP_POSTS', true );
  defined('MARKDOWN_WP_COMMENTS') || define( 'MARKDOWN_WP_COMMENTS', true );
  ### Standard Function Interface ###

  # Name of the parser class instantiated by Markdown(); may be predefined.
  @define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );

  /**
   * Convert Markdown text to HTML using a single, lazily created parser.
   *
   * @param string $text Markdown source.
   * @return string The parser's transform() result, or $text itself when
   *                the Kirby option `markdown` is strictly false.
   */
  function Markdown($text) {
      # Global Kirby switch for Markdown: hand the text back untouched
      # when the feature has been explicitly turned off.
      if (c::get('markdown') === false) {
          return $text;
      }

      # The parser instance is created on first use and kept for later calls.
      static $parser;
      if (!isset($parser)) {
          $class_name = MARKDOWN_PARSER_CLASS;
          $parser = new $class_name;
      }

      return $parser->transform($text);
  }
  64. ### WordPress Plugin Interface ###
  65. /*
  66. Plugin Name: Markdown Extra
  67. Plugin Name: Markdown
  68. Plugin URI: http://michelf.ca/projects/php-markdown/
  69. Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.ca/projects/php-markdown/">More...</a>
  70. Version: 1.2.6
  71. Author: Michel Fortin
  72. Author URI: http://michelf.ca/
  73. */
  74. if (isset($wp_version)) {
  75. # More details about how it works here:
  76. # <http://michelf.ca/weblog/2005/wordpress-text-flow-vs-markdown/>
  77. # Post content and excerpts
  78. # - Remove WordPress paragraph generator.
  79. # - Run Markdown on excerpt, then remove all tags.
  80. # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
  81. if (MARKDOWN_WP_POSTS) {
  # Detach WordPress' paragraph generator from content and excerpts.
  82. remove_filter('the_content', 'wpautop');
  83. remove_filter('the_content_rss', 'wpautop');
  84. remove_filter('the_excerpt', 'wpautop');
  # Route content and excerpts through mdwp_MarkdownPost (third argument 6).
  85. add_filter('the_content', 'mdwp_MarkdownPost', 6);
  86. add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
  87. add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
  # Excerpts are then trimmed (7); <p> is re-added for the_excerpt and
  # stripped again for the_excerpt_rss.
  88. add_filter('get_the_excerpt', 'trim', 7);
  89. add_filter('the_excerpt', 'mdwp_add_p');
  90. add_filter('the_excerpt_rss', 'mdwp_strip_p');
  # balanceTags is taken off the *_save_pre hooks and attached to the
  # display hooks instead.
  91. remove_filter('content_save_pre', 'balanceTags', 50);
  92. remove_filter('excerpt_save_pre', 'balanceTags', 50);
  93. add_filter('the_content', 'balanceTags', 50);
  94. add_filter('get_the_excerpt', 'balanceTags', 9);
  95. }
  /**
   * WordPress filter: run post text through the Markdown parser.
   *
   * Footnote ids get no prefix on single posts, pages and feeds; in any
   * other context they are prefixed with the current post id and a dot.
   *
   * @param string $text Post content or excerpt.
   * @return string The parser's transform() result.
   */
  function mdwp_MarkdownPost($text) {
      # One parser instance is created on first use and reused afterwards.
      static $parser;
      if (!$parser) {
          $class_name = MARKDOWN_PARSER_CLASS;
          $parser = new $class_name;
      }

      $standalone_view = is_single() || is_page() || is_feed();
      $parser->fn_id_prefix = $standalone_view ? "" : get_the_ID() . ".";

      return $parser->transform($text);
  }
  110. # Comments
  111. # - Remove WordPress paragraph generator.
  112. # - Remove WordPress auto-link generator.
  113. # - Scramble important tags before passing them to the kses filter.
  114. # - Run Markdown on excerpt then remove paragraph tags.
  115. if (MARKDOWN_WP_COMMENTS) {
  # Detach WordPress' paragraph and auto-link generators from comments.
  116. remove_filter('comment_text', 'wpautop', 30);
  117. remove_filter('comment_text', 'make_clickable');
  # On pre_comment_content: Markdown first (6), then tags are swapped for
  # placeholders (8) and swapped back later (12).
  # NOTE(review): presumably the kses filter runs between 8 and 12 -- confirm.
  118. add_filter('pre_comment_content', 'Markdown', 6);
  119. add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
  120. add_filter('pre_comment_content', 'mdwp_show_tags', 12);
  121. add_filter('get_comment_text', 'Markdown', 6);
  122. add_filter('get_comment_excerpt', 'Markdown', 6);
  123. add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
  # Two parallel lists read by mdwp_hide_tags()/mdwp_show_tags(): ten tags and
  # ten placeholder strings (stored rot13-encoded, decoded here).
  124. global $mdwp_hidden_tags, $mdwp_placeholders;
  125. $mdwp_hidden_tags = explode(' ',
  126. '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
  127. $mdwp_placeholders = explode(' ', str_rot13(
  128. 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
  129. 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
  130. }
  /**
   * Wrap text in <p> tags unless it is empty or already starts with a
   * block-level tag (p, ul, ol, dl, pre, blockquote).
   *
   * Runs of two or more newlines inside the wrapped text become paragraph
   * boundaries.
   *
   * @param string $text Excerpt text.
   * @return string The text, wrapped when needed.
   */
  function mdwp_add_p($text) {
      $starts_with_block = preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text);
      if ($starts_with_block) {
          return $text;
      }

      $wrapped = '<p>'.$text.'</p>';
      return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
  }
  /**
   * Remove every opening and closing <p> tag (case-insensitive) from $t.
   *
   * @param string $t HTML fragment.
   * @return string The fragment without <p> / </p> tags.
   */
  function mdwp_strip_p($t) {
      $paragraph_tag_re = '{</?p>}i';
      return preg_replace($paragraph_tag_re, '', $t);
  }
  /**
   * Replace each tag listed in the global $mdwp_hidden_tags with the
   * placeholder at the same position in the global $mdwp_placeholders.
   *
   * @param string $text Comment text.
   * @return string The text with tags swapped for placeholders.
   */
  function mdwp_hide_tags($text) {
      global $mdwp_hidden_tags, $mdwp_placeholders;

      $tags = $mdwp_hidden_tags;
      $stand_ins = $mdwp_placeholders;
      return str_replace($tags, $stand_ins, $text);
  }
  /**
   * Inverse of the tag-hiding step: replace each placeholder listed in the
   * global $mdwp_placeholders with the tag at the same position in the
   * global $mdwp_hidden_tags.
   *
   * @param string $text Comment text containing placeholders.
   * @return string The text with the tags restored.
   */
  function mdwp_show_tags($text) {
      global $mdwp_hidden_tags, $mdwp_placeholders;

      $stand_ins = $mdwp_placeholders;
      $tags = $mdwp_hidden_tags;
      return str_replace($stand_ins, $tags, $text);
  }
  147. }
  148. ### bBlog Plugin Info ###
  /**
   * Describe this file as a bBlog "modifier" plugin.
   *
   * @return array Plugin metadata: name, type, nicename, description,
   *               authors, licence, version (MARKDOWNEXTRA_VERSION), help.
   */
  function identify_modifier_markdown() {
      $info = array();
      $info['name'] = 'markdown';
      $info['type'] = 'modifier';
      $info['nicename'] = 'PHP Markdown Extra';
      $info['description'] = 'A text-to-HTML conversion tool for web writers';
      $info['authors'] = 'Michel Fortin and John Gruber';
      $info['licence'] = 'GPL';
      $info['version'] = MARKDOWNEXTRA_VERSION;
      $info['help'] = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.ca/projects/php-markdown/">More...</a>';
      return $info;
  }
  161. ### Smarty Modifier Interface ###
  /**
   * Smarty modifier entry point; forwards to Markdown().
   *
   * @param string $text Markdown source.
   * @return string Whatever Markdown() returns for $text.
   */
  function smarty_modifier_markdown($text) {
      $html = Markdown($text);
      return $html;
  }
  165. ### Textile Compatibility Mode ###
  166. # Rename this file to "classTextile.php" and it can replace Textile everywhere.
  if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
      # PHP SmartyPants is optional; a missing file is silently ignored.
      @include_once 'smartypants.php';

      /**
       * Stand-in for the Textile class that converts with Markdown instead.
       * Only declared when this file has been renamed to classTextile.php.
       */
      class Textile {
          /**
           * Convert $text with Markdown (only when both $lite and $encode
           * are empty) and then with SmartyPants when that function exists.
           */
          function TextileThis($text, $lite='', $encode='') {
              $full_conversion = ($lite == '' && $encode == '');
              if ($full_conversion) {
                  $text = Markdown($text);
              }
              if (function_exists('SmartyPants')) {
                  $text = SmartyPants($text);
              }
              return $text;
          }

          /**
           * Restricted variant. Restrictions are not supported for now, so
           * this forwards to TextileThis() and ignores $noimage.
           */
          function TextileRestricted($text, $lite='', $noimage='') {
              $converted = $this->TextileThis($text, $lite);
              return $converted;
          }

          /**
           * Workaround to ensure compatibility with TextPattern 4.0.3:
           * returns $text unchanged.
           */
          function blockLite($text) {
              return $text;
          }
      }
  }
  185. #
  186. # Markdown Parser Class
  187. #
  188. class Markdown_Parser {
  189. ### Configuration Variables ###
  190. # Change to ">" for HTML output.
  191. var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
  192. var $tab_width = MARKDOWN_TAB_WIDTH;
  193. # Change to `true` to disallow markup or entities.
  194. var $no_markup = false;
  195. var $no_entities = false;
  196. # Predefined urls and titles for reference links and images.
  197. var $predef_urls = array();
  198. var $predef_titles = array();
  199. ### Parser Implementation ###
  200. # Regex to match balanced [brackets].
  201. # Needed to insert a maximum bracket depth while converting to PHP.
  202. var $nested_brackets_depth = 6;
  203. var $nested_brackets_re;
  204. var $nested_url_parenthesis_depth = 4;
  205. var $nested_url_parenthesis_re;
  206. # Table of hash values for escaped characters:
  207. var $escape_chars = '\`*_{}[]()>#+-.!';
  208. var $escape_chars_re;
  function Markdown_Parser() {
      #
      # Constructor: prepares detab and emphasis handling, builds the
      # depth-limited "balanced brackets / parentheses" patterns and the
      # escape-character class, then orders the three gamuts.
      #
      # NOTE(review): this is a PHP 4-style constructor (method named after
      # the class). PHP 8.0 no longer treats such methods as constructors;
      # the name is kept because code outside this block may call it directly.
      #
      $this->_initDetab();
      $this->prepareItalicsAndBold();

      # Each pattern is an opening part repeated <depth> times followed by a
      # closing part repeated the same number of times.
      $bracket_depth = $this->nested_brackets_depth;
      $bracket_open = str_repeat('(?>[^\[\]]+|\[', $bracket_depth);
      $bracket_close = str_repeat('\])*', $bracket_depth);
      $this->nested_brackets_re = $bracket_open.$bracket_close;

      $paren_depth = $this->nested_url_parenthesis_depth;
      $paren_open = str_repeat('(?>[^()\s]+|\(', $paren_depth);
      $paren_close = str_repeat('(?>\)))*', $paren_depth);
      $this->nested_url_parenthesis_re = $paren_open.$paren_close;

      $quoted_escapes = preg_quote($this->escape_chars);
      $this->escape_chars_re = '['.$quoted_escapes.']';

      # Sort document, block, and span gamut in ascending priority order.
      asort($this->document_gamut);
      asort($this->block_gamut);
      asort($this->span_gamut);
  }
  227. # Internal hashes used during transformation.
  228. var $urls = array();
  229. var $titles = array();
  230. var $html_hashes = array();
  231. # Status flag to avoid invalid nesting.
  232. var $in_anchor = false;
  function setup() {
      #
      # Called before the transformation process starts to set up parser
      # state.
      #
      # Start every run from the predefined reference links and an empty
      # hash table.
      $this->urls = $this->predef_urls;
      $this->titles = $this->predef_titles;
      $this->html_hashes = array();

      # Reset the anchor-nesting flag on the object itself. Assigning to a
      # bare local `$in_anchor` leaves the property untouched, and
      # doAnchors() returns its input unchanged while the flag is true.
      $this->in_anchor = false;
  }
  function teardown() {
      #
      # Called after the transformation process: empties the url, title
      # and HTML-hash tables so they do not keep taking up memory.
      #
      $this->urls = $this->titles = $this->html_hashes = array();
  }
  function transform($text) {
      #
      # Main entry point: normalises the input text, hashes raw HTML
      # blocks, runs every document gamut method in order and returns the
      # result followed by a newline.
      #
      $this->setup();

      # Remove UTF-8 BOM and marker character in input, if present.
      $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

      # DOS and Mac line endings become Unix ones, and the text is made to
      # end with a couple of newlines.
      $text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

      # Tabs become spaces, then block-level HTML is turned into hash entries.
      $text = $this->hashHTMLBlocks($this->detab($text));

      # Blank out lines consisting only of spaces so that later patterns
      # can match consecutive blank lines with /\n+/.
      $text = preg_replace('/^[ ]+$/m', '', $text);

      # Run document gamut methods.
      foreach (array_keys($this->document_gamut) as $step) {
          $text = $this->$step($text);
      }

      $this->teardown();
      return $text . "\n";
  }
  282. var $document_gamut = array(
  283. # Strip link definitions, store in hashes.
  284. "stripLinkDefinitions" => 20,
  285. "runBasicBlockGamut" => 30,
  286. );
  287. function stripLinkDefinitions($text) {
  288. #
  289. # Strips link definitions from text, stores the URLs and titles in
  290. # hash references.
  291. #
  # Every definition matched in $text is handed to
  # _stripLinkDefinitions_callback() and replaced by that callback's return
  # value. Returns the resulting text.
  #
  # A definition may be indented by at most tab_width - 1 spaces.
  292. $less_than_tab = $this->tab_width - 1;
  293. # Link defs are in the form: ^[id]: url "optional title"
  294. $text = preg_replace_callback('{
  295. ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
  296. [ ]*
  297. \n? # maybe *one* newline
  298. [ ]*
  299. (?:
  300. <(.+?)> # url = $2
  301. |
  302. (\S+?) # url = $3
  303. )
  304. [ ]*
  305. \n? # maybe one newline
  306. [ ]*
  307. (?:
  308. (?<=\s) # lookbehind for whitespace
  309. ["(]
  310. (.*?) # title = $4
  311. [")]
  312. [ ]*
  313. )? # title is optional
  314. (?:\n+|\Z)
  315. }xm',
  316. array(&$this, '_stripLinkDefinitions_callback'),
  317. $text);
  318. return $text;
  319. }
  function _stripLinkDefinitions_callback($matches) {
      #
      # Records one link definition: the lower-cased id ($matches[1]) maps
      # to its url ($matches[2], or $matches[3] when that is empty) and to
      # its title ($matches[4], null when the definition has none).
      # Returns the empty string, which replaces the definition in the text.
      #
      $key = strtolower($matches[1]);

      if ($matches[2] == '') {
          $target = $matches[3];
      } else {
          $target = $matches[2];
      }

      $this->urls[$key] = $target;
      $this->titles[$key] = isset($matches[4]) ? $matches[4] : null;
      return '';
  }
  327. function hashHTMLBlocks($text) {
  #
  # Finds block-level raw HTML in $text and passes each match to
  # _hashHTMLBlocks_callback(), whose return value replaces it.
  # Returns $text unchanged when $this->no_markup is set.
  #
  328. if ($this->no_markup) return $text;
  # Blocks may be indented by at most tab_width - 1 spaces.
  329. $less_than_tab = $this->tab_width - 1;
  330. # Hashify HTML blocks:
  331. # We only want to do this for block-level HTML tags, such as headers,
  332. # lists, and tables. That's because we still want to wrap <p>s around
  333. # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  334. # phrase emphasis, and spans. The list of tags we're looking for is
  335. # hard-coded:
  336. #
  337. # * List "a" is made of tags which can be both inline or block-level.
  338. # These will be treated block-level when the start tag is alone on
  339. # its line, otherwise they're not matched here and will be taken as
  340. # inline later.
  341. # * List "b" is made of tags which are always block-level;
  342. #
  343. $block_tags_a_re = 'ins|del';
  344. $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
  345. 'script|noscript|form|fieldset|iframe|math|svg|'.
  346. 'article|section|nav|aside|hgroup|header|footer|'.
  347. 'figure';
  348. # Regular expression for the content of a block tag.
  # The content pattern below is unrolled $nested_tags_level times, which
  # bounds how deeply same-named tags may nest inside a matched block.
  349. $nested_tags_level = 4;
  350. $attr = '
  351. (?> # optional tag attributes
  352. \s # starts with whitespace
  353. (?>
  354. [^>"/]+ # text outside quotes
  355. |
  356. /+(?!>) # slash not followed by ">"
  357. |
  358. "[^"]*" # text inside double quotes (tolerate ">")
  359. |
  360. \'[^\']*\' # text inside single quotes (tolerate ">")
  361. )*
  362. )?
  363. ';
  364. $content =
  365. str_repeat('
  366. (?>
  367. [^<]+ # content without tag
  368. |
  369. <\2 # nested opening tag
  370. '.$attr.' # attributes
  371. (?>
  372. />
  373. |
  374. >', $nested_tags_level). # end of opening tag
  375. '.*?'. # last level nested tag content
  376. str_repeat('
  377. </\2\s*> # closing nested tag
  378. )
  379. |
  380. <(?!/\2\s*> # other tags with a different name
  381. )
  382. )*',
  383. $nested_tags_level);
  # Same content pattern, back-referencing group 3 (the list "a" tag name)
  # instead of group 2.
  384. $content2 = str_replace('\2', '\3', $content);
  385. # First, look for nested blocks, e.g.:
  386. # <div>
  387. # <div>
  388. # tags for inner block must be indented.
  389. # </div>
  390. # </div>
  391. #
  392. # The outermost tags must start at the left margin for this to match, and
  393. # the inner nested divs must be indented.
  394. # We need to do this before the next, more liberal match, because the next
  395. # match will start at the first `<div>` and stop at the first `</div>`.
  396. $text = preg_replace_callback('{(?>
  397. (?>
  398. (?<=\n\n) # Starting after a blank line
  399. | # or
  400. \A\n? # the beginning of the doc
  401. )
  402. ( # save in $1
  403. # Match from `\n<tag>` to `</tag>\n`, handling nested tags
  404. # in between.
  405. [ ]{0,'.$less_than_tab.'}
  406. <('.$block_tags_b_re.')# start tag = $2
  407. '.$attr.'> # attributes followed by > and \n
  408. '.$content.' # content, support nesting
  409. </\2> # the matching end tag
  410. [ ]* # trailing spaces/tabs
  411. (?=\n+|\Z) # followed by a newline or end of document
  412. | # Special version for tags of group a.
  413. [ ]{0,'.$less_than_tab.'}
  414. <('.$block_tags_a_re.')# start tag = $3
  415. '.$attr.'>[ ]*\n # attributes followed by >
  416. '.$content2.' # content, support nesting
  417. </\3> # the matching end tag
  418. [ ]* # trailing spaces/tabs
  419. (?=\n+|\Z) # followed by a newline or end of document
  420. | # Special case just for <hr />. It was easier to make a special
  421. # case than to make the other regex more complicated.
  422. [ ]{0,'.$less_than_tab.'}
  423. <(hr) # start tag = $2
  424. '.$attr.' # attributes
  425. /?> # the matching end tag
  426. [ ]*
  427. (?=\n{2,}|\Z) # followed by a blank line or end of document
  428. | # Special case for standalone HTML comments:
  429. [ ]{0,'.$less_than_tab.'}
  430. (?s:
  431. <!-- .*? -->
  432. )
  433. [ ]*
  434. (?=\n{2,}|\Z) # followed by a blank line or end of document
  435. | # PHP and ASP-style processor instructions (<? and <%)
  436. [ ]{0,'.$less_than_tab.'}
  437. (?s:
  438. <([?%]) # $2
  439. .*?
  440. \2>
  441. )
  442. [ ]*
  443. (?=\n{2,}|\Z) # followed by a blank line or end of document
  444. )
  445. )}Sxmi',
  446. array(&$this, '_hashHTMLBlocks_callback'),
  447. $text);
  448. return $text;
  449. }
  function _hashHTMLBlocks_callback($matches) {
      #
      # Replaces a matched HTML block ($matches[1]) with its block hash
      # key, surrounded by blank lines.
      #
      $token = $this->hashBlock($matches[1]);
      return "\n\n" . $token . "\n\n";
  }
  function hashPart($text, $boundary = 'X') {
      #
      # Stores $text in the hash table under a fresh token and returns
      # that token, which takes the place of $text in the text stream.
      #
      # The token is <boundary>, the \x1A marker character, a running
      # counter, then <boundary> again. By convention "B" is used for block
      # elements that need not be wrapped into paragraph tags at the end,
      # ":" for elements that are word separators and "X" in the general
      # case.
      #
      static $counter = 0;

      # Swap back any token already present in $text so that unhashing
      # does not have to be repeated at the end.
      $restored = $this->unhash($text);

      $counter++;
      $token = $boundary . "\x1A" . $counter . $boundary;
      $this->html_hashes[$token] = $restored;
      return $token;
  }
  function hashBlock($text) {
      #
      # Hashes $text through hashPart() using the block-level boundary "B".
      #
      $block_boundary = 'B';
      return $this->hashPart($text, $block_boundary);
  }
  481. var $block_gamut = array(
  482. #
  483. # These are all the transformations that form block-level
  484. # tags like paragraphs, headers, and list items.
  485. #
  486. "doHeaders" => 10,
  487. "doHorizontalRules" => 20,
  488. "doLists" => 40,
  489. "doCodeBlocks" => 50,
  490. "doBlockQuotes" => 60,
  491. );
  function runBlockGamut($text) {
      #
      # Hashes raw HTML blocks in $text, then runs the block gamut on it.
      #
      # The hashing has to be repeated for each block rather than done once
      # for the whole document: hashed blocks can be part of list items and
      # could have been indented, in which case an earlier hashHTMLBlocks
      # pass would have seen them as code blocks.
      #
      $hashed = $this->hashHTMLBlocks($text);
      return $this->runBasicBlockGamut($hashed);
  }
  function runBasicBlockGamut($text) {
      #
      # Runs every block gamut method in order without hashing HTML blocks
      # first (for text whose HTML blocks are already hashed, as in the
      # first whole-document pass), then forms paragraphs.
      #
      foreach (array_keys($this->block_gamut) as $step) {
          $text = $this->$step($text);
      }

      # Finally form paragraphs and restore hashed blocks.
      return $this->formParagraphs($text);
  }
  function doHorizontalRules($text) {
      #
      # Replaces each horizontal-rule line (three or more -, * or _
      # markers) with a hashed <hr> block on a line of its own.
      #
      $rule_re = '{
              ^[ ]{0,3}       # Leading space
              ([-*_])         # $1: First marker
              (?>             # Repeated marker group
                  [ ]{0,2}    # Zero, one, or two spaces.
                  \1          # Marker character
              ){2,}           # Group repeated at least twice
              [ ]*            # Tailing spaces
              $               # End of line.
          }mx';

      # A single hash token is created up front and reused for every match.
      $hr_token = $this->hashBlock("<hr$this->empty_element_suffix");

      return preg_replace($rule_re, "\n".$hr_token."\n", $text);
  }
  533. var $span_gamut = array(
  534. #
  535. # These are all the transformations that occur *within* block-level
  536. # tags like paragraphs, headers, and list items.
  537. #
  538. # Process character escapes, code spans, and inline HTML
  539. # in one shot.
  540. "parseSpan" => -30,
  541. # Process anchor and image tags. Images must come first,
  542. # because ![foo][f] looks like an anchor.
  543. "doImages" => 10,
  544. "doAnchors" => 20,
  545. # Make links out of things like `<http://example.com/>`
  546. # Must come after doAnchors, because you can use < and >
  547. # delimiters in inline links like [this](<url>).
  548. "doAutoLinks" => 30,
  549. "encodeAmpsAndAngles" => 40,
  550. "doItalicsAndBold" => 50,
  551. "doHardBreaks" => 60,
  552. );
  function runSpanGamut($text) {
      #
      # Runs every span gamut method on $text, in the order the gamut
      # array holds them, and returns the result.
      #
      foreach (array_keys($this->span_gamut) as $step) {
          $text = $this->$step($text);
      }
      return $text;
  }
  function doHardBreaks($text) {
      #
      # Turns hard line breaks into <br> tags via _doHardBreaks_callback().
      #
      # Kirby hack: with the `markdown.breaks` option enabled every single
      # newline counts as a break; otherwise only two or more spaces
      # followed by a newline do.
      #
      if (c::get('markdown.breaks')) {
          $break_re = '/ {2,}\n|\n{1}/';
      } else {
          $break_re = '/ {2,}\n/';
      }

      return preg_replace_callback($break_re,
          array(&$this, '_doHardBreaks_callback'), $text);
  }
  function _doHardBreaks_callback($matches) {
      #
      # Returns the hash token for a <br> tag followed by a newline; the
      # matched text itself is not used.
      #
      $br = "<br" . $this->empty_element_suffix . "\n";
      return $this->hashPart($br);
  }
  577. function doAnchors($text) {
  578. #
  579. # Turn Markdown link shortcuts into XHTML <a> tags.
  580. #
  # Three passes run in order: reference-style links, inline links, then
  # bare [shortcut] references. Returns the text with matches replaced by
  # the callbacks' results.
  #
  # While the in_anchor flag is set the text is returned untouched; the
  # flag is raised for the duration of this call and cleared before
  # returning.
  581. if ($this->in_anchor) return $text;
  582. $this->in_anchor = true;
  583. #
  584. # First, handle reference-style links: [link text] [id]
  585. #
  586. $text = preg_replace_callback('{
  587. ( # wrap whole match in $1
  588. \[
  589. ('.$this->nested_brackets_re.') # link text = $2
  590. \]
  591. [ ]? # one optional space
  592. (?:\n[ ]*)? # one optional newline followed by spaces
  593. \[
  594. (.*?) # id = $3
  595. \]
  596. )
  597. }xs',
  598. array(&$this, '_doAnchors_reference_callback'), $text);
  599. #
  600. # Next, inline-style links: [link text](url "optional title")
  601. #
  602. $text = preg_replace_callback('{
  603. ( # wrap whole match in $1
  604. \[
  605. ('.$this->nested_brackets_re.') # link text = $2
  606. \]
  607. \( # literal paren
  608. [ \n]*
  609. (?:
  610. <(.+?)> # href = $3
  611. |
  612. ('.$this->nested_url_parenthesis_re.') # href = $4
  613. )
  614. [ \n]*
  615. ( # $5
  616. ([\'"]) # quote char = $6
  617. (.*?) # Title = $7
  618. \6 # matching quote
  619. [ \n]* # ignore any spaces/tabs between closing quote and )
  620. )? # title is optional
  621. \)
  622. )
  623. }xs',
  624. array(&$this, '_doAnchors_inline_callback'), $text);
  625. #
  626. # Last, handle reference-style shortcuts: [link text]
  627. # These must come last in case you've also got [link text][1]
  628. # or [link text](/foo)
  629. #
  # This pattern has no id group, so the reference callback falls back to
  # using the link text as the id.
  630. $text = preg_replace_callback('{
  631. ( # wrap whole match in $1
  632. \[
  633. ([^\[\]]+) # link text = $2; can\'t contain [ or ]
  634. \]
  635. )
  636. }xs',
  637. array(&$this, '_doAnchors_reference_callback'), $text);
  638. $this->in_anchor = false;
  639. return $text;
  640. }
  function _doAnchors_reference_callback($matches) {
      #
      # Builds the <a> tag for a reference-style link. $matches[1] is the
      # whole match, $matches[2] the link text and $matches[3] (optional)
      # the reference id. Returns the hashed tag, or the whole match
      # unchanged when no url is stored for the id.
      #
      $label = $matches[2];

      # For shortcut links like [this][] or [this] the text is the id.
      $ref = isset($matches[3]) ? $matches[3] : '';
      if ($ref == "") {
          $ref = $label;
      }

      # Lower-case and turn embedded newlines into spaces.
      $ref = preg_replace('{[ ]?\n}', ' ', strtolower($ref));

      if (!isset($this->urls[$ref])) {
          return $matches[1];
      }

      $href = $this->encodeAttribute($this->urls[$ref]);
      $tag = "<a href=\"$href\"";

      if (isset($this->titles[$ref])) {
          $tip = $this->encodeAttribute($this->titles[$ref]);
          $tag .= " title=\"$tip\"";
      }

      $label = $this->runSpanGamut($label);
      $tag .= ">$label</a>";

      return $this->hashPart($tag);
  }
  function _doAnchors_inline_callback($matches) {
      #
      # Builds the <a> tag for an inline link. $matches[2] is the link
      # text, $matches[3] / $matches[4] the url (bracketed / bare form) and
      # $matches[7] the optional title. Returns the hashed tag.
      #
      $label = $this->runSpanGamut($matches[2]);

      $href = ($matches[3] == '') ? $matches[4] : $matches[3];
      $href = $this->encodeAttribute($href);
      $tag = "<a href=\"$href\"";

      if (isset($matches[7])) {
          $tip = $this->encodeAttribute($matches[7]);
          $tag .= " title=\"$tip\"";
      }

      # NOTE(review): the link text goes through runSpanGamut a second
      # time here; kept as found -- confirm whether that is intended.
      $label = $this->runSpanGamut($label);
      $tag .= ">$label</a>";

      return $this->hashPart($tag);
  }
  685. function doImages($text) {
  686. #
  687. # Turn Markdown image shortcuts into <img> tags.
  688. #
  # Two passes run in order: reference-style images, then inline images.
  # Returns the text with matches replaced by the callbacks' results.
  689. #
  690. # First, handle reference-style labeled images: ![alt text][id]
  691. #
  692. $text = preg_replace_callback('{
  693. ( # wrap whole match in $1
  694. !\[
  695. ('.$this->nested_brackets_re.') # alt text = $2
  696. \]
  697. [ ]? # one optional space
  698. (?:\n[ ]*)? # one optional newline followed by spaces
  699. \[
  700. (.*?) # id = $3
  701. \]
  702. )
  703. }xs',
  704. array(&$this, '_doImages_reference_callback'), $text);
  705. #
  706. # Next, handle inline images: ![alt text](url "optional title")
  707. # Don't forget: encode * and _
  708. #
  709. $text = preg_replace_callback('{
  710. ( # wrap whole match in $1
  711. !\[
  712. ('.$this->nested_brackets_re.') # alt text = $2
  713. \]
  714. \s? # One optional whitespace character
  715. \( # literal paren
  716. [ \n]*
  717. (?:
  718. <(\S*)> # src url = $3
  719. |
  720. ('.$this->nested_url_parenthesis_re.') # src url = $4
  721. )
  722. [ \n]*
  723. ( # $5
  724. ([\'"]) # quote char = $6
  725. (.*?) # title = $7
  726. \6 # matching quote
  727. [ \n]*
  728. )? # title is optional
  729. \)
  730. )
  731. }xs',
  732. array(&$this, '_doImages_inline_callback'), $text);
  733. return $text;
  734. }
  function _doImages_reference_callback($matches) {
      #
      # Builds the <img> tag for a reference-style image. $matches[1] is
      # the whole match, $matches[2] the alt text and $matches[3] the
      # reference id. Returns the hashed tag, or the whole match unchanged
      # when no url is stored for the id.
      #
      $alt = $matches[2];

      # For shortcut links like ![this][] the alt text is the id.
      $ref = strtolower($matches[3]);
      if ($ref == "") {
          $ref = strtolower($alt);
      }

      $alt = $this->encodeAttribute($alt);

      # If there's no such link ID, leave intact.
      if (!isset($this->urls[$ref])) {
          return $matches[1];
      }

      $src = $this->encodeAttribute($this->urls[$ref]);
      $tag = "<img src=\"$src\" alt=\"$alt\"";

      if (isset($this->titles[$ref])) {
          $tip = $this->encodeAttribute($this->titles[$ref]);
          $tag .= " title=\"$tip\"";
      }

      $tag .= $this->empty_element_suffix;
      return $this->hashPart($tag);
  }
  function _doImages_inline_callback($matches) {
      #
      # Builds the <img> tag for an inline image. $matches[2] is the alt
      # text, $matches[3] / $matches[4] the url (bracketed / bare form) and
      # $matches[7] the optional title. Returns the hashed tag.
      #
      $alt = $this->encodeAttribute($matches[2]);

      $src = ($matches[3] == '') ? $matches[4] : $matches[3];
      $src = $this->encodeAttribute($src);

      $tag = "<img src=\"$src\" alt=\"$alt\"";

      if (isset($matches[7])) {
          $tip = $this->encodeAttribute($matches[7]);
          $tag .= " title=\"$tip\"";
      }

      $tag .= $this->empty_element_suffix;
      return $this->hashPart($tag);
  }
  function doHeaders($text) {
      #
      # Converts setext-style and then atx-style headers; each match is
      # replaced by the corresponding callback's result.
      #
      # Setext-style headers:
      #     Header 1
      #     ========
      #
      #     Header 2
      #     --------
      #
      $setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';

      # atx-style headers:
      #     # Header 1
      #     ## Header 2
      #     ## Header 2 with closing hashes ##
      #     ...
      #     ###### Header 6
      #
      $atx_re = '{
              ^(\#{1,6})  # $1 = string of #\'s
              [ ]*
              (.+?)       # $2 = Header text
              [ ]*
              \#*         # optional closing #\'s (not counted)
              \n+
          }xm';

      $text = preg_replace_callback($setext_re,
          array(&$this, '_doHeaders_callback_setext'), $text);

      return preg_replace_callback($atx_re,
          array(&$this, '_doHeaders_callback_atx'), $text);
  }
  function _doHeaders_callback_setext($matches) {
      #
      # Builds an <h1> or <h2> block from a setext-style header match.
      # $matches[1] is the header text and $matches[2] the underline made
      # of `=` (level 1) or `-` (level 2) characters. Returns the hashed
      # header block padded with newlines, or the match untouched when it
      # is really an empty list item.
      #
      # Terrible hack to check we haven't found an empty list item.
      if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
          return $matches[0];

      # The first underline character is read with [] rather than {}:
      # curly-brace string offsets were deprecated in PHP 7.4 and are a
      # parse error from PHP 8.0 on.
      $level = $matches[2][0] == '=' ? 1 : 2;
      $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
      return "\n" . $this->hashBlock($block) . "\n\n";
  }
  function _doHeaders_callback_atx($matches) {
      #
      # Builds an <hN> block from an atx-style header match: N is the
      # number of leading # characters ($matches[1]) and $matches[2] is the
      # header text. Returns the hashed block padded with newlines.
      #
      $depth = strlen($matches[1]);
      $inner = $this->runSpanGamut($matches[2]);
      $token = $this->hashBlock("<h$depth>" . $inner . "</h$depth>");
      return "\n" . $token . "\n\n";
  }
  816. function doLists($text) {
  817. #
  818. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  819. #
  # Runs one pass per marker kind (bullets first, then numbers); every
  # whole list found is replaced by _doLists_callback()'s result.
  # Returns the resulting text.
  #
  # A list's first marker may be indented by at most tab_width - 1 spaces.
  820. $less_than_tab = $this->tab_width - 1;
  821. # Re-usable patterns to match list item bullets and number markers:
  822. $marker_ul_re = '[*+-]';
  823. $marker_ol_re = '\d+[\.]';
  # NOTE(review): $marker_any_re is assigned but not used in this function.
  824. $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
  # Maps each marker kind to the other kind, which ends a list when it
  # appears at the same indentation.
  825. $markers_relist = array(
  826. $marker_ul_re => $marker_ol_re,
  827. $marker_ol_re => $marker_ul_re,
  828. );
  829. foreach ($markers_relist as $marker_re => $other_marker_re) {
  830. # Re-usable pattern to match any entire ul or ol list:
  831. $whole_list_re = '
  832. ( # $1 = whole list
  833. ( # $2
  834. ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
  835. ('.$marker_re.') # $4 = first list item marker
  836. [ ]+
  837. )
  838. (?s:.+?)
  839. ( # $5
  840. \z
  841. |
  842. \n{2,}
  843. (?=\S)
  844. (?! # Negative lookahead for another list item marker
  845. [ ]*
  846. '.$marker_re.'[ ]+
  847. )
  848. |
  849. (?= # Lookahead for another kind of list
  850. \n
  851. \3 # Must have the same indentation
  852. '.$other_marker_re.'[ ]+
  853. )
  854. )
  855. )
  856. '; // mx
  857. # We use a different prefix before nested lists than top-level lists.
  858. # See extended comment in _ProcessListItems().
  # Inside a list (list_level > 0) a list may start at any line start;
  # at top level it must follow a blank line or begin the text.
  859. if ($this->list_level) {
  860. $text = preg_replace_callback('{
  861. ^
  862. '.$whole_list_re.'
  863. }mx',
  864. array(&$this, '_doLists_callback'), $text);
  865. }
  866. else {
  867. $text = preg_replace_callback('{
  868. (?:(?<=\n)\n|\A\n?) # Must eat the newline
  869. '.$whole_list_re.'
  870. }mx',
  871. array(&$this, '_doLists_callback'), $text);
  872. }
  873. }
  874. return $text;
  875. }
  function _doLists_callback($matches) {
    #
    # Regex callback for one whole list. $matches[1] is the list text and
    # $matches[4] its first item marker, which decides between <ul> and
    # <ol>. The items are processed with the marker pattern of that kind
    # only, and the resulting element is hashed as a block.
    #
    $marker_ul_re = '[*+-]';
    $marker_ol_re = '\d+[\.]';

    if (preg_match("/$marker_ul_re/", $matches[4])) {
      $list_type      = "ul";
      $item_marker_re = $marker_ul_re;
    } else {
      $list_type      = "ol";
      $item_marker_re = $marker_ol_re;
    }

    $items = $this->processListItems($matches[1] . "\n", $item_marker_re);
    $html  = $this->hashBlock("<$list_type>\n" . $items . "</$list_type>");

    return "\n" . $html . "\n\n";
  }
  # Current list nesting depth; zero means "not inside any list".
  var $list_level = 0;

  function processListItems($list_str, $marker_any_re) {
    #
    # Split the text of a single list ($list_str) into its items and
    # replace each one through _processListItems_callback.
    # $marker_any_re is the pattern recognising an item marker.
    #
    # $this->list_level is incremented while the items are processed and
    # decremented afterwards, so code running in between can tell that it
    # is inside a list. The distinction matters for text such as:
    #
    #     I recommend upgrading to version
    #     8. Oops, now this line is treated
    #     as a sub-list.
    #
    # Outside of a list this should remain one paragraph even though the
    # second line starts with digit-period-space; inside a list (or
    # sub-list) the same line starts a sub-list. Markdown's syntax cannot
    # settle this without knowing the context, hence the counter.
    #
    $this->list_level++;

    # Collapse the blank lines ending the list into a single newline.
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $item_re = '{
(\n)? # leading line = $1
(^[ ]*) # leading whitespace = $2
('.$marker_any_re.' # list marker and space = $3
(?:[ ]+|(?=\n)) # space only required if item is not empty
)
((?s:.*?)) # list item text = $4
(?:(\n+(?=\n))|\n) # tailing blank line = $5
(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
}xm';
    $list_str = preg_replace_callback($item_re,
      array(&$this, '_processListItems_callback'), $list_str);

    $this->list_level--;
    return $list_str;
  }
  function _processListItems_callback($matches) {
    #
    # Regex callback producing the <li> element for one list item.
    # Captures used: 1 = leading blank line, 2 = leading whitespace,
    # 3 = marker and following space, 4 = item text, 5 = trailing blank
    # line (may be absent from $matches, hence the empty() checks).
    #
    $item = $matches[4];

    $is_block_item = !empty($matches[1]) || !empty($matches[5])
      || preg_match('/\n{2,}/', $item);

    if ($is_block_item) {
      # Blank lines around or inside the item: swap the marker for
      # spaces of the same width and run the full block gamut.
      $padding = str_repeat(' ', strlen($matches[3]));
      $body = $matches[2] . $padding . $item;
      $body = $this->runBlockGamut($this->outdent($body) . "\n");
    } else {
      # Tight item: only look for sub-lists, then apply span markup.
      $body = $this->doLists($this->outdent($item));
      $body = preg_replace('/\n+$/', '', $body);
      $body = $this->runSpanGamut($body);
    }

    return "<li>" . $body . "</li>\n";
  }
  function doCodeBlocks($text) {
    #
    # Find indented code blocks in $text (runs of lines starting with
    # tab_width spaces) and replace each through _doCodeBlocks_callback.
    #
    $code_block_re = '{
(?:\n\n|\A\n?)
( # $1 = the code block -- one or more lines, starting with a space/tab
(?>
[ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
.*\n+
)+
)
((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
}xm';

    return preg_replace_callback($code_block_re,
      array(&$this, '_doCodeBlocks_callback'), $text);
  }
  function _doCodeBlocks_callback($matches) {
    #
    # Regex callback for one indented code block held in $matches[1]:
    # remove one indentation level, escape HTML special characters
    # (quotes are left alone), strip blank lines at both ends and wrap
    # the result in <pre><code>, hashed as a block.
    #
    $code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $html = "<pre><code>$code\n</code></pre>";
    return "\n\n" . $this->hashBlock($html) . "\n\n";
  }
  function makeCodeSpan($code) {
    #
    # Build the hashed <code> element for an inline code span. $code is
    # trimmed and HTML-escaped (quotes are left alone) first.
    #
    $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
    $html = "<code>$escaped</code>";
    return $this->hashPart($html);
  }
  # Token patterns for emphasis markers. Each table is keyed by the marker
  # that is currently open ('' when none is), and gives the pattern of
  # the token to look for in that state.
  var $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
    '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
    '_' => '(?<=\S|^)(?<!_)_(?!_)',
    );
  var $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
    '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
    '__' => '(?<=\S|^)(?<!_)__(?!_)',
    );
  var $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
    '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
    '___' => '(?<=\S|^)(?<!_)___(?!_)',
    );

  # Filled by prepareItalicsAndBold(): one complete token regex for each
  # combination "$em$strong" of open markers.
  var $em_strong_prepared_relist;

  function prepareItalicsAndBold() {
    #
    # Precompute, for every combination of open em and strong markers,
    # the regular expression matching the next emphasis token. The
    # three-character pattern, when one exists for the combination, is
    # tried first, then the em pattern, then the strong pattern.
    #
    foreach ($this->em_relist as $em => $em_re) {
      foreach ($this->strong_relist as $strong => $strong_re) {
        $context = "$em$strong";

        $alternatives = array($em_re, $strong_re);
        if (isset($this->em_strong_relist[$context])) {
          array_unshift($alternatives, $this->em_strong_relist[$context]);
        }

        $this->em_strong_prepared_relist[$context] =
          '{(' . implode('|', $alternatives) . ')}';
      }
    }
  }
  function doItalicsAndBold($text) {
    #
    # Convert emphasis markers (*, _, **, __, ***, ___) found in $text
    # into <em> / <strong> markup and return the result.
    #
    # The text is consumed token by token. $token_stack holds the markers
    # that are currently open and $text_stack the text gathered at each
    # nesting level (index 0 is always the innermost). $em and $strong
    # hold the open marker of each kind, or '' when that kind is closed;
    # together they select the token regex built by
    # prepareItalicsAndBold().
    #
    $token_stack = array('');
    $text_stack = array('');
    $em = '';
    $strong = '';
    $tree_char_em = false;

    while (1) {
      #
      # Get prepared regular expression for searching emphasis tokens
      # in current context.
      #
      $token_re = $this->em_strong_prepared_relist["$em$strong"];

      #
      # Each loop iteration searches for the next emphasis token, which
      # is then handled by the branches below.
      #
      $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
      $text_stack[0] .= $parts[0];
      # Bound by reference so that a missing index (no token was found)
      # yields null instead of an undefined-index notice.
      $token =& $parts[1];
      $text =& $parts[2];

      if (empty($token)) {
        # Reached end of text span: empty stack without emitting
        # any more emphasis. Unclosed markers go back in as plain text.
        while ($token_stack[0]) {
          $text_stack[1] .= array_shift($token_stack);
          $text_stack[0] .= array_shift($text_stack);
        }
        break;
      }

      $token_len = strlen($token);
      if ($tree_char_em) {
        # Reached closing marker while inside a three-char emphasis.
        if ($token_len == 3) {
          # Three-char closing marker, close em and strong.
          array_shift($token_stack);
          $span = array_shift($text_stack);
          $span = $this->runSpanGamut($span);
          $span = "<strong><em>$span</em></strong>";
          $text_stack[0] .= $this->hashPart($span);
          $em = '';
          $strong = '';
        } else {
          # Other closing marker: close one em or strong and
          # change current token state to match the other.
          # ($token[0] replaces the curly-brace offset syntax, which
          # PHP 8 no longer parses.)
          $token_stack[0] = str_repeat($token[0], 3-$token_len);
          $tag = $token_len == 2 ? "strong" : "em";
          $span = $text_stack[0];
          $span = $this->runSpanGamut($span);
          $span = "<$tag>$span</$tag>";
          $text_stack[0] = $this->hashPart($span);
          $$tag = ''; # $$tag stands for $em or $strong
        }
        $tree_char_em = false;
      } else if ($token_len == 3) {
        if ($em) {
          # Reached closing marker for both em and strong: close the
          # two innermost levels, whichever order they were opened in.
          for ($i = 0; $i < 2; ++$i) {
            $shifted_token = array_shift($token_stack);
            $tag = strlen($shifted_token) == 2 ? "strong" : "em";
            $span = array_shift($text_stack);
            $span = $this->runSpanGamut($span);
            $span = "<$tag>$span</$tag>";
            $text_stack[0] .= $this->hashPart($span);
            $$tag = ''; # $$tag stands for $em or $strong
          }
        } else {
          # Reached opening three-char emphasis marker. Push on token
          # stack; will be handled by the special condition above.
          $em = $token[0];
          $strong = "$em$em";
          array_unshift($token_stack, $token);
          array_unshift($text_stack, '');
          $tree_char_em = true;
        }
      } else if ($token_len == 2) {
        if ($strong) {
          # Unwind any dangling emphasis marker:
          if (strlen($token_stack[0]) == 1) {
            $text_stack[1] .= array_shift($token_stack);
            $text_stack[0] .= array_shift($text_stack);
          }
          # Closing strong marker:
          array_shift($token_stack);
          $span = array_shift($text_stack);
          $span = $this->runSpanGamut($span);
          $span = "<strong>$span</strong>";
          $text_stack[0] .= $this->hashPart($span);
          $strong = '';
        } else {
          # Opening strong marker.
          array_unshift($token_stack, $token);
          array_unshift($text_stack, '');
          $strong = $token;
        }
      } else {
        # Here $token_len == 1
        if ($em) {
          if (strlen($token_stack[0]) == 1) {
            # Closing emphasis marker:
            array_shift($token_stack);
            $span = array_shift($text_stack);
            $span = $this->runSpanGamut($span);
            $span = "<em>$span</em>";
            $text_stack[0] .= $this->hashPart($span);
            $em = '';
          } else {
            # The innermost open marker is not a one-char one: keep
            # this token as literal text.
            $text_stack[0] .= $token;
          }
        } else {
          # Opening emphasis marker.
          array_unshift($token_stack, $token);
          array_unshift($text_stack, '');
          $em = $token;
        }
      }
    }
    return $text_stack[0];
  }
  function doBlockQuotes($text) {
    #
    # Find blockquotes in $text (paragraphs whose first line starts with
    # ">") and replace each through _doBlockQuotes_callback.
    #
    $blockquote_re = '/
( # Wrap whole match in $1
(?>
^[ ]*>[ ]? # ">" at the start of a line
.+\n # rest of the first line
(.+\n)* # subsequent consecutive lines
\n* # blanks
)+
)
/xm';

    return preg_replace_callback($blockquote_re,
      array(&$this, '_doBlockQuotes_callback'), $text);
  }
  function _doBlockQuotes_callback($matches) {
    #
    # Regex callback for one blockquote held in $matches[1]. Returns the
    # hashed <blockquote> element surrounded by newlines.
    #
    # Remove one level of ">" quoting and empty whitespace-only lines.
    $inner = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

    # The quoted text is itself Markdown: run the block gamut on it.
    $inner = $this->runBlockGamut($inner);

    # Indent every line of the result...
    $inner = preg_replace('/^/m', " ", $inner);

    # ...except inside <pre> elements, where the added leading space
    # would show up in the output, so it is removed again there.
    $inner = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
      array(&$this, '_doBlockQuotes_callback2'), $inner);

    $html = "<blockquote>\n$inner\n</blockquote>";
    return "\n" . $this->hashBlock($html) . "\n\n";
  }
  function _doBlockQuotes_callback2($matches) {
    #
    # Regex callback for a <pre> element inside a blockquote: strips one
    # leading space from every line of $matches[1].
    #
    return preg_replace('/^ /m', '', $matches[1]);
  }
  function formParagraphs($text) {
    #
    # Params:
    #   $text - string to process with html <p> tags
    #
    # Splits $text on blank lines. A chunk that consists of exactly one
    # block hash token is replaced by the HTML stored for it in
    # $this->html_hashes; every other chunk is a paragraph: it goes
    # through the span gamut, gets wrapped in <p>...</p> (replacing any
    # leading spaces) and is unhashed.
    #
    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($grafs as $key => $value) {
      if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
        # Is a block: swap the stored HTML back in.
        $grafs[$key] = $this->html_hashes[$value];
        continue;
      }

      # Is a paragraph.
      $value = $this->runSpanGamut($value);
      $value = preg_replace('/^([ ]*)/', "<p>", $value) . "</p>";
      $grafs[$key] = $this->unhash($value);
    }

    return implode("\n\n", $grafs);
  }
  function encodeAttribute($text) {
    #
    # Encode $text for use inside a double-quoted HTML attribute:
    # ampersands and "<" via encodeAmpsAndAngles(), then double quotes.
    # Single quotes are not touched, so the result is *not* suitable for
    # attributes enclosed in single quotes.
    #
    return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
  }
  function encodeAmpsAndAngles($text) {
    #
    # Encode the ampersands and "<" characters of $text. In no-entities
    # mode every ampersand is encoded; otherwise ampersands that already
    # start something shaped like a character entity are left alone.
    #
    if (!$this->no_entities) {
      # Ampersand-encoding based entirely on Nat Irons's Amputator
      # MT plugin: <http://bumppo.net/projects/amputator/>
      $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
        '&amp;', $text);
    } else {
      $text = str_replace('&', '&amp;', $text);
    }

    # Encode remaining <'s
    return str_replace('<', '&lt;', $text);
  }
  function doAutoLinks($text) {
    #
    # Turn <url> and <email address> shorthands found in $text into
    # links, via the two _doAutoLinks_*_callback methods.
    #
    # URLs: <scheme:...> with scheme http, https, ftp or dict.
    $url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';

    # Email addresses: <address@domain.foo>
    $email_re = '{
<
(?:mailto:)?
(
(?:
[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
|
".*?"
)
\@
(?:
[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
|
\[[\d.a-fA-F:]+\] # IPv4 & IPv6
)
)
>
}xi';

    $text = preg_replace_callback($url_re,
      array(&$this, '_doAutoLinks_url_callback'), $text);

    return preg_replace_callback($email_re,
      array(&$this, '_doAutoLinks_email_callback'), $text);
  }
  function _doAutoLinks_url_callback($matches) {
    #
    # Regex callback for a <url> autolink: the attribute-encoded URL in
    # $matches[1] is used both as href and as link text; the link is
    # returned hashed.
    #
    $href = $this->encodeAttribute($matches[1]);
    return $this->hashPart("<a href=\"$href\">$href</a>");
  }
  function _doAutoLinks_email_callback($matches) {
    #
    # Regex callback for an <email> autolink: $matches[1] is the address,
    # turned into an obfuscated mailto link and returned hashed.
    #
    $anchor = $this->encodeEmailAddress($matches[1]);
    return $this->hashPart($anchor);
  }
  function encodeEmailAddress($addr) {
    #
    # Input: an email address, e.g. "foo@example.com"
    #
    # Output: a mailto link for that address in which most characters
    # are written as decimal or hexadecimal character references, in the
    # hope of foiling address-harvesting spam bots. The same encoded
    # characters are used for the href and for the link text.
    #
    # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
    # With some optimizations by Milian Wolff.
    #
    $mailto = "mailto:" . $addr;
    $chars = preg_split('/(?<!^)(?!$)/', $mailto);

    # Deterministic seed: a given address always encodes the same way.
    $seed = (int)abs(crc32($mailto) / strlen($mailto));

    $encoded = array();
    foreach ($chars as $index => $char) {
      $code = ord($char);

      # Non-ascii chars are passed through untouched.
      if ($code >= 128) {
        $encoded[] = $char;
        continue;
      }

      # Pseudo-random value in 0..99 choosing the representation:
      # roughly 10% raw, 45% hex, 45% dec.
      $roll = ($seed * (1 + $index)) % 100;

      if ($roll > 90 && $char != '@') {
        # Left raw. '@' never takes this branch: it is always encoded.
        $encoded[] = $char;
      } elseif ($roll < 45) {
        $encoded[] = '&#x' . dechex($code) . ';';
      } else {
        $encoded[] = '&#' . $code . ';';
      }
    }

    $href = implode('', $encoded);
    $label = implode('', array_slice($encoded, 7)); # text without `mailto:`

    return "<a href=\"$href\">$label</a>";
  }
  function parseSpan($str) {
    #
    # Scan $str for span-level tokens -- backslash escapes, code span
    # markers and, unless no_markup is set, inline HTML -- and replace
    # each with whatever handleSpanToken() returns for it. Text between
    # tokens is copied through unchanged. Returns the rebuilt string.
    #
    $markup_re = $this->no_markup ? '' : '
|
<!-- .*? --> # comment
|
<\?.*?\?> | <%.*?%> # processing instruction
|
<[!$]?[-a-zA-Z0-9:_]+ # regular tags
(?>
\s
(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
)?
>
|
<[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
|
</[-a-zA-Z0-9:_]+\s*> # closing tag
';

    $span_re = '{
(
\\\\'.$this->escape_chars_re.'
|
(?<![`\\\\])
`+ # code span marker
'.$markup_re.'
)
}xs';

    $output = '';
    while (true) {
      # Split once at the next token: text before, token, text after.
      $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

      # Text preceding the token (possibly empty) is kept as is.
      $output .= $parts[0];

      # No token left: the end has been reached.
      if (!isset($parts[1])) {
        break;
      }

      # handleSpanToken() receives the remaining text by reference and
      # may consume part of it (e.g. up to a closing code span marker).
      $str = $parts[2];
      $output .= $this->handleSpanToken($parts[1], $str);
    }

    return $output;
  }
  function handleSpanToken($token, &$str) {
    #
    # Handle $token provided by parseSpan by determining its nature and
    # returning the corresponding value that should replace it.
    #
    # Params:
    #   $token - the matched token: a backslash escape, a run of
    #            backticks, or anything else matched by parseSpan.
    #   $str   - text following the token, passed by reference; when a
    #            code span is closed, the part consumed by the span is
    #            removed from it.
    #
    # String offsets use square brackets: the curly-brace form
    # ($token{0}) is no longer parsed by PHP 8.
    #
    switch ($token[0]) {
      case "\\":
        # Escaped character: emit it as a numeric character reference.
        return $this->hashPart("&#". ord($token[1]). ";");
      case "`":
        # Search for end marker in remaining text: the same run of
        # backticks, not followed by another backtick.
        if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
          $str, $matches))
        {
          $str = $matches[2];
          $codespan = $this->makeCodeSpan($matches[1]);
          return $this->hashPart($codespan);
        }
        return $token; // return as text since no ending marker found.
      default:
        # Any other token is protected from further processing as is.
        return $this->hashPart($token);
    }
  }
  function outdent($text) {
    #
    # Remove one level of indentation from every line of $text: either a
    # single tab, or between one and tab_width leading spaces.
    #
    $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
    return preg_replace($indent_re, '', $text);
  }
  # Callable used by detab to measure a line's length in characters.
  # `_initDetab` replaces it with a UTF-8 aware fallback when the default
  # function does not exist.
  var $utf8_strlen = 'mb_strlen';

  function detab($text) {
    #
    # Replace tabs with the appropriate amount of space.
    #
    # Only lines containing at least one tab are touched; each of them is
    # rebuilt by _detab_callback, which pads up to the next tab stop.
    #
    return preg_replace_callback('/^.*\t.*$/m',
      array(&$this, '_detab_callback'), $text);
  }
  function _detab_callback($matches) {
    #
    # Regex callback expanding the tabs of one line ($matches[0]). The
    # line is cut at its tabs and reassembled piece by piece, each tab
    # becoming the number of spaces needed to reach the next multiple of
    # tab_width, measured with the utf8_strlen callable.
    #
    $strlen = $this->utf8_strlen; # strlen function for UTF-8.

    $pieces = explode("\t", $matches[0]);

    # Start from the text before the first tab, then append the rest.
    $line = array_shift($pieces);
    foreach ($pieces as $piece) {
      $used = $strlen($line, 'UTF-8') % $this->tab_width;
      $line .= str_repeat(" ", $this->tab_width - $used) . $piece;
    }

    return $line;
  }
  function _initDetab() {
    #
    # Make sure the `utf8_strlen` property (initially `mb_strlen`) holds
    # something callable. When it does not, install a fallback that
    # loosely counts the number of UTF-8 characters with a regular
    # expression: one match per single byte below 0xC0, or per lead byte
    # together with its continuation bytes.
    #
    # The fallback is an anonymous function because create_function()
    # was removed in PHP 8.0. is_callable() is used for the check so
    # that calling this method again, once the closure is installed,
    # is harmless.
    #
    if (is_callable($this->utf8_strlen)) return;

    $this->utf8_strlen = function ($text) {
      # preg_match_all returns the number of full pattern matches; $m is
      # only there because older PHP versions require the argument.
      return preg_match_all(
        '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
        $text, $m);
    };
  }
  function unhash($text) {
    #
    # Swap back in everything that was replaced by a hash token. A token
    # is a character, \x1A, a run of digits, and the same character
    # again; each one found in $text is looked up by _unhash_callback.
    #
    $token_re = '/(.)\x1A[0-9]+\1/';
    return preg_replace_callback($token_re,
      array(&$this, '_unhash_callback'), $text);
  }
  function _unhash_callback($matches) {
    # Regex callback: the whole matched token is the key into html_hashes.
    $token = $matches[0];
    return $this->html_hashes[$token];
  }
  1466. }
  1467. #
  1468. # Markdown Extra Parser Class
  1469. #
  1470. class MarkdownExtra_Parser extends Markdown_Parser {
  ### Configuration Variables ###

  # Prefix for footnote ids.
  var $fn_id_prefix = "";

  # Optional title attribute for footnote links and backlinks.
  var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
  var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;

  # Optional class attribute for footnote links and backlinks.
  var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
  var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;

  # Optional class prefix for fenced code block.
  var $code_class_prefix = MARKDOWN_CODE_CLASS_PREFIX;
  # Class attribute for code blocks goes on the `code` tag;
  # setting this to true will put attributes on the `pre` tag instead.
  var $code_attr_on_pre = MARKDOWN_CODE_ATTR_ON_PRE;

  # Predefined abbreviations.
  var $predef_abbr = array();


  ### Parser Implementation ###

  function __construct() {
    #
    # PHP 8 no longer treats a method named after its class as the
    # constructor, so without this the initialization below would never
    # run on `new MarkdownExtra_Parser`. Delegating keeps the legacy
    # method as the single place where the setup happens.
    #
    $this->MarkdownExtra_Parser();
  }

  function MarkdownExtra_Parser() {
    #
    # Legacy (PHP 4 style) constructor. Initialize the parser object.
    # Kept as a regular method so that existing code calling
    # parent::MarkdownExtra_Parser() continues to work.
    #
    # Add extra escapable characters before parent constructor
    # initialize the table.
    $this->escape_chars .= ':|';

    # Insert extra document, block, and span transformations.
    # Parent constructor will do the sorting. The `+=` array union only
    # adds keys that are not already present in each gamut.
    $this->document_gamut += array(
      "doFencedCodeBlocks" => 5,
      "stripFootnotes"     => 15,
      "stripAbbreviations" => 25,
      "appendFootnotes"    => 50,
      );
    $this->block_gamut += array(
      "doFencedCodeBlocks" => 5,
      "doTables"           => 15,
      "doDefLists"         => 45,
      );
    $this->span_gamut += array(
      "doFootnotes"        => 5,
      "doAbbreviations"    => 70,
      );

    # Called by name: this works whether or not the running PHP version
    # still regards the parent's method as a constructor.
    parent::Markdown_Parser();
  }


  # Extra variables used during extra transformations.
  var $footnotes = array();
  var $footnotes_ordered = array();
  var $footnotes_ref_count = array();
  var $footnotes_numbers = array();
  var $abbr_desciptions = array();
  var $abbr_word_re = '';

  # Give the current footnote number.
  var $footnote_counter = 1;
  1523. function setup() {
  1524. #
  1525. # Setting up Extra-specific variables.
  1526. #
  1527. parent::setup();
  1528. $this->footnotes = array();
  1529. $this->footnotes_ordered = array();
  1530. $this->footnotes_ref_count = array();
  1531. $this->footnotes_numbers = array();
  1532. $th

Large files files are truncated, but you can click here to view the full file