PageRenderTime 67ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/includes/smarty/plugins/modifier.markdown.php

https://github.com/dpcdesigns/agilebill
PHP | 1989 lines | 1166 code | 244 blank | 579 comment | 93 complexity | 2b572ac47f5b477487bf051f3fbbd018 MD5 | raw file
Possible License(s): Apache-2.0, MPL-2.0-no-copyleft-exception

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. #
  3. # PHP Markdown Extra - A text-to-HTML conversion tool for web writers
  4. #
  5. # Copyright (c) 2004-2005 Michel Fortin
  6. # <http://www.michelf.com/projects/php-markdown/>
  7. #
  8. # Based on Markdown
  9. # Copyright (c) 2004-2005 John Gruber
  10. # <http://daringfireball.net/projects/markdown/>
  11. #
  # Declare every shared variable as global so the settings below land in
  # the global scope even when this file is included from inside a function.
  global $MarkdownPHPVersion, $MarkdownSyntaxVersion;
  global $md_empty_element_suffix, $md_tab_width;
  global $md_nested_brackets_depth, $md_nested_brackets;
  global $md_escape_table, $md_backslash_escape_table;
  global $md_list_level;

  # Version identifiers.
  $MarkdownPHPVersion    = 'Extra 1.0'; # Mon 5 Sep 2005
  $MarkdownSyntaxVersion = '1.0.1';     # Sun 12 Dec 2004

  #
  # Global default settings:
  #
  # Suffix appended to empty elements such as <hr> and <img>.
  # Change to ">" for HTML output.
  $md_empty_element_suffix = " />";
  # Tab width setting shared by the conversion functions.
  $md_tab_width = 4;

  #
  # WordPress settings:
  #
  # Set to false to remove Markdown from posts.
  $md_wp_posts = true;
  # Set to false to remove Markdown from comments.
  $md_wp_comments = true;
  29. # -- WordPress Plugin Interface -----------------------------------------------
  30. /*
  31. Plugin Name: PHP Markdown Extra
  32. Plugin URI: http://www.michelf.com/projects/php-markdown/
  33. Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
  34. Version: Extra 1.0
  35. Author: Michel Fortin
  36. Author URI: http://www.michelf.com/
  37. */
  if (isset($wp_version)) {
      #
      # WordPress integration. Only runs when $wp_version is set.
      # Re-wires the WordPress text filters so that Markdown() does the
      # formatting instead of wpautop.
      #
      # More details about how it works here:
      # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>

      # Helper filters. They are defined here, outside the posts section,
      # because md_strip_p is also registered by the comments section and
      # must exist even when $md_wp_posts is false.
      function md_add_p($text) {
          #
          # Wrap $text in a paragraph unless it is empty or already starts
          # with a <p> tag (case-insensitive).
          #
          if (strlen($text) == 0) return $text;
          # Check the *beginning* of the text: testing the last three
          # characters against '<p>' never matches real output and caused
          # already-wrapped excerpts to be wrapped a second time.
          if (strncasecmp($text, '<p>', 3) == 0) return $text;
          return '<p>'.$text.'</p>';
      }
      function md_strip_p($t) {
          # Remove every opening and closing <p> tag from $t.
          return preg_replace('{</?[pP]>}', '', $t);
      }

      # Post content and excerpts
      if ($md_wp_posts) {
          remove_filter('the_content',  'wpautop');
          remove_filter('the_excerpt',  'wpautop');
          add_filter('the_content',     'Markdown', 6);
          add_filter('get_the_excerpt', 'Markdown', 6);
          add_filter('get_the_excerpt', 'trim', 7);
          add_filter('the_excerpt',     'md_add_p');
          add_filter('the_excerpt_rss', 'md_strip_p');

          # Balance tags on output (after Markdown) rather than on save.
          remove_filter('content_save_pre',  'balanceTags', 50);
          remove_filter('excerpt_save_pre',  'balanceTags', 50);
          add_filter('the_content',          'balanceTags', 50);
          add_filter('get_the_excerpt',      'balanceTags', 9);
      }

      # Comments
      if ($md_wp_comments) {
          remove_filter('comment_text', 'wpautop');
          remove_filter('comment_text', 'make_clickable');
          add_filter('pre_comment_content', 'Markdown', 6);
          # Tags produced by Markdown are swapped for their MD5 hash at
          # priority 8 and restored at priority 12, so that filters running
          # in between do not see them.
          add_filter('pre_comment_content', 'md_hide_tags', 8);
          add_filter('pre_comment_content', 'md_show_tags', 12);
          add_filter('get_comment_text',    'Markdown', 6);
          add_filter('get_comment_excerpt', 'Markdown', 6);
          add_filter('get_comment_excerpt', 'md_strip_p', 7);

          # Map of tag => placeholder hash used by md_hide_tags/md_show_tags.
          global $md_hidden_tags;
          $md_hidden_tags = array(
              '<p>'   => md5('<p>'),   '</p>'   => md5('</p>'),
              '<pre>' => md5('<pre>'), '</pre>' => md5('</pre>'),
              '<ol>'  => md5('<ol>'),  '</ol>'  => md5('</ol>'),
              '<ul>'  => md5('<ul>'),  '</ul>'  => md5('</ul>'),
              '<li>'  => md5('<li>'),  '</li>'  => md5('</li>'),
              );

          function md_hide_tags($text) {
              # Replace each listed tag by its placeholder hash.
              global $md_hidden_tags;
              return str_replace(array_keys($md_hidden_tags),
                                 array_values($md_hidden_tags), $text);
          }
          function md_show_tags($text) {
              # Replace each placeholder hash by the tag it stands for.
              global $md_hidden_tags;
              return str_replace(array_values($md_hidden_tags),
                                 array_keys($md_hidden_tags), $text);
          }
      }
  }
  91. # -- bBlog Plugin Info --------------------------------------------------------
  function identify_modifier_markdown() {
      #
      # bBlog plugin descriptor.
      #
      # Returns an associative array describing this modifier; the
      # 'version' entry is read from the global $MarkdownPHPVersion.
      #
      global $MarkdownPHPVersion;

      $plugin_info = array();
      $plugin_info['name']        = 'markdown';
      $plugin_info['type']        = 'modifier';
      $plugin_info['nicename']    = 'PHP Markdown Extra';
      $plugin_info['description'] = 'A text-to-HTML conversion tool for web writers';
      $plugin_info['authors']     = 'Michel Fortin and John Gruber';
      $plugin_info['licence']     = 'GPL';
      $plugin_info['version']     = $MarkdownPHPVersion;
      $plugin_info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

      return $plugin_info;
  }
  105. # -- Smarty Modifier Interface ------------------------------------------------
  function smarty_modifier_markdown($text) {
      #
      # Smarty modifier entry point: {$var|markdown}.
      # Passes $text to Markdown() and returns its result.
      #
      $html = Markdown($text);
      return $html;
  }
  109. # -- Textile Compatibility Mode -----------------------------------------------
  110. # Rename this file to "classTextile.php" and it can replace Textile anywhere.
  if (0 == strcasecmp(substr(__FILE__, -16), "classTextile.php")) {
      # This file has been renamed "classTextile.php": act as a drop-in
      # replacement for Textile.

      # Try to include PHP SmartyPants. Should be in the same directory.
      # (Errors are suppressed: SmartyPants is optional.)
      @include_once 'smartypants.php';

      # Fake Textile class. It calls Markdown instead.
      class Textile {
          function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
              # Markdown is applied only when neither $lite nor $encode
              # is requested; $noimage and $strict are accepted but unused.
              $markdown_wanted = ($lite == '' && $encode == '');
              if ($markdown_wanted) {
                  $text = Markdown($text);
              }
              # SmartyPants runs whenever it could be loaded.
              if (function_exists('SmartyPants')) {
                  $text = SmartyPants($text);
              }
              return $text;
          }
      }
  }
  123. #
  124. # Globals:
  125. #
  126. # Regex to match balanced [brackets].
  127. # Needed to insert a maximum bracked depth while converting to PHP.
  # Maximum nesting depth of [brackets] recognised in link text. PHP
  # regexes here are built without recursion, so the pattern is unrolled
  # this many times.
  $md_nested_brackets_depth = 6;
  # Regex fragment matching balanced [brackets] up to that depth.
  $md_nested_brackets =
      str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
      str_repeat('\])*', $md_nested_brackets_depth);

  # Table of hash values for escaped characters:
  # maps each special character to its MD5 hash.
  $md_escape_table = array(
      "\\" => md5("\\"),
      "`"  => md5("`"),
      "*"  => md5("*"),
      "_"  => md5("_"),
      "{"  => md5("{"),
      "}"  => md5("}"),
      "["  => md5("["),
      "]"  => md5("]"),
      "("  => md5("("),
      ")"  => md5(")"),
      ">"  => md5(">"),
      "#"  => md5("#"),
      "+"  => md5("+"),
      "-"  => md5("-"),
      "."  => md5("."),
      "!"  => md5("!"),
      ":"  => md5(":"),
      "|"  => md5("|"),
  );

  # Create an identical table but keyed by the backslash-escaped form of
  # each character (e.g. "\*"). The table is initialised explicitly: a bare
  # "$md_backslash_escape_table;" statement does nothing except raise an
  # undefined-variable warning.
  $md_backslash_escape_table = array();
  foreach ($md_escape_table as $key => $char)
      $md_backslash_escape_table["\\$key"] = $char;
  function Markdown($text) {
      #
      # Main function: converts Markdown source in $text and returns the
      # result followed by a newline.
      #
      # The order of the steps below is essential. Link and image
      # substitutions need to happen before
      # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in
      # the <a> and <img> tags get encoded.
      #
      global $md_urls, $md_titles, $md_html_blocks, $md_html_hashes;

      # Reset the per-document tables. Without this, a page that renders
      # several articles (e.g. an index of the N most recent ones) would
      # get conflicts between them.
      $md_html_hashes = array();
      $md_html_blocks = array();
      $md_titles      = array();
      $md_urls        = array();

      # Standardize line endings: DOS to Unix first, then Mac to Unix.
      $text = str_replace("\r\n", "\n", $text);
      $text = str_replace("\r",   "\n", $text);

      # Make sure the text ends with a couple of newlines, convert all tabs
      # to spaces, then turn block-level HTML blocks into hash entries.
      $text = _HashHTMLBlocks(_Detab($text . "\n\n"));

      # Strip any lines consisting only of spaces and tabs, so that later
      # patterns can match consecutive blank lines with /\n+/ instead of
      # something contorted like /[ \t]*\n+/ .
      $text = preg_replace('/^[ \t]+$/m', '', $text);

      # Strip link definitions (stored in the global tables), then run the
      # block-level transformations. HTML blocks are already hashed, hence
      # the FALSE.
      $text = _RunBlockGamut(_StripLinkDefinitions($text), FALSE);

      return _UnescapeSpecialChars($text) . "\n";
  }
  function _StripLinkDefinitions($text) {
      #
      # Removes link definitions from $text and returns what is left.
      # Each definition found is passed to _StripLinkDefinitions_callback,
      # which records its URL and title.
      #
      # Link defs are in the form: ^[id]: url "optional title"
      #
      global $md_tab_width;

      # A definition may be indented by at most one space less than a tab.
      $less_than_tab = $md_tab_width - 1;

      $link_def_pattern = '{
          ^[ ]{0,'.$less_than_tab.'}\[(.+)\]: # id = $1
            [ \t]*
            \n? # maybe *one* newline
            [ \t]*
          <?(\S+?)>? # url = $2
            [ \t]*
            \n? # maybe one newline
            [ \t]*
          (?:
              (?<=\s) # lookbehind for whitespace
              ["(]
              (.+?) # title = $3
              [")]
              [ \t]*
          )? # title is optional
          (?:\n+|\Z)
          }xm';

      return preg_replace_callback($link_def_pattern,
                                   '_StripLinkDefinitions_callback',
                                   $text);
  }
  function _StripLinkDefinitions_callback($matches) {
      #
      # Callback for _StripLinkDefinitions. Stores the URL ($matches[2])
      # and, when present, the title ($matches[3]) under the lower-cased
      # link id ($matches[1]) in the global tables.
      #
      # Returns the empty string, which replaces the whole definition.
      #
      global $md_urls, $md_titles;

      $id = strtolower($matches[1]);
      $md_urls[$id] = _EncodeAmpsAndAngles($matches[2]);

      if (isset($matches[3])) {
          # Double quotes are encoded so the title is safe in an attribute.
          $md_titles[$id] = str_replace('"', '&quot;', $matches[3]);
      }

      return '';
  }
  function _HashHTMLBlocks($text) {
      #
      # Hashify HTML blocks and "clean tags"; returns the processed text.
      #
      # Only block-level HTML tags (headers, lists, tables...) are treated
      # this way, because paragraphs wrapped in non-block-level tags such
      # as anchors, phrase emphasis and spans must still get their <p>.
      # The list of tags is hard-coded below.
      #
      # The work is done by _HashHTMLBlocks_InMarkdown, which calls
      # _HashHTMLBlocks_InHTML when it meets a block tag. When a
      # markdown="1" attribute is found within a tag, _HashHTMLBlocks_InHTML
      # calls _HashHTMLBlocks_InMarkdown back to handle the Markdown syntax
      # inside that tag: the two functions are mutually recursive.
      #
      global $block_tags, $context_block_tags, $contain_span_tags,
             $clean_tags, $auto_close_tags, $tag_match;

      # Tags that do not need to be closed.
      $auto_close_tags = 'hr|img';

      # Tags which must not have their contents modified, no matter where
      # they appear:
      $clean_tags = 'script|math';

      # Tags where markdown="1" defaults to span mode:
      $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend';

      # Tags treated as block tags only if the opening tag is alone on its
      # line:
      $context_block_tags = 'script|noscript|math|ins|del';

      # Tags that are always treated as block tags:
      $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
                    'form|fieldset|iframe|hr|legend';

      # Regex to match any tag.
      $tag_match =
          '{
              ( # $2: Capture hole tag.
                  </? # Any opening or closing tag.
                  [\w:$]+ # Tag name.
                  \s* # Whitespace.
                  (?:
                      ".*?" | # Double quotes (can contain `>`)
                      \'.*?\' | # Single quotes (can contain `>`)
                      .+? # Anything but quotes and `>`.
                  )*?
                  > # End of tag.
              |
                  <!-- .*? --> # HTML Comment
              |
                  <\? .*? \?> # Processing instruction
              |
                  <!\[CDATA\[.*?\]\]> # CData Block
              )
          }xs';

      #
      # Call the HTML-in-Markdown hasher; only the processed text (first
      # element of its result) is of interest here.
      #
      $result = _HashHTMLBlocks_InMarkdown($text);
      return $result[0];
  }
  function _HashHTMLBlocks_InMarkdown($text, $indent = 0,
                                      $enclosing_tag = '', $md_span = false)
  {
      #
      # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
      #
      # * $indent is the number of spaces to be ignored when checking for
      #   code blocks. This is important because if we don't take the indent
      #   into account, something like this (which looks right) won't work
      #   as expected:
      #
      #   <div>
      #       <div markdown="1">
      #       Hello World. <-- Is this a Markdown code block or text?
      #       </div> <-- Is this a Markdown code block or a real tag?
      #   <div>
      #
      #   If you don't like this, just don't indent the tag on which
      #   you apply the markdown="1" attribute.
      #
      # * If $enclosing_tag is not empty, stops at the first unmatched
      #   closing tag with that name. Nested tags supported.
      #
      # * If $md_span is true, text inside must be treated as span. So any
      #   double newline will be replaced by a single newline so that it
      #   does not create paragraphs.
      #
      # Returns an array of that form: ( processed text , remaining text )
      #
      # Reads the global tag lists set up by _HashHTMLBlocks().
      #
      global $block_tags, $context_block_tags, $clean_tags, $auto_close_tags,
             $tag_match;

      if ($text === '') return array('', '');

      # Regex to check for the presence of newlines around a block tag.
      $newline_match_before = "/(?:^\n?|\n\n) *$/";
      $newline_match_after =
          '{
              ^ # Start of text following the tag.
              (?:[ ]*<!--.*?-->)? # Optional comment.
              [ ]*\n # Must be followed by newline.
          }xs';

      # Regex to match any tag this function cares about: block tags,
      # context block tags, clean tags, the enclosing tag, comments,
      # processing instructions and CDATA sections.
      $block_tag_match =
          '{
              ( # $2: Capture hole tag.
                  </? # Any opening or closing tag.
                  (?: # Tag name.
                      '.$block_tags.' |
                      '.$context_block_tags.' |
                      '.$clean_tags.' |
                      (?!\s)'.$enclosing_tag.'
                  )
                  \s* # Whitespace.
                  (?:
                      ".*?" | # Double quotes (can contain `>`)
                      \'.*?\' | # Single quotes (can contain `>`)
                      .+? # Anything but quotes and `>`.
                  )*?
                  > # End of tag.
              |
                  <!-- .*? --> # HTML Comment
              |
                  <\? .*? \?> # Processing instruction
              |
                  <!\[CDATA\[.*?\]\]> # CData Block
              )
          }xs';

      $depth = 0;   # Current depth inside the tag tree.
      $parsed = ""; # Parsed text that will be returned.

      #
      # Loop through every tag until we find the closing tag of the parent
      # or loop until reaching the end of text if no parent tag specified.
      #
      do {
          #
          # Split the text using the first $block_tag_match pattern found.
          # Text before pattern will be first in the array, text after
          # pattern will be at the end, and between will be any catches made
          # by the pattern.
          #
          $parts = preg_split($block_tag_match, $text, 2,
                              PREG_SPLIT_DELIM_CAPTURE);

          # If in Markdown span mode, replace any multiple newlines that
          # would trigger a new paragraph.
          if ($md_span) {
              $parts[0] = preg_replace('/\n\n/', "\n", $parts[0]);
          }

          $parsed .= $parts[0]; # Text before current tag.

          # If end of $text has been reached. Stop loop.
          if (count($parts) < 3) {
              $text = "";
              break;
          }

          $tag  = $parts[1]; # Tag to handle.
          $text = $parts[2]; # Remaining text after current tag.

          #
          # Check for: Tag inside code block or span
          #
          if (# Find current paragraph
              preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
              (
                  # Then match in it either a code block...
                  preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
                             '(?!\n)$/', $matches[1]) ||
                  # ...or unbalanced code span markers. (the regex matches
                  # balanced ones, hence the negation)
                  !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
                              $matches[1])
              ))
          {
              # Tag is in code block or span and may not be a tag at all. So
              # we simply skip the first char (should be a `<`).
              # Square brackets are used for the string offsets: the
              # curly-brace form is a parse error on PHP 8.
              $parsed .= $tag[0];
              $text = substr($tag, 1) . $text; # Put back $tag minus first char.
          }
          #
          # Check for: Opening Block level tag or
          #            Opening Content Block tag (like ins and del)
          #            used as a block tag (tag is alone on its line).
          #
          else if (preg_match("{^<(?:$block_tags)\b}", $tag) ||
              ( preg_match("{^<(?:$context_block_tags)\b}", $tag) &&
                preg_match($newline_match_before, $parsed) &&
                preg_match($newline_match_after, $text) )
              )
          {
              # Need to parse tag and following text using the HTML parser.
              list($block_text, $text) =
                  _HashHTMLBlocks_InHTML($tag . $text,
                                         "_HashHTMLBlocks_HashBlock", TRUE);

              # Make sure it stays outside of any paragraph by adding newlines.
              $parsed .= "\n\n$block_text\n\n";
          }
          #
          # Check for: Clean tag (like script, math)
          #            HTML Comments, processing instructions.
          #
          else if (preg_match("{^<(?:$clean_tags)\b}", $tag) ||
              $tag[1] == '!' || $tag[1] == '?')
          {
              # Need to parse tag and following text using the HTML parser.
              # (don't check for markdown attribute)
              list($block_text, $text) =
                  _HashHTMLBlocks_InHTML($tag . $text,
                                         "_HashHTMLBlocks_HashClean", FALSE);

              $parsed .= $block_text;
          }
          #
          # Check for: Tag with same name as enclosing tag.
          #
          else if ($enclosing_tag !== '' &&
              # Same name as enclosing tag.
              preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
          {
              #
              # Increase/decrease nested tag count. A self-closing tag
              # (ending in "/>") leaves the count unchanged.
              #
              if ($tag[1] == '/') $depth--;
              else if ($tag[strlen($tag)-2] != '/') $depth++;

              if ($depth < 0) {
                  #
                  # Going out of parent element. Clean up and break so we
                  # return to the calling function.
                  #
                  $text = $tag . $text;
                  break;
              }

              $parsed .= $tag;
          }
          else {
              $parsed .= $tag;
          }
      } while ($depth >= 0);

      return array($parsed, $text);
  }
  function _HashHTMLBlocks_InHTML($text, $hash_function, $md_attr) {
      #
      # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
      #
      # * $text must start with the tag to process.
      # * Calls $hash_function (a function name) to convert any blocks.
      # * Stops when the first opening tag closes.
      # * $md_attr indicates if the use of the `markdown="1"` attribute is
      #   allowed. (it is not inside clean tags)
      #
      # Returns an array of that form: ( processed text , remaining text )
      #
      # Reads the global tag lists and $tag_match set up by
      # _HashHTMLBlocks().
      #
      global $auto_close_tags, $contain_span_tags, $tag_match;

      if ($text === '') return array('', '');

      # Regex to match `markdown` attribute inside of a tag.
      $markdown_attr_match = '
          {
              \s* # Eat whitespace before the `markdown` attribute
              markdown
              \s*=\s*
              (["\']) # $1: quote delimiter
              (.*?) # $2: attribute value
              \1 # matching delimiter
          }xs';

      $original_text = $text; # Save original text in case of failure.

      $depth = 0;       # Current depth inside the tag tree.
      $block_text = ""; # Temporary text holder for current text.
      $parsed = "";     # Parsed text that will be returned.

      #
      # Get the name of the starting tag. It stays empty when the text does
      # not start with a named tag (e.g. a comment or processing
      # instruction), instead of being left undefined.
      #
      $base_tag_name = '';
      if (preg_match("/^<([\w:$]*)\b/", $text, $matches))
          $base_tag_name = $matches[1];

      #
      # Loop through every tag until we find the corresponding closing tag.
      #
      do {
          #
          # Split the text using the first $tag_match pattern found.
          # Text before pattern will be first in the array, text after
          # pattern will be at the end, and between will be any catches made
          # by the pattern.
          #
          $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

          if (count($parts) < 3) {
              #
              # End of $text reached with unbalanced tag(s).
              # In that case, we return original text unchanged and pass the
              # first character as filtered to prevent an infinite loop in
              # the parent function.
              #
              return array($original_text[0], substr($original_text, 1));
          }

          $block_text .= $parts[0]; # Text before current tag.
          $tag         = $parts[1]; # Tag to handle.
          $text        = $parts[2]; # Remaining text after current tag.

          #
          # Check for: Auto-close tag (like <hr/>)
          #            Comments and Processing Instructions.
          #
          if (preg_match("{^</?(?:$auto_close_tags)\b}", $tag) ||
              $tag[1] == '!' || $tag[1] == '?')
          {
              # Just add the tag to the block as if it was text.
              $block_text .= $tag;
          }
          else {
              #
              # Increase/decrease nested tag count. Only do so if
              # the tag's name matches the base tag's. A self-closing tag
              # (ending in "/>") leaves the count unchanged.
              #
              if (preg_match("{^</?$base_tag_name\b}", $tag)) {
                  if ($tag[1] == '/') $depth--;
                  else if ($tag[strlen($tag)-2] != '/') $depth++;
              }

              #
              # Check for `markdown="1"` attribute and handle it.
              #
              if ($md_attr &&
                  preg_match($markdown_attr_match, $tag, $attr_matches) &&
                  preg_match('/^(?:1|block|span)$/', $attr_matches[2]))
              {
                  # Remove `markdown` attribute from opening tag.
                  $tag = preg_replace($markdown_attr_match, '', $tag);

                  # Check if text inside this tag must be parsed in span
                  # mode: explicitly requested, or not explicitly "block"
                  # and the tag is one that contains spans by default.
                  $md_mode = $attr_matches[2];
                  $span_mode = ($md_mode == 'span') ||
                      ($md_mode != 'block' &&
                       preg_match("{^<(?:$contain_span_tags)\b}", $tag));

                  # Calculate indent before tag.
                  preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
                  $indent = strlen($matches[1]);

                  # End preceding block with this tag.
                  $block_text .= $tag;
                  $parsed .= $hash_function($block_text, $span_mode);

                  # Get enclosing tag name for the ParseMarkdown function.
                  preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                  $tag_name = $matches[1];

                  # Parse the content using the HTML-in-Markdown parser.
                  list ($block_text, $text)
                      = _HashHTMLBlocks_InMarkdown($text, $indent,
                                                   $tag_name, $span_mode);

                  # Outdent markdown text.
                  if ($indent > 0) {
                      $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                 $block_text);
                  }

                  # Append tag content to parsed text.
                  if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
                  else             $parsed .= "$block_text";

                  # Start over a new block.
                  $block_text = "";
              }
              else $block_text .= $tag;
          }

      } while ($depth > 0);

      #
      # Hash last block text that wasn't processed inside the loop.
      #
      $parsed .= $hash_function($block_text);

      return array($parsed, $text);
  }
  function _HashHTMLBlocks_HashBlock($text) {
      #
      # Registers $text as a hashed HTML block.
      #
      # The text is stored under its MD5 digest in both global tables,
      # $md_html_hashes and $md_html_blocks. Returns the digest, which is
      # the string that will replace the tag.
      #
      global $md_html_hashes, $md_html_blocks;

      $digest = md5($text);
      $md_html_blocks[$digest] = $md_html_hashes[$digest] = $text;

      return $digest;
  }
  function _HashHTMLBlocks_HashClean($text) {
      #
      # Registers $text as a hashed "clean" tag.
      #
      # The text is stored under its MD5 digest in $md_html_hashes only
      # ($md_html_blocks is not touched). Returns the digest, which is the
      # string that will replace the clean tag.
      #
      global $md_html_hashes;

      $digest = md5($text);
      $md_html_hashes[$digest] = $text;

      return $digest;
  }
  function _HashBlock($text) {
      #
      # Called whenever a tag must be hashed. A function that inserts a
      # block-level tag in the text passes it through here so that it is
      # escaped right away, which removes the need to call _HashHTMLBlocks
      # at every step.
      #
      # Any tag hash already present in $text is first swapped back by
      # _UnhashTags, so that _UnhashTags does not have to be run several
      # times at the end; the result is then hashed as a normal block.
      #
      # Returns the hash key standing for the block.
      #
      return _HashHTMLBlocks_HashBlock(_UnhashTags($text));
  }
  function _RunBlockGamut($text, $hash_html_blocks = TRUE) {
      #
      # These are all the transformations that form block-level
      # tags like paragraphs, headers, and list items.
      #
      # * $text: Markdown source to transform.
      # * $hash_html_blocks: when TRUE (default), raw HTML blocks are
      #   hashed first.
      #
      # Returns the transformed text.
      #
      if ($hash_html_blocks) {
          # We need to escape raw HTML in Markdown source before doing
          # anything else. This needs to be done for each block, and not
          # only at the beginning in the Markdown function since hashed
          # blocks can be part of a list item and could have been indented.
          # Indented blocks would have been seen as a code block in previous
          # pass of _HashHTMLBlocks.
          $text = _HashHTMLBlocks($text);
      }

      $text = _DoHeaders($text);
      $text = _DoTables($text);

      # Do Horizontal Rules:
      # (a callback is used instead of the /e modifier, which no longer
      # exists as of PHP 7 and made preg_replace fail)
      $text = preg_replace_callback(
          array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
                '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
                '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
          '_RunBlockGamut_hr_callback',
          $text);

      $text = _DoLists($text);
      $text = _DoDefLists($text);
      $text = _DoCodeBlocks($text);
      $text = _DoBlockQuotes($text);
      $text = _FormParagraphs($text);

      return $text;
  }
  function _RunBlockGamut_hr_callback($matches) {
      #
      # Callback for the horizontal rule patterns of _RunBlockGamut.
      # Returns the hash key of an <hr> tag surrounded by newlines.
      #
      global $md_empty_element_suffix;
      return _HashBlock("\n<hr$md_empty_element_suffix\n");
  }
  function _RunSpanGamut($text) {
      #
      # These are all the transformations that occur *within* block-level
      # tags like paragraphs, headers, and list items.
      #
      # Returns the transformed text.
      #
      global $md_empty_element_suffix;

      # The steps, in the order they must run:
      # - Images must come before anchors, because ![foo][f] looks like an
      #   anchor.
      # - Auto links (things like `<http://example.com/>`) must come after
      #   _DoAnchors(), because you can use < and > delimiters in inline
      #   links like [this](<url>).
      $span_steps = array(
          '_DoCodeSpans',
          '_EscapeSpecialChars',
          '_DoImages',
          '_DoAnchors',
          '_DoAutoLinks',
          '_EncodeAmpsAndAngles',
          '_DoItalicsAndBold',
      );
      foreach ($span_steps as $span_step) {
          $text = $span_step($text);
      }

      # Do hard breaks: two or more spaces before a newline.
      return preg_replace('/ {2,}\n/', "<br$md_empty_element_suffix\n", $text);
  }
  function _EscapeSpecialChars($text) {
      #
      # Rebuilds $text from the tokens returned by _TokenizeHTML, escaping
      # special characters on the way, and returns the result.
      #
      global $md_escape_table;

      $rebuilt = '';
      foreach (_TokenizeHTML($text) as $token) {
          $content = $token[1];
          if ($token[0] == 'tag') {
              # Within tags, encode * and _ so they don't conflict with
              # their use in Markdown for italics and strong. Each such
              # character is replaced with its corresponding MD5 checksum
              # value; this is likely overkill, but it should prevent us
              # from colliding with the escape values by accident.
              $rebuilt .= str_replace(
                  array('*', '_'),
                  array($md_escape_table['*'], $md_escape_table['_']),
                  $content);
          } else {
              # Outside tags, only backslash escapes are encoded.
              $rebuilt .= _EncodeBackslashEscapes($content);
          }
      }
      return $rebuilt;
  }
  function _DoAnchors($text) {
      #
      # Turn Markdown link shortcuts into XHTML <a> tags.
      # Returns the transformed text.
      #
      global $md_nested_brackets;

      # Reference-style links: [link text] [id]
      $reference_pattern = '{
          ( # wrap whole match in $1
            \[
              ('.$md_nested_brackets.') # link text = $2
            \]
            [ ]? # one optional space
            (?:\n[ ]*)? # one optional newline followed by spaces
            \[
              (.*?) # id = $3
            \]
          )
          }xs';

      # Inline-style links: [link text](url "optional title")
      $inline_pattern = '{
          ( # wrap whole match in $1
            \[
              ('.$md_nested_brackets.') # link text = $2
            \]
            \( # literal paren
              [ \t]*
              <?(.*?)>? # href = $3
              [ \t]*
              ( # $4
                ([\'"]) # quote char = $5
                (.*?) # Title = $6
                \5 # matching quote
              )? # title is optional
            \)
          )
          }xs';

      # Reference-style links are handled first, inline-style links next.
      $text = preg_replace_callback($reference_pattern,
                                    '_DoAnchors_reference_callback', $text);
      $text = preg_replace_callback($inline_pattern,
                                    '_DoAnchors_inline_callback', $text);
      return $text;
  }
  function _DoAnchors_reference_callback($matches) {
      #
      # Callback for reference-style links: [link text][id].
      #
      # $matches[1] is the whole match, $matches[2] the link text and
      # $matches[3] the id. Returns the <a> tag when the (lower-cased) id is
      # a known link definition, or the whole match untouched otherwise.
      #
      global $md_urls, $md_titles, $md_escape_table;

      $link_text = $matches[2];
      $link_id   = strtolower($matches[3]);
      if ($link_id == "") {
          # Shortcut links like [this][] use the link text as id.
          $link_id = strtolower($link_text);
      }

      # Unknown id: leave the source text intact.
      if (!isset($md_urls[$link_id])) {
          return $matches[1];
      }

      # * and _ are encoded to avoid conflicting with italics/bold.
      $plain   = array('*', '_');
      $encoded = array($md_escape_table['*'], $md_escape_table['_']);

      $anchor = "<a href=\"" . str_replace($plain, $encoded, $md_urls[$link_id]) . "\"";
      if (isset($md_titles[$link_id])) {
          $anchor .= " title=\"" . str_replace($plain, $encoded, $md_titles[$link_id]) . "\"";
      }
      return $anchor . ">$link_text</a>";
  }
  function _DoAnchors_inline_callback($matches) {
      #
      # Callback for inline-style links: [link text](url "optional title").
      #
      # $matches[2] is the link text, $matches[3] the URL and, when a title
      # was given, $matches[6] the title. Returns the <a> tag.
      #
      global $md_escape_table;

      # * and _ are encoded to avoid conflicting with italics/bold.
      $plain   = array('*', '_');
      $encoded = array($md_escape_table['*'], $md_escape_table['_']);

      $anchor = "<a href=\"" . str_replace($plain, $encoded, $matches[3]) . "\"";

      if (isset($matches[6])) {
          # Double quotes are encoded so the title is safe in an attribute.
          $title  = str_replace('"', '&quot;', $matches[6]);
          $title  = str_replace($plain, $encoded, $title);
          $anchor .= " title=\"$title\"";
      }

      return $anchor . ">" . $matches[2] . "</a>";
  }
  function _DoImages($text) {
      #
      # Turn Markdown image shortcuts into <img> tags.
      # Returns the transformed text.
      #
      global $md_nested_brackets;

      # Reference-style labeled images: ![alt text][id]
      $reference_pattern = '{
          ( # wrap whole match in $1
            !\[
              ('.$md_nested_brackets.') # alt text = $2
            \]
            [ ]? # one optional space
            (?:\n[ ]*)? # one optional newline followed by spaces
            \[
              (.*?) # id = $3
            \]
          )
          }xs';

      # Inline images: ![alt text](url "optional title")
      $inline_pattern = '{
          ( # wrap whole match in $1
            !\[
              ('.$md_nested_brackets.') # alt text = $2
            \]
            \( # literal paren
              [ \t]*
              <?(\S+?)>? # src url = $3
              [ \t]*
              ( # $4
                ([\'"]) # quote char = $5
                (.*?) # title = $6
                \5 # matching quote
                [ \t]*
              )? # title is optional
            \)
          )
          }xs';

      # Reference-style images are handled first, inline images next.
      # The callbacks take care of encoding * and _.
      $text = preg_replace_callback($reference_pattern,
                                    '_DoImages_reference_callback', $text);
      $text = preg_replace_callback($inline_pattern,
                                    '_DoImages_inline_callback', $text);
      return $text;
  }
  function _DoImages_reference_callback($matches) {
      #
      # Callback for reference-style images: ![alt text][id].
      #
      # $matches[1] is the whole match, $matches[2] the alt text and
      # $matches[3] the id. Returns the <img> tag when the (lower-cased) id
      # is a known link definition, or the whole match untouched otherwise.
      #
      global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;

      $alt_text = $matches[2];
      $link_id  = strtolower($matches[3]);
      if ($link_id == "") {
          # Shortcut links like ![this][] use the alt text as id
          # (taken before its quotes are encoded).
          $link_id = strtolower($alt_text);
      }

      # If there's no such link ID, leave intact:
      if (!isset($md_urls[$link_id])) {
          return $matches[1];
      }

      # * and _ are encoded to avoid conflicting with italics/bold.
      $plain   = array('*', '_');
      $encoded = array($md_escape_table['*'], $md_escape_table['_']);

      $alt_text = str_replace('"', '&quot;', $alt_text);
      $src      = str_replace($plain, $encoded, $md_urls[$link_id]);

      $image = "<img src=\"$src\" alt=\"$alt_text\"";
      if (isset($md_titles[$link_id])) {
          $image .= " title=\"" . str_replace($plain, $encoded, $md_titles[$link_id]) . "\"";
      }
      return $image . $md_empty_element_suffix;
  }
  854. function _DoImages_inline_callback($matches) {
  855. global $md_empty_element_suffix, $md_escape_table;
  856. $whole_match = $matches[1];
  857. $alt_text = $matches[2];
  858. $url = $matches[3];
  859. $title = '';
  860. if (isset($matches[6])) {
  861. $title = $matches[6];
  862. }
  863. $alt_text = str_replace('"', '&quot;', $alt_text);
  864. $title = str_replace('"', '&quot;', $title);
  865. # We've got to encode these to avoid conflicting with italics/bold.
  866. $url = str_replace(array('*', '_'),
  867. array($md_escape_table['*'], $md_escape_table['_']),
  868. $url);
  869. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  870. if (isset($title)) {
  871. $title = str_replace(array('*', '_'),
  872. array($md_escape_table['*'], $md_escape_table['_']),
  873. $title);
  874. $result .= " title=\"$title\""; # $title already quoted
  875. }
  876. $result .= $md_empty_element_suffix;
  877. return $result;
  878. }
  879. function _DoHeaders($text) {
  880. # Setext-style headers:
  881. # Header 1
  882. # ========
  883. #
  884. # Header 2
  885. # --------
  886. #
  887. $text = preg_replace(
  888. array('{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n=+[ \t]*\n+ }emx',
  889. '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n-+[ \t]*\n+ }emx'),
  890. array("_HashBlock('<h1'. ('\\2'? ' id=\"'._UnslashQuotes('\\2').'\"':'').
  891. '>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h1>'
  892. ) . '\n\n'",
  893. "_HashBlock('<h2'. ('\\2'? ' id=\"'._UnslashQuotes('\\2').'\"':'').
  894. '>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h2>'
  895. ) . '\n\n'"),
  896. $text);
  897. # atx-style headers:
  898. # # Header 1
  899. # ## Header 2
  900. # ## Header 2 with closing hashes ##
  901. # ...
  902. # ###### Header 6
  903. #
  904. $text = preg_replace('{
  905. ^(\#{1,6}) # $1 = string of #\'s
  906. [ \t]*
  907. (.+?) # $2 = Header text
  908. [ \t]*
  909. \#* # optional closing #\'s (not counted)
  910. (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\}[ ]*)? # id attribute
  911. \n+
  912. }xme',
  913. "_HashBlock(
  914. '<h'.strlen('\\1'). ('\\3'? ' id=\"'._UnslashQuotes('\\3').'\"':'').'>'.
  915. _RunSpanGamut(_UnslashQuotes('\\2')).
  916. '</h'.strlen('\\1').'>'
  917. ) . '\n\n'",
  918. $text);
  919. return $text;
  920. }
  921. function _DoTables($text) {
  922. #
  923. # Form HTML tables.
  924. #
  925. global $md_tab_width;
  926. $less_than_tab = $md_tab_width - 1;
  927. #
  928. # Find tables with leading pipe.
  929. #
  930. # | Header 1 | Header 2
  931. # | -------- | --------
  932. # | Cell 1 | Cell 2
  933. # | Cell 3 | Cell 4
  934. #
  935. $text = preg_replace_callback('
  936. {
  937. ^ # Start of a line
  938. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  939. [|] # Optional leading pipe (present)
  940. (.+) \n # $1: Header row (at least one pipe)
  941. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  942. [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
  943. ( # $3: Cells
  944. (?:
  945. [ ]* # Allowed whitespace.
  946. [|] .* \n # Row content.
  947. )*
  948. )
  949. (?=\n|\Z) # Stop at final double newline.
  950. }xm',
  951. '_DoTable_LeadingPipe_callback', $text);
  952. #
  953. # Find tables without leading pipe.
  954. #
  955. # Header 1 | Header 2
  956. # -------- | --------
  957. # Cell 1 | Cell 2
  958. # Cell 3 | Cell 4
  959. #
  960. $text = preg_replace_callback('
  961. {
  962. ^ # Start of a line
  963. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  964. (\S.*[|].*) \n # $1: Header row (at least one pipe)
  965. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  966. ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
  967. ( # $3: Cells
  968. (?:
  969. .* [|] .* \n # Row content
  970. )*
  971. )
  972. (?=\n|\Z) # Stop at final double newline.
  973. }xm',
  974. '_DoTable_callback', $text);
  975. return $text;
  976. }
  977. function _DoTable_LeadingPipe_callback($matches) {
  978. $head = $matches[1];
  979. $underline = $matches[2];
  980. $content = $matches[3];
  981. # Remove leading pipe for each row.
  982. $content = preg_replace('/^ *[|]/m', '', $content);
  983. return _DoTable_callback(array($matches[0], $head, $underline, $content));
  984. }
  985. function _DoTable_callback($matches) {
  986. $head = $matches[1];
  987. $underline = $matches[2];
  988. $content = $matches[3];
  989. # Remove any tailing pipes for each line.
  990. $head = preg_replace('/[|] *$/m', '', $head);
  991. $underline = preg_replace('/[|] *$/m', '', $underline);
  992. $content = preg_replace('/[|] *$/m', '', $content);
  993. # Reading alignement from header underline.
  994. $separators = preg_split('/ *[|] */', $underline);
  995. foreach ($separators as $n => $s) {
  996. if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"';
  997. else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
  998. else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
  999. else $attr[$n] = '';
  1000. }
  1001. # Creating code spans before splitting the row is an easy way to
  1002. # handle a code span containg pipes.
  1003. $head = _DoCodeSpans($head);
  1004. $headers = preg_split('/ *[|] */', $head);
  1005. $col_count = count($headers);
  1006. # Write column headers.
  1007. $text = "<table>\n";
  1008. $text .= "<thead>\n";
  1009. $text .= "<tr>\n";
  1010. foreach ($headers as $n => $header)
  1011. $text .= " <th$attr[$n]>"._RunSpanGamut(trim($header))."</th>\n";
  1012. $text .= "</tr>\n";
  1013. $text .= "</thead>\n";
  1014. # Split content by row.
  1015. $rows = explode("\n", trim($content, "\n"));
  1016. $text .= "<tbody>\n";
  1017. foreach ($rows as $row) {
  1018. # Creating code spans before splitting the row is an easy way to
  1019. # handle a code span containg pipes.
  1020. $row = _DoCodeSpans($row);
  1021. # Split row by cell.
  1022. $row_cells = preg_split('/ *[|] */', $row, $col_count);
  1023. $row_cells = array_pad($row_cells, $col_count, '');
  1024. $text .= "<tr>\n";
  1025. foreach ($row_cells as $n => $cell)
  1026. $text .= " <td$attr[$n]>"._RunSpanGamut(trim($cell))."</td>\n";
  1027. $text .= "</tr>\n";
  1028. }
  1029. $text .= "</tbody>\n";
  1030. $text .= "</table>";
  1031. return _HashBlock($text) . "\n";
  1032. }
  1033. function _DoLists($text) {
  1034. #
  1035. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  1036. #
  1037. global $md_tab_width, $md_list_level;
  1038. $less_than_tab = $md_tab_width - 1;
  1039. # Re-usable patterns to match list item bullets and number markers:
  1040. $marker_ul = '[*+-]';
  1041. $marker_ol = '\d+[.]';
  1042. $marker_any = "(?:$marker_ul|$marker_ol)";
  1043. $markers = array($marker_ul, $marker_ol);
  1044. foreach ($markers as $marker) {
  1045. # Re-usable pattern to match any entirel ul or ol list:
  1046. $whole_list = '
  1047. ( # $1 = whole list
  1048. ( # $2
  1049. [ ]{0,'.$less_than_tab.'}
  1050. ('.$marker.') # $3 = first list item marker
  1051. [ \t]+
  1052. )
  1053. (?s:.+?)
  1054. ( # $4
  1055. \z
  1056. |
  1057. \n{2,}
  1058. (?=\S)
  1059. (?! # Negative lookahead for another list item marker
  1060. [ \t]*
  1061. '.$marker.'[ \t]+
  1062. )
  1063. )
  1064. )
  1065. '; // mx
  1066. # We use a different prefix before nested lists than top-level lists.
  1067. # See extended comment in _ProcessListItems().
  1068. if ($md_list_level) {
  1069. $text = preg_replace_callback('{
  1070. ^
  1071. '.$whole_list.'
  1072. }mx',
  1073. '_DoLists_callback', $text);
  1074. }
  1075. else {
  1076. $text = preg_replace_callback('{
  1077. (?:(?<=\n\n)|\A\n?)
  1078. '.$whole_list.'
  1079. }mx',
  1080. '_DoLists_callback', $text);
  1081. }
  1082. }
  1083. return $text;
  1084. }
  1085. function _DoLists_callback($matches) {
  1086. # Re-usable patterns to match list item bullets and number markers:
  1087. $marker_ul = '[*+-]';
  1088. $marker_ol = '\d+[.]';
  1089. $marker_any = "(?:$marker_ul|$marker_ol)";
  1090. $list = $matches[1];
  1091. $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
  1092. $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );
  1093. # Turn double returns into triple returns, so that we can make a
  1094. # paragraph for the last item in a list, if necessary:
  1095. $list = preg_replace("/\n{2,}/", "\n\n\n", $list);
  1096. $result = _ProcessListItems($list, $marker_any);
  1097. $result = "<$list_type>\n" . $result . "</$list_type>";
  1098. return "\n" . _HashBlock($result) . "\n\n";
  1099. }
  1100. function _ProcessListItems($list_str, $marker_any) {
  1101. #
  1102. # Process the contents of a single ordered or unordered list, splitting it
  1103. # into individual list items.
  1104. #
  1105. global $md_list_level;
  1106. # The $md_list_level global keeps track of when we're inside a list.
  1107. # Each time we enter a list, we increment it; when we leave a list,
  1108. # we decrement. If it's zero, we're not in a list anymore.
  1109. #
  1110. # We do this because when we're not inside a list, we want to treat
  1111. # something like this:
  1112. #
  1113. # I recommend upgrading to version
  1114. # 8. Oops, now this line is treated
  1115. # as a sub-list.
  1116. #
  1117. # As a single paragraph, despite the fact that the second line starts
  1118. # with a digit-period-space sequence.
  1119. #
  1120. # Whereas when we're inside a list (or sub-list), that line will be
  1121. # treated as the start of a sub-list. What a kludge, huh? This is
  1122. # an aspect of Markdown's syntax that's hard to parse perfectly
  1123. # without resorting to mind-reading. Perhaps the solution is to
  1124. # change the syntax rules such that sub-lists must start with a
  1125. # starting cardinal number; e.g. "1." or "a.".
  1126. $md_list_level++;
  1127. # trim trailing blank lines:
  1128. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  1129. $list_str = preg_replace_callback('{
  1130. (\n)? # leading line = $1
  1131. (^[ \t]*) # leading whitespace = $2
  1132. ('.$marker_any.') [ \t]+ # list marker = $3
  1133. ((?s:.+?) # list item text = $4
  1134. (\n{1,2}))
  1135. (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
  1136. }xm',
  1137. '_ProcessListItems_callback', $list_str);
  1138. $md_list_level--;
  1139. return $list_str;
  1140. }
  1141. function _ProcessListItems_callback($matches) {
  1142. $item = $matches[4];
  1143. $leading_line =& $matches[1];
  1144. $leading_space =& $matches[2];
  1145. if ($leading_line || preg_match('/\n{2,}/', $item)) {
  1146. $item = _RunBlockGamut(_Outdent($item));
  1147. }
  1148. else {
  1149. # Recursion for sub-lists:
  1150. $item = _DoLists(_Outdent($item));
  1151. $item = preg_replace('/\n+$/', '', $item);
  1152. $item = _RunSpanGamut($item);
  1153. }
  1154. return "<li>" . $item . "</li>\n";
  1155. }
  1156. function _DoDefLists($text) {
  1157. #
  1158. # Form HTML definition lists.
  1159. #
  1160. global $md_tab_width;
  1161. $less_than_tab = $md_tab_width - 1;
  1162. # Re-usable patterns to match list item bullets and number markers:
  1163. # Re-usable pattern to match any entire dl list:
  1164. $whole_list = '
  1165. ( # $1 = whole list
  1166. ( # $2
  1167. [ ]{0,'.$less_than_tab.'}
  1168. ((?>.*\S.*\n)+) # $3 = defined term
  1169. \n?
  1170. [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
  1171. )
  1172. (?s:.+?)
  1173. ( # $4
  1174. \z
  1175. |
  1176. \n{2,}
  1177. (?=\S)
  1178. (?! # Negative lookahead for another term
  1179. [ ]{0,'.$less_than_tab.'}
  1180. (?: \S.*\n )+? # defined term
  1181. \n?
  1182. [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
  1183. )
  1184. (?! # Negative lookahead for another definition
  1185. [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
  1186. )
  1187. )
  1188. )
  1189. '; // mx
  1190. $text = preg_replace_callback('{
  1191. (?:(?<=\n\n)|\A\n?)
  1192. '.$whole_list.'
  1193. }mx',
  1194. '_DoDefLists_callback', $text);
  1195. return $text;
  1196. }
  1197. function _DoDefLists_callback($matches) {
  1198. # Re-usable patterns to match list item bullets and number markers:
  1199. $list = $matches[1];
  1200. # Turn double returns into triple returns, so that we can make a
  1201. # paragraph for the last item in a list, if necessary:
  1202. $result = trim(_ProcessDefListItems($list));
  1203. $result = "<dl>\n" . $result . "\n</dl>";
  1204. return _HashBlock($result) . "\n\n";
  1205. }
  1206. function _ProcessDefListItems($list_str) {
  1207. #
  1208. # Process the contents of a single ordered or unordered list, splitting it
  1209. # into individual list items.
  1210. #
  1211. global $md_tab_width;
  1212. $less_than_tab = $md_tab_width - 1;
  1213. # trim trailing blank lines:
  1214. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  1215. # Process definition terms.
  1216. $list_str = preg_replace_callback('{
  1217. (?:\n\n+|\A\n?) # leading line
  1218. ( # definition terms = $1
  1219. [ ]{0,'.$less_than_tab.'} # leading whitespace
  1220. (?![:][ ]|[ ]) # negative lookahead for a definition
  1221. # mark (colon) or more whitespace.
  1222. (?: \S.* \n)+? # actual term (not whitespace).
  1223. )
  1224. (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
  1225. # with a definition mark.
  1226. }xm',
  1227. '_ProcessDefListItems_callback_dt', $list_str);
  1228. # Process actual definitions.
  1229. $list_str = preg_replace_callback('{
  1230. \n(\n+)? # leading line = $1
  1231. [ ]{0,'.$less_than_tab.'} # whitespace before colon
  1232. [:][ ]+ # definition mark (colon)
  1233. ((?s:.+?)) # definition text = $2
  1234. (?= \n+ # stop at next definition mark,
  1235. (?: # next term or end of text
  1236. [ ]{0,'.$less_than_tab.'} [:][ ] |
  1237. <dt> | \z
  1238. )
  1239. )
  1240. }xm',
  1241. '_ProcessDefListItems_callback_dd', $list_str);
  1242. return $list_str;
  1243. }
  1244. function _ProcessDefListItems_callback_dt($matches) {
  1245. $terms = explode("\n", trim($matches[1]));
  1246. $text = '';
  1247. foreach ($terms as $term) {
  1248. $term = _RunSpanGamut(trim($term));
  1249. $text .= "\n<dt>" . $term . "</dt>";
  1250. }
  1251. return $text . "\n";
  1252. }
  1253. function _ProcessDefListItems_callback_dd($matches) {
  1254. $leading_line = $matches[1];
  1255. $def = $matches[2];
  1256. if ($leading_line || preg_match('/\n{2,}/', $def)) {
  1257. $def = _RunBlockGamut(_Outdent($def . "\n\n"));
  1258. $def = "\n". $def ."\n";
  1259. }
  1260. else {
  1261. $def = rtrim($def);
  1262. $def = _RunSpanGamut(_Outdent($def));
  1263. }
  1264. return "\n<dd>" . $def . "</dd>\n";
  1265. }
  1266. function _DoCodeBlocks($text) {
  1267. #
  1268. # Process Markdown `<pre><code>` blocks.
  1269. #
  1270. global $md_tab_width;
  1271. $text = preg_replace_callback("{
  1272. (?:\\n\\n|\\A)
  1273. ( # $1 = the code block -- one or more lines, starting with a space/tab
  1274. (?:
  1275. (?:[ ]\{$md_tab_width} | \\t) # Lines must start with a tab or a tab-width of spaces
  1276. .*\\n+
  1277. )+
  1278. )
  1279. ((?=^[ ]{0,$md_tab_width}\\S)|\\Z) # Lookahead for non-space at line-start, or end of doc
  1280. }xm",
  1281. '_DoCodeBlocks_callback', $text);
  1282. return $text;
  1283. }
  1284. function _DoCodeBlocks_callback($matches) {
  1285. $codeblock = $matches[1];
  1286. $codeblock = _EncodeCode(_Outdent($codeblock));
  1287. // $codeblock = _Detab($codeblock);
  1288. # trim leading newlines and trailing whitespace
  1289. $codeblock = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $codeblock);
  1290. $result = "<pre><code>" . $codeblock . "\n</code></pre>";
  1291. return "\n\n" . _HashBlock($result) . "\n\n";
  1292. }
  1293. function _DoCodeSpans($text) {
  1294. #
  1295. # * Backtick quotes are used for <code></code> spans.
  1296. #
  1297. # * You can use multiple backticks as the delimiters if you want to
  1298. # include literal backticks in the code span. So, this input:
  1299. #
  1300. # Just type ``foo `bar` baz`` at the prompt.
  1301. #
  1302. # Will translate to:
  1303. #
  1304. # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
  1305. #
  1306. # There's no arbitrary limit to the number of backticks you
  1307. # can use as delimters. If you need three consecutive backticks
  1308. # in your code, use four for delimiters, etc.
  1309. #
  1310. # * You can use spaces to get literal backticks at the edges:
  1311. #
  1312. # ... type `` `bar` `` ...
  1313. #
  1314. # Turns to:
  1315. #
  1316. # ... type <code>`bar`</code> ...
  1317. #
  1318. $text = preg_replace_callback('@
  1319. (?<!\\\) # Character before opening ` can\'t be a backslash
  1320. (`+) # $1 = Opening run of `
  1321. (.+?) # $2 = The code block
  1322. (?<!`)
  1323. \1 # Matching closer
  1324. (?!`)
  1325. @xs',
  1326. '_DoCodeSpans_callback', $text);
  1327. return $text;
  1328. }
  1329. function _DoCodeSpans_callback($matches) {
  1330. $c = $matches[2];
  1331. $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
  1332. $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
  1333. $c = _EncodeCode($c);
  1334. return "<code>$c</code>";
  1335. }
  1336. function _EncodeCode($_) {
  1337. #
  1338. # Encode/escape certain characters inside Markdown code runs.
  1339. # The point is that in code, these characters are literals,
  1340. # and lose their special Markdown meanings.
  1341. #
  1342. global $md_escape_table;
  1343. # Encode all ampersands; HTML entities are not
  1344. # entities within a Markdown code span.
  1345. $_ = str_replace('&', '&amp;', $_);
  1346. # Do the angle bracket song and dance:
  1347. $_ = str_replace(array('<', '>'),
  1348. array('&lt;', '&gt;'), $_);
  1349. # Now, escape characters that are magic in Markdown:
  1350. $_ = str_replace(array_keys($md_escape_table),
  1351. array_values($md_escape_table), $_);
  1352. return $_;
  1353. }
  1354. function _DoItalicsAndBold($text) {
  1355. # <strong> must go first:
  1356. $text = preg_replace(array(
  1357. '{
  1358. ( (?<!\w) __ ) # $1: Marker (not preceded by alphanum)
  1359. (?=\S) # Not followed by whitespace
  1360. (?!__) # or two others marker chars.
  1361. ( # $2: Content
  1362. (?>
  1363. [^_]+? # Anthing not em markers.
  1364. |
  1365. # Balence any regular _ emphasis inside.
  1366. (?<![a-zA-Z0-9])_ (?=\S) (?! _) (.+?)
  1367. (?<=\S) _ (?![a-zA-Z0-9])
  1368. )+?
  1369. )
  1370. (?<=\S) __ # End mark not preceded by whitespace.
  1371. (?!\w) # Not followed by alphanum.
  1372. }sx',
  1373. '{
  1374. ( (?<!\*\*) \*\* ) # $1: Marker (not preceded by two *)
  1375. (?=\S) # Not followed by whitespace
  1376. (?!\1) # or two others marker chars.
  1377. ( # $2: Content
  1378. (?>
  1379. [^*]+? # Anthing not em markers.
  1380. |
  1381. # Balence any regular * emphasis inside.
  1382. \* (?=\S) (?! \*) (.+?) (?<=\S) \*
  1383. )+?
  1384. )
  1385. (?<=\S) \*\* # End mark not preceded by whitespace.
  1386. }sx',
  1387. ),
  1388. '<strong>\2</strong>', $text);
  1389. # Then <em>:
  1390. $text = preg_replace(array(
  1391. '{ ( (?<!\w) _ ) (?=\S) (?! _) (.+?) (?<=\S) _ (?!\w) }sx',
  1392. '{ ( (?<!\*)\* ) (?=\S) (?! \*) (.+?) (?<=\S) \* }sx',
  1393. ),
  1394. '<em>\2</em>', $text);
  1395. return $text;
  1396. }
  1397. function _DoBlockQuotes($text) {
  1398. $text = preg_replace_callback('/
  1399. ( # Wrap whole match in $1
  1400. (
  1401. ^[ \t]*>[ \t]? # ">" at the start of a line
  1402. .+\n # rest of the first line
  1403. (.+\n)* # subsequent consecutive lines
  1404. \n* # blanks
  1405. )+
  1406. )
  1407. /xm',
  1408. '_DoBlockQuotes_callback', $text);
  1409. return $text;
  1410. }
  1411. function _DoBlockQuotes_callback($matches) {
  1412. $bq = $matches[1];
  1413. # trim one level of quoting - trim whitespace-only lines
  1414. $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
  1415. $bq = _RunBlockGamut($bq); # recurse
  1416. $bq = preg_replace('/^/m', " ", $bq);
  1417. # These leading spaces screw with <pre> content, so we need to fix that:
  1418. $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
  1419. '_DoBlockQuotes_callback2', $bq);
  1420. return _HashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
  1421. }
  1422. function _DoBlockQuotes_callback2($matches) {
  1423. $pre = $matches[1];
  1424. $pre = preg_replace('/^ /m', '', $pre);
  1425. return $pre;
  1426. }
  1427. function _FormParagraphs($text) {
  1428. #
  1429. # Params:
  1430. # $text - string to process with html <p> tags
  1431. #
  1432. global $md_html_blocks, $md_html_hashes;
  1433. # Strip leading and trailing lines:
  1434. $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
  1435. $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
  1436. #
  1437. # Wrap <p> tags and unhashify HTML blocks
  1438. #
  1439. foreach ($grafs as $key => $value) {
  1440. $value = trim(_RunSpanGamut($value));
  1441. # Check if this should be enclosed in a paragraph.
  1442. # Text equaling to a clean tag hash are not enclosed.
  1443. # Text starting with a block tag hash are not either.
  1444. $clean_key = $value;
  1445. $block_key = substr($value, 0, 32);
  1446. $is_p = (!isset($md_html_blocks[$block_key]) &&
  1447. !isset($md_html_hashes[$clean_key]));
  1448. if ($is_p) {
  1449. $value = "<p>$value</p>";
  1450. }
  1451. $grafs[$key] = $value;
  1452. }
  1453. # Join grafs in one text, then unhash HTML tags.
  1454. $text = implode("\n\n", $grafs);
  1455. # Finish by removing any tag hashes still present in $text.
  1456. $text = _UnhashTags($text);
  1457. return $text;
  1458. }
  1459. function _EncodeAmpsAndAngles($text) {
  1460. # Smart processing for ampersands and angle brackets that need to be encoded.
  1461. # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
  1462. # http://bumppo.net/projects/amputator/
  1463. $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
  1464. '&amp;', $text);;
  1465. # Encode naked <'s
  1466. $text = preg_replace('{<(?![a-z/?\$!])}i', '&lt;', $text);
  1467. return $text;
  1468. }
  1469. function _EncodeBackslashEscapes($text) {
  1470. #
  1471. # Parameter: String.
  1472. # Returns: The string, with after processing the following backslash
  1473. # escape sequences.
  1474. #
  1475. global $md_escape_table, $md_backslash_escape_table;
  1476. # Must process escaped backslashes first.
  1477. return str_replace(array_keys($md_backslash_escape_table),
  1478. array_values($md_backslash_escape_table), $text);
  1479. }
  1480. function _DoAutoLinks($text) {
  1481. $text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
  1482. '<a href="\1">\1</a>', $text);
  1483. # Email addresses: <address@domain.foo>
  1484. $text = preg_replace('{
  1485. <
  1486. (?:mailto:)?
  1487. (
  1488. [-.\w]+
  1489. \@
  1490. [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
  1491. )
  1492. >
  1493. }exi',
  1494. "_EncodeEmailAddress(_UnescapeSpecialChars(_UnslashQuotes('\\1')))",
  1495. $text);
  1496. return $text;
  1497. }
  1498. function _EncodeEmailAddress($addr) {
  1499. #
  1500. # Input: an email address, e.g. "foo@example.com"
  1501. #
  1502. # Output: the email address as a mailto link, with each character
  1503. # of the address encoded as either a decimal or hex entity, in
  1504. # the hopes of foiling most address harvesting spam bots. E.g.:
  1505. #
  1506. # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
  1507. # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
  1508. # &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
  1509. #
  1510. # Based by a filter by Matthew Wickline, posted to the BBEdit-Talk
  1511. # mailing list: <http://tinyurl.com/yu7ue>
  1512. #
  1513. $addr = "mailto:" . $addr;
  1514. $length = strlen($addr);
  1515. # leave ':' alone (to spot mailto: later)
  1516. $addr = preg_replace_callback('/([^\:])/',
  1517. '_EncodeEmailAddress_callback', $addr);
  1518. $addr = "<a href=\"$addr\">$addr</a>";
  1519. # strip the mailto: from the visible part
  1520. $addr = preg_replace('/">.+?:/', '">', $addr);
  1521. return $addr;
  1522. }
  1523. function _EncodeEmailAddress_callback($matches) {
  1524. $char = $matches[1];
  1525. $r = rand(0, 100);
  1526. # roughly 10% raw, 45% hex, 45% dec
  1527. # '@' *must* be encoded. I insist.
  1528. if ($r > 90 && $char != '@') return $char;
  1529. if ($r < 45) return '&#x'.dechex(ord($char)).';';
  1530. return '&#'.ord($char).';';
  1531. }
  1532. function _UnescapeSpecialChars($text) {
  1533. #
  1534. # Swap back in all the speā€¦

Large files files are truncated, but you can click here to view the full file