PageRenderTime 59ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/components/com_breezingforms/markdown.php

https://bitbucket.org/izubizarreta/https-bitbucket.org-bityvip-alpes
PHP | 1975 lines | 1167 code | 244 blank | 564 comment | 93 complexity | 7a39b908098936c3ee9d75d07170b4f9 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, MIT, LGPL-3.0, LGPL-2.0, JSON

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. defined('_JEXEC') or die('Direct Access to this location is not allowed.');
  3. /**
  4. * BreezingForms - A Joomla Forms Application
  5. * @version 1.8
  6. * @package BreezingForms
  7. * @copyright (C) 2008-2012 by Markus Bopp
  8. * @license Released under the terms of the GNU General Public License
  9. **/
  10. global $MarkdownPHPVersion, $MarkdownSyntaxVersion,
  11. $md_empty_element_suffix, $md_tab_width,
  12. $md_nested_brackets_depth, $md_nested_brackets,
  13. $md_escape_table, $md_backslash_escape_table,
  14. $md_list_level;
  15. $MarkdownPHPVersion = 'Extra 1.0.1';
  16. $MarkdownSyntaxVersion = '1.0.1';
  17. #
  18. # Global default settings:
  19. #
  20. $md_empty_element_suffix = " />"; # Change to ">" for HTML output
  21. $md_tab_width = 4;
  22. $md_wp_posts = true; # Set to false to remove Markdown from posts.
  23. $md_wp_comments = true; # Set to false to remove Markdown from comments.
  24. if (isset($wp_version)) {
  25. # Post content and excerpts
  26. if ($md_wp_posts) {
  27. remove_filter('the_content', 'wpautop');
  28. remove_filter('the_excerpt', 'wpautop');
  29. add_filter('the_content', 'Markdown', 6);
  30. add_filter('get_the_excerpt', 'Markdown', 6);
  31. add_filter('get_the_excerpt', 'trim', 7);
  32. add_filter('the_excerpt', 'md_add_p');
  33. add_filter('the_excerpt_rss', 'md_strip_p');
  34. remove_filter('content_save_pre', 'balanceTags', 50);
  35. remove_filter('excerpt_save_pre', 'balanceTags', 50);
  36. add_filter('the_content', 'balanceTags', 50);
  37. add_filter('get_the_excerpt', 'balanceTags', 9);
  38. function md_add_p($text) {
  39. if (strlen($text) == 0) return;
  40. if (strcasecmp(substr($text, -3), '<p>') == 0) return $text;
  41. return '<p>'.$text.'</p>';
  42. }
  43. function md_strip_p($t) { return preg_replace('{</?[pP]>}', '', $t); }
  44. }
  45. # Comments
  46. if ($md_wp_comments) {
  47. remove_filter('comment_text', 'wpautop');
  48. remove_filter('comment_text', 'make_clickable');
  49. add_filter('pre_comment_content', 'Markdown', 6);
  50. add_filter('pre_comment_content', 'md_hide_tags', 8);
  51. add_filter('pre_comment_content', 'md_show_tags', 12);
  52. add_filter('get_comment_text', 'Markdown', 6);
  53. add_filter('get_comment_excerpt', 'Markdown', 6);
  54. add_filter('get_comment_excerpt', 'md_strip_p', 7);
  55. global $md_hidden_tags;
  56. $md_hidden_tags = array(
  57. '<p>' => md5('<p>'), '</p>' => md5('</p>'),
  58. '<pre>' => md5('<pre>'), '</pre>'=> md5('</pre>'),
  59. '<ol>' => md5('<ol>'), '</ol>' => md5('</ol>'),
  60. '<ul>' => md5('<ul>'), '</ul>' => md5('</ul>'),
  61. '<li>' => md5('<li>'), '</li>' => md5('</li>'),
  62. );
  63. function md_hide_tags($text) {
  64. global $md_hidden_tags;
  65. return str_replace(array_keys($md_hidden_tags),
  66. array_values($md_hidden_tags), $text);
  67. }
  68. function md_show_tags($text) {
  69. global $md_hidden_tags;
  70. return str_replace(array_values($md_hidden_tags),
  71. array_keys($md_hidden_tags), $text);
  72. }
  73. }
  74. }
  75. # -- bBlog Plugin Info --------------------------------------------------------
  76. function identify_modifier_markdown() {
  77. global $MarkdownPHPVersion;
  78. return array(
  79. 'name' => 'markdown',
  80. 'type' => 'modifier',
  81. 'nicename' => 'PHP Markdown Extra',
  82. 'description' => 'A text-to-HTML conversion tool for web writers',
  83. 'authors' => 'Michel Fortin and John Gruber',
  84. 'licence' => 'GPL',
  85. 'version' => $MarkdownPHPVersion,
  86. 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>'
  87. );
  88. }
  89. # -- Smarty Modifier Interface ------------------------------------------------
  90. function smarty_modifier_markdown($text) {
  91. return Markdown($text);
  92. }
  93. # -- Textile Compatibility Mode -----------------------------------------------
  94. # Rename this file to "classTextile.php" and it can replace Textile anywhere.
  95. if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
  96. # Try to include PHP SmartyPants. Should be in the same directory.
  97. @include_once 'smartypants.php';
  98. # Fake Textile class. It calls Markdown instead.
  99. class Textile {
  100. function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
  101. if ($lite == '' && $encode == '') $text = Markdown($text);
  102. if (function_exists('SmartyPants')) $text = SmartyPants($text);
  103. return $text;
  104. }
  105. }
  106. }
  107. #
  108. # Globals:
  109. #
  110. # Regex to match balanced [brackets].
  111. # Needed to insert a maximum bracked depth while converting to PHP.
  112. $md_nested_brackets_depth = 6;
  113. $md_nested_brackets =
  114. str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
  115. str_repeat('\])*', $md_nested_brackets_depth);
  116. # Table of hash values for escaped characters:
  117. $md_escape_table = array(
  118. "\\" => md5("\\"),
  119. "`" => md5("`"),
  120. "*" => md5("*"),
  121. "_" => md5("_"),
  122. "{" => md5("{"),
  123. "}" => md5("}"),
  124. "[" => md5("["),
  125. "]" => md5("]"),
  126. "(" => md5("("),
  127. ")" => md5(")"),
  128. ">" => md5(">"),
  129. "#" => md5("#"),
  130. "+" => md5("+"),
  131. "-" => md5("-"),
  132. "." => md5("."),
  133. "!" => md5("!"),
  134. ":" => md5(":"),
  135. "|" => md5("|"),
  136. );
  137. # Create an identical table but for escaped characters.
  138. $md_backslash_escape_table;
  139. foreach ($md_escape_table as $key => $char)
  140. $md_backslash_escape_table["\\$key"] = $char;
  141. function Markdown($text) {
  142. #
  143. # Main function. The order in which other subs are called here is
  144. # essential. Link and image substitutions need to happen before
  145. # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  146. # and <img> tags get encoded.
  147. #
  148. # Clear the global hashes. If we don't clear these, you get conflicts
  149. # from other articles when generating a page which contains more than
  150. # one article (e.g. an index page that shows the N most recent
  151. # articles):
  152. global $md_urls, $md_titles, $md_html_blocks, $md_html_hashes;
  153. $md_urls = array();
  154. $md_titles = array();
  155. $md_html_blocks = array();
  156. $md_html_hashes = array();
  157. # Standardize line endings:
  158. # DOS to Unix and Mac to Unix
  159. $text = str_replace(array("\r\n", "\r"), "\n", $text);
  160. # Make sure $text ends with a couple of newlines:
  161. $text .= "\n\n";
  162. # Convert all tabs to spaces.
  163. $text = _Detab($text);
  164. # Turn block-level HTML blocks into hash entries
  165. $text = _HashHTMLBlocks($text);
  166. # Strip any lines consisting only of spaces and tabs.
  167. # This makes subsequent regexen easier to write, because we can
  168. # match consecutive blank lines with /\n+/ instead of something
  169. # contorted like /[ \t]*\n+/ .
  170. $text = preg_replace('/^[ \t]+$/m', '', $text);
  171. # Strip link definitions, store in hashes.
  172. $text = _StripLinkDefinitions($text);
  173. $text = _RunBlockGamut($text, FALSE);
  174. $text = _UnescapeSpecialChars($text);
  175. return $text . "\n";
  176. }
  177. function _StripLinkDefinitions($text) {
  178. #
  179. # Strips link definitions from text, stores the URLs and titles in
  180. # hash references.
  181. #
  182. global $md_tab_width;
  183. $less_than_tab = $md_tab_width - 1;
  184. # Link defs are in the form: ^[id]: url "optional title"
  185. $text = preg_replace_callback('{
  186. ^[ ]{0,'.$less_than_tab.'}\[(.+)\]: # id = $1
  187. [ \t]*
  188. \n? # maybe *one* newline
  189. [ \t]*
  190. <?(\S+?)>? # url = $2
  191. [ \t]*
  192. \n? # maybe one newline
  193. [ \t]*
  194. (?:
  195. (?<=\s) # lookbehind for whitespace
  196. ["(]
  197. (.+?) # title = $3
  198. [")]
  199. [ \t]*
  200. )? # title is optional
  201. (?:\n+|\Z)
  202. }xm',
  203. '_StripLinkDefinitions_callback',
  204. $text);
  205. return $text;
  206. }
  207. function _StripLinkDefinitions_callback($matches) {
  208. global $md_urls, $md_titles;
  209. $link_id = strtolower($matches[1]);
  210. $md_urls[$link_id] = _EncodeAmpsAndAngles($matches[2]);
  211. if (isset($matches[3]))
  212. $md_titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
  213. return ''; # String that will replace the block
  214. }
  215. function _HashHTMLBlocks($text) {
  216. #
  217. # Hashify HTML Blocks and "clean tags".
  218. #
  219. # We only want to do this for block-level HTML tags, such as headers,
  220. # lists, and tables. That's because we still want to wrap <p>s around
  221. # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  222. # phrase emphasis, and spans. The list of tags we're looking for is
  223. # hard-coded.
  224. #
  225. # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
  226. # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
  227. # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
  228. # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
  229. # These two functions are calling each other. It's recursive!
  230. #
  231. global $block_tags, $context_block_tags, $contain_span_tags,
  232. $clean_tags, $auto_close_tags;
  233. # Tags that are always treated as block tags:
  234. $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
  235. 'form|fieldset|iframe|hr|legend';
  236. # Tags treated as block tags only if the opening tag is alone on it's line:
  237. $context_block_tags = 'script|noscript|math|ins|del';
  238. # Tags where markdown="1" default to span mode:
  239. $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend';
  240. # Tags which must not have their contents modified, no matter where
  241. # they appear:
  242. $clean_tags = 'script|math';
  243. # Tags that do not need to be closed.
  244. $auto_close_tags = 'hr|img';
  245. # Regex to match any tag.
  246. global $tag_match;
  247. $tag_match =
  248. '{
  249. ( # $2: Capture hole tag.
  250. </? # Any opening or closing tag.
  251. [\w:$]+ # Tag name.
  252. \s* # Whitespace.
  253. (?:
  254. ".*?" | # Double quotes (can contain `>`)
  255. \'.*?\' | # Single quotes (can contain `>`)
  256. .+? # Anything but quotes and `>`.
  257. )*?
  258. > # End of tag.
  259. |
  260. <!-- .*? --> # HTML Comment
  261. |
  262. <\? .*? \?> # Processing instruction
  263. |
  264. <!\[CDATA\[.*?\]\]> # CData Block
  265. )
  266. }xs';
  267. #
  268. # Call the HTML-in-Markdown hasher.
  269. #
  270. list($text, ) = _HashHTMLBlocks_InMarkdown($text);
  271. return $text;
  272. }
  273. function _HashHTMLBlocks_InMarkdown($text, $indent = 0,
  274. $enclosing_tag = '', $md_span = false)
  275. {
  276. #
  277. # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
  278. #
  279. # * $indent is the number of space to be ignored when checking for code
  280. # blocks. This is important because if we don't take the indent into
  281. # account, something like this (which looks right) won't work as expected:
  282. #
  283. # <div>
  284. # <div markdown="1">
  285. # Hello World. <-- Is this a Markdown code block or text?
  286. # </div> <-- Is this a Markdown code block or a real tag?
  287. # <div>
  288. #
  289. # If you don't like this, just don't indent the tag on which
  290. # you apply the markdown="1" attribute.
  291. #
  292. # * If $enclosing_tag is not empty, stops at the first unmatched closing
  293. # tag with that name. Nested tags supported.
  294. #
  295. # * If $md_span is true, text inside must treated as span. So any double
  296. # newline will be replaced by a single newline so that it does not create
  297. # paragraphs.
  298. #
  299. # Returns an array of that form: ( processed text , remaining text )
  300. #
  301. global $block_tags, $context_block_tags, $clean_tags, $auto_close_tags,
  302. $tag_match;
  303. if ($text === '') return array('', '');
  304. # Regex to check for the presense of newlines around a block tag.
  305. $newline_match_before = "/(?:^\n?|\n\n) *$/";
  306. $newline_match_after =
  307. '{
  308. ^ # Start of text following the tag.
  309. (?:[ ]*<!--.*?-->)? # Optional comment.
  310. [ ]*\n # Must be followed by newline.
  311. }xs';
  312. # Regex to match any tag.
  313. $block_tag_match =
  314. '{
  315. ( # $2: Capture hole tag.
  316. </? # Any opening or closing tag.
  317. (?: # Tag name.
  318. '.$block_tags.' |
  319. '.$context_block_tags.' |
  320. '.$clean_tags.' |
  321. (?!\s)'.$enclosing_tag.'
  322. )
  323. \s* # Whitespace.
  324. (?:
  325. ".*?" | # Double quotes (can contain `>`)
  326. \'.*?\' | # Single quotes (can contain `>`)
  327. .+? # Anything but quotes and `>`.
  328. )*?
  329. > # End of tag.
  330. |
  331. <!-- .*? --> # HTML Comment
  332. |
  333. <\? .*? \?> # Processing instruction
  334. |
  335. <!\[CDATA\[.*?\]\]> # CData Block
  336. )
  337. }xs';
  338. $depth = 0; # Current depth inside the tag tree.
  339. $parsed = ""; # Parsed text that will be returned.
  340. #
  341. # Loop through every tag until we find the closing tag of the parent
  342. # or loop until reaching the end of text if no parent tag specified.
  343. #
  344. do {
  345. #
  346. # Split the text using the first $tag_match pattern found.
  347. # Text before pattern will be first in the array, text after
  348. # pattern will be at the end, and between will be any catches made
  349. # by the pattern.
  350. #
  351. $parts = preg_split($block_tag_match, $text, 2,
  352. PREG_SPLIT_DELIM_CAPTURE);
  353. # If in Markdown span mode, replace any multiple newlines that would
  354. # trigger a new paragraph.
  355. if ($md_span) {
  356. $parts[0] = preg_replace('/\n\n/', "\n", $parts[0]);
  357. }
  358. $parsed .= $parts[0]; # Text before current tag.
  359. # If end of $text has been reached. Stop loop.
  360. if (count($parts) < 3) {
  361. $text = "";
  362. break;
  363. }
  364. $tag = $parts[1]; # Tag to handle.
  365. $text = $parts[2]; # Remaining text after current tag.
  366. #
  367. # Check for: Tag inside code block or span
  368. #
  369. if (# Find current paragraph
  370. preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
  371. (
  372. # Then match in it either a code block...
  373. preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
  374. '(?!\n)$/', $matches[1], $x) ||
  375. # ...or unbalenced code span markers. (the regex matches balenced)
  376. !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
  377. $matches[1])
  378. ))
  379. {
  380. # Tag is in code block or span and may not be a tag at all. So we
  381. # simply skip the first char (should be a `<`).
  382. $parsed .= $tag{0};
  383. $text = substr($tag, 1) . $text; # Put back $tag minus first char.
  384. }
  385. #
  386. # Check for: Opening Block level tag or
  387. # Opening Content Block tag (like ins and del)
  388. # used as a block tag (tag is alone on it's line).
  389. #
  390. else if (preg_match("{^<(?:$block_tags)\b}", $tag) ||
  391. ( preg_match("{^<(?:$context_block_tags)\b}", $tag) &&
  392. preg_match($newline_match_before, $parsed) &&
  393. preg_match($newline_match_after, $text) )
  394. )
  395. {
  396. # Need to parse tag and following text using the HTML parser.
  397. list($block_text, $text) =
  398. _HashHTMLBlocks_InHTML($tag . $text,
  399. "_HashHTMLBlocks_HashBlock", TRUE);
  400. # Make sure it stays outside of any paragraph by adding newlines.
  401. $parsed .= "\n\n$block_text\n\n";
  402. }
  403. #
  404. # Check for: Clean tag (like script, math)
  405. # HTML Comments, processing instructions.
  406. #
  407. else if (preg_match("{^<(?:$clean_tags)\b}", $tag) ||
  408. $tag{1} == '!' || $tag{1} == '?')
  409. {
  410. # Need to parse tag and following text using the HTML parser.
  411. # (don't check for markdown attribute)
  412. list($block_text, $text) =
  413. _HashHTMLBlocks_InHTML($tag . $text,
  414. "_HashHTMLBlocks_HashClean", FALSE);
  415. $parsed .= $block_text;
  416. }
  417. #
  418. # Check for: Tag with same name as enclosing tag.
  419. #
  420. else if ($enclosing_tag !== '' &&
  421. # Same name as enclosing tag.
  422. preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
  423. {
  424. #
  425. # Increase/decrease nested tag count.
  426. #
  427. if ($tag{1} == '/') $depth--;
  428. else if ($tag{strlen($tag)-2} != '/') $depth++;
  429. if ($depth < 0) {
  430. #
  431. # Going out of parent element. Clean up and break so we
  432. # return to the calling function.
  433. #
  434. $text = $tag . $text;
  435. break;
  436. }
  437. $parsed .= $tag;
  438. }
  439. else {
  440. $parsed .= $tag;
  441. }
  442. } while ($depth >= 0);
  443. return array($parsed, $text);
  444. }
  445. function _HashHTMLBlocks_InHTML($text, $hash_function, $md_attr) {
  446. #
  447. # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
  448. #
  449. # * Calls $hash_function to convert any blocks.
  450. # * Stops when the first opening tag closes.
  451. # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
  452. # (it is not inside clean tags)
  453. #
  454. # Returns an array of that form: ( processed text , remaining text )
  455. #
  456. global $auto_close_tags, $contain_span_tags, $tag_match;
  457. if ($text === '') return array('', '');
  458. # Regex to match `markdown` attribute inside of a tag.
  459. $markdown_attr_match = '
  460. {
  461. \s* # Eat whitespace before the `markdown` attribute
  462. markdown
  463. \s*=\s*
  464. (["\']) # $1: quote delimiter
  465. (.*?) # $2: attribute value
  466. \1 # matching delimiter
  467. }xs';
  468. $original_text = $text; # Save original text in case of faliure.
  469. $depth = 0; # Current depth inside the tag tree.
  470. $block_text = ""; # Temporary text holder for current text.
  471. $parsed = ""; # Parsed text that will be returned.
  472. #
  473. # Get the name of the starting tag.
  474. #
  475. if (preg_match("/^<([\w:$]*)\b/", $text, $matches))
  476. $base_tag_name = $matches[1];
  477. #
  478. # Loop through every tag until we find the corresponding closing tag.
  479. #
  480. do {
  481. #
  482. # Split the text using the first $tag_match pattern found.
  483. # Text before pattern will be first in the array, text after
  484. # pattern will be at the end, and between will be any catches made
  485. # by the pattern.
  486. #
  487. $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
  488. if (count($parts) < 3) {
  489. #
  490. # End of $text reached with unbalenced tag(s).
  491. # In that case, we return original text unchanged and pass the
  492. # first character as filtered to prevent an infinite loop in the
  493. # parent function.
  494. #
  495. return array($original_text{0}, substr($original_text, 1));
  496. }
  497. $block_text .= $parts[0]; # Text before current tag.
  498. $tag = $parts[1]; # Tag to handle.
  499. $text = $parts[2]; # Remaining text after current tag.
  500. #
  501. # Check for: Auto-close tag (like <hr/>)
  502. # Comments and Processing Instructions.
  503. #
  504. if (preg_match("{^</?(?:$auto_close_tags)\b}", $tag) ||
  505. $tag{1} == '!' || $tag{1} == '?')
  506. {
  507. # Just add the tag to the block as if it was text.
  508. $block_text .= $tag;
  509. }
  510. else {
  511. #
  512. # Increase/decrease nested tag count. Only do so if
  513. # the tag's name match base tag's.
  514. #
  515. if (preg_match("{^</?$base_tag_name\b}", $tag)) {
  516. if ($tag{1} == '/') $depth--;
  517. else if ($tag{strlen($tag)-2} != '/') $depth++;
  518. }
  519. #
  520. # Check for `markdown="1"` attribute and handle it.
  521. #
  522. if ($md_attr &&
  523. preg_match($markdown_attr_match, $tag, $attr_matches) &&
  524. preg_match('/^(?:1|block|span)$/', $attr_matches[2]))
  525. {
  526. # Remove `markdown` attribute from opening tag.
  527. $tag = preg_replace($markdown_attr_match, '', $tag);
  528. # Check if text inside this tag must be parsed in span mode.
  529. $md_mode = $attr_matches[2];
  530. $span_mode = $md_mode == 'span' || $md_mode != 'block' &&
  531. preg_match("{^<(?:$contain_span_tags)\b}", $tag);
  532. # Calculate indent before tag.
  533. preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
  534. $indent = strlen($matches[1]);
  535. # End preceding block with this tag.
  536. $block_text .= $tag;
  537. $parsed .= $hash_function($block_text, $span_mode);
  538. # Get enclosing tag name for the ParseMarkdown function.
  539. preg_match('/^<([\w:$]*)\b/', $tag, $matches);
  540. $tag_name = $matches[1];
  541. # Parse the content using the HTML-in-Markdown parser.
  542. list ($block_text, $text)
  543. = _HashHTMLBlocks_InMarkdown($text, $indent,
  544. $tag_name, $span_mode);
  545. # Outdent markdown text.
  546. if ($indent > 0) {
  547. $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
  548. $block_text);
  549. }
  550. # Append tag content to parsed text.
  551. if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
  552. else $parsed .= "$block_text";
  553. # Start over a new block.
  554. $block_text = "";
  555. }
  556. else $block_text .= $tag;
  557. }
  558. } while ($depth > 0);
  559. #
  560. # Hash last block text that wasn't processed inside the loop.
  561. #
  562. $parsed .= $hash_function($block_text);
  563. return array($parsed, $text);
  564. }
  565. function _HashHTMLBlocks_HashBlock($text) {
  566. global $md_html_hashes, $md_html_blocks;
  567. $key = md5($text);
  568. $md_html_hashes[$key] = $text;
  569. $md_html_blocks[$key] = $text;
  570. return $key; # String that will replace the tag.
  571. }
  572. function _HashHTMLBlocks_HashClean($text) {
  573. global $md_html_hashes;
  574. $key = md5($text);
  575. $md_html_hashes[$key] = $text;
  576. return $key; # String that will replace the clean tag.
  577. }
  578. function _HashBlock($text) {
  579. #
  580. # Called whenever a tag must be hashed. When a function insert a block-level
  581. # tag in $text, it pass through this function and is automaticaly escaped,
  582. # which remove the need to call _HashHTMLBlocks at every step.
  583. #
  584. # Swap back any tag hash found in $text so we do not have to _UnhashTags
  585. # multiple times at the end. Must do this because of
  586. $text = _UnhashTags($text);
  587. # Then hash the block as normal.
  588. return _HashHTMLBlocks_HashBlock($text);
  589. }
  590. function _RunBlockGamut($text, $hash_html_blocks = TRUE) {
  591. #
  592. # These are all the transformations that form block-level
  593. # tags like paragraphs, headers, and list items.
  594. #
  595. if ($hash_html_blocks) {
  596. # We need to escape raw HTML in Markdown source before doing anything
  597. # else. This need to be done for each block, and not only at the
  598. # begining in the Markdown function since hashed blocks can be part of
  599. # a list item and could have been indented. Indented blocks would have
  600. # been seen as a code block in previous pass of _HashHTMLBlocks.
  601. $text = _HashHTMLBlocks($text);
  602. }
  603. $text = _DoHeaders($text);
  604. $text = _DoTables($text);
  605. # Do Horizontal Rules:
  606. global $md_empty_element_suffix;
  607. $text = preg_replace(
  608. array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}emx',
  609. '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}emx',
  610. '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}emx'),
  611. "_HashBlock('\n<hr$md_empty_element_suffix\n')",
  612. $text);
  613. $text = _DoLists($text);
  614. $text = _DoDefLists($text);
  615. $text = _DoCodeBlocks($text);
  616. $text = _DoBlockQuotes($text);
  617. $text = _FormParagraphs($text);
  618. return $text;
  619. }
  620. function _RunSpanGamut($text) {
  621. #
  622. # These are all the transformations that occur *within* block-level
  623. # tags like paragraphs, headers, and list items.
  624. #
  625. global $md_empty_element_suffix;
  626. $text = _DoCodeSpans($text);
  627. $text = _EscapeSpecialChars($text);
  628. # Process anchor and image tags. Images must come first,
  629. # because ![foo][f] looks like an anchor.
  630. $text = _DoImages($text);
  631. $text = _DoAnchors($text);
  632. # Make links out of things like `<http://example.com/>`
  633. # Must come after _DoAnchors(), because you can use < and >
  634. # delimiters in inline links like [this](<url>).
  635. $text = _DoAutoLinks($text);
  636. $text = _EncodeAmpsAndAngles($text);
  637. $text = _DoItalicsAndBold($text);
  638. # Do hard breaks:
  639. $text = preg_replace('/ {2,}\n/', "<br$md_empty_element_suffix\n", $text);
  640. return $text;
  641. }
  642. function _EscapeSpecialChars($text) {
  643. global $md_escape_table;
  644. $tokens = _TokenizeHTML($text);
  645. $text = ''; # rebuild $text from the tokens
  646. # $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
  647. # $tags_to_skip = "!<(/?)(?:pre|code|kbd|script|math)[\s>]!";
  648. foreach ($tokens as $cur_token) {
  649. if ($cur_token[0] == 'tag') {
  650. # Within tags, encode * and _ so they don't conflict
  651. # with their use in Markdown for italics and strong.
  652. # We're replacing each such character with its
  653. # corresponding MD5 checksum value; this is likely
  654. # overkill, but it should prevent us from colliding
  655. # with the escape values by accident.
  656. $cur_token[1] = str_replace(array('*', '_'),
  657. array($md_escape_table['*'], $md_escape_table['_']),
  658. $cur_token[1]);
  659. $text .= $cur_token[1];
  660. } else {
  661. $t = $cur_token[1];
  662. $t = _EncodeBackslashEscapes($t);
  663. $text .= $t;
  664. }
  665. }
  666. return $text;
  667. }
  668. function _DoAnchors($text) {
  669. #
  670. # Turn Markdown link shortcuts into XHTML <a> tags.
  671. #
  672. global $md_nested_brackets;
  673. #
  674. # First, handle reference-style links: [link text] [id]
  675. #
  676. $text = preg_replace_callback("{
  677. ( # wrap whole match in $1
  678. \\[
  679. ($md_nested_brackets) # link text = $2
  680. \\]
  681. [ ]? # one optional space
  682. (?:\\n[ ]*)? # one optional newline followed by spaces
  683. \\[
  684. (.*?) # id = $3
  685. \\]
  686. )
  687. }xs",
  688. '_DoAnchors_reference_callback', $text);
  689. #
  690. # Next, inline-style links: [link text](url "optional title")
  691. #
  692. $text = preg_replace_callback("{
  693. ( # wrap whole match in $1
  694. \\[
  695. ($md_nested_brackets) # link text = $2
  696. \\]
  697. \\( # literal paren
  698. [ \\t]*
  699. <?(.*?)>? # href = $3
  700. [ \\t]*
  701. ( # $4
  702. (['\"]) # quote char = $5
  703. (.*?) # Title = $6
  704. \\5 # matching quote
  705. )? # title is optional
  706. \\)
  707. )
  708. }xs",
  709. '_DoAnchors_inline_callback', $text);
  710. return $text;
  711. }
  712. function _DoAnchors_reference_callback($matches) {
  713. global $md_urls, $md_titles, $md_escape_table;
  714. $whole_match = $matches[1];
  715. $link_text = $matches[2];
  716. $link_id = strtolower($matches[3]);
  717. if ($link_id == "") {
  718. $link_id = strtolower($link_text); # for shortcut links like [this][].
  719. }
  720. if (isset($md_urls[$link_id])) {
  721. $url = $md_urls[$link_id];
  722. # We've got to encode these to avoid conflicting with italics/bold.
  723. $url = str_replace(array('*', '_'),
  724. array($md_escape_table['*'], $md_escape_table['_']),
  725. $url);
  726. $result = "<a href=\"$url\"";
  727. if ( isset( $md_titles[$link_id] ) ) {
  728. $title = $md_titles[$link_id];
  729. $title = str_replace(array('*', '_'),
  730. array($md_escape_table['*'],
  731. $md_escape_table['_']), $title);
  732. $result .= " title=\"$title\"";
  733. }
  734. $result .= ">$link_text</a>";
  735. }
  736. else {
  737. $result = $whole_match;
  738. }
  739. return $result;
  740. }
  741. function _DoAnchors_inline_callback($matches) {
  742. global $md_escape_table;
  743. $whole_match = $matches[1];
  744. $link_text = $matches[2];
  745. $url = $matches[3];
  746. $title =& $matches[6];
  747. # We've got to encode these to avoid conflicting with italics/bold.
  748. $url = str_replace(array('*', '_'),
  749. array($md_escape_table['*'], $md_escape_table['_']),
  750. $url);
  751. $result = "<a href=\"$url\"";
  752. if (isset($title)) {
  753. $title = str_replace('"', '&quot;', $title);
  754. $title = str_replace(array('*', '_'),
  755. array($md_escape_table['*'], $md_escape_table['_']),
  756. $title);
  757. $result .= " title=\"$title\"";
  758. }
  759. $result .= ">$link_text</a>";
  760. return $result;
  761. }
  762. function _DoImages($text) {
  763. #
  764. # Turn Markdown image shortcuts into <img> tags.
  765. #
  766. global $md_nested_brackets;
  767. #
  768. # First, handle reference-style labeled images: ![alt text][id]
  769. #
  770. $text = preg_replace_callback('{
  771. ( # wrap whole match in $1
  772. !\[
  773. ('.$md_nested_brackets.') # alt text = $2
  774. \]
  775. [ ]? # one optional space
  776. (?:\n[ ]*)? # one optional newline followed by spaces
  777. \[
  778. (.*?) # id = $3
  779. \]
  780. )
  781. }xs',
  782. '_DoImages_reference_callback', $text);
  783. #
  784. # Next, handle inline images: ![alt text](url "optional title")
  785. # Don't forget: encode * and _
  786. $text = preg_replace_callback('{
  787. ( # wrap whole match in $1
  788. !\[
  789. ('.$md_nested_brackets.') # alt text = $2
  790. \]
  791. \( # literal paren
  792. [ \t]*
  793. <?(\S+?)>? # src url = $3
  794. [ \t]*
  795. ( # $4
  796. ([\'"]) # quote char = $5
  797. (.*?) # title = $6
  798. \5 # matching quote
  799. [ \t]*
  800. )? # title is optional
  801. \)
  802. )
  803. }xs',
  804. '_DoImages_inline_callback', $text);
  805. return $text;
  806. }
  807. function _DoImages_reference_callback($matches) {
  808. global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;
  809. $whole_match = $matches[1];
  810. $alt_text = $matches[2];
  811. $link_id = strtolower($matches[3]);
  812. if ($link_id == "") {
  813. $link_id = strtolower($alt_text); # for shortcut links like ![this][].
  814. }
  815. $alt_text = str_replace('"', '&quot;', $alt_text);
  816. if (isset($md_urls[$link_id])) {
  817. $url = $md_urls[$link_id];
  818. # We've got to encode these to avoid conflicting with italics/bold.
  819. $url = str_replace(array('*', '_'),
  820. array($md_escape_table['*'], $md_escape_table['_']),
  821. $url);
  822. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  823. if (isset($md_titles[$link_id])) {
  824. $title = $md_titles[$link_id];
  825. $title = str_replace(array('*', '_'),
  826. array($md_escape_table['*'],
  827. $md_escape_table['_']), $title);
  828. $result .= " title=\"$title\"";
  829. }
  830. $result .= $md_empty_element_suffix;
  831. }
  832. else {
  833. # If there's no such link ID, leave intact:
  834. $result = $whole_match;
  835. }
  836. return $result;
  837. }
  838. function _DoImages_inline_callback($matches) {
  839. global $md_empty_element_suffix, $md_escape_table;
  840. $whole_match = $matches[1];
  841. $alt_text = $matches[2];
  842. $url = $matches[3];
  843. $title = '';
  844. if (isset($matches[6])) {
  845. $title = $matches[6];
  846. }
  847. $alt_text = str_replace('"', '&quot;', $alt_text);
  848. $title = str_replace('"', '&quot;', $title);
  849. # We've got to encode these to avoid conflicting with italics/bold.
  850. $url = str_replace(array('*', '_'),
  851. array($md_escape_table['*'], $md_escape_table['_']),
  852. $url);
  853. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  854. if (isset($title)) {
  855. $title = str_replace(array('*', '_'),
  856. array($md_escape_table['*'], $md_escape_table['_']),
  857. $title);
  858. $result .= " title=\"$title\""; # $title already quoted
  859. }
  860. $result .= $md_empty_element_suffix;
  861. return $result;
  862. }
  863. function _DoHeaders($text) {
  864. # Setext-style headers:
  865. # Header 1
  866. # ========
  867. #
  868. # Header 2
  869. # --------
  870. #
  871. $text = preg_replace(
  872. array('{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n=+[ \t]*\n+ }emx',
  873. '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n-+[ \t]*\n+ }emx'),
  874. array("_HashBlock('<h1'. ('\\2'? ' id=\"'._UnslashQuotes('\\2').'\"':'').
  875. '>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h1>'
  876. ) . '\n\n'",
  877. "_HashBlock('<h2'. ('\\2'? ' id=\"'._UnslashQuotes('\\2').'\"':'').
  878. '>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h2>'
  879. ) . '\n\n'"),
  880. $text);
  881. # atx-style headers:
  882. # # Header 1
  883. # ## Header 2
  884. # ## Header 2 with closing hashes ##
  885. # ...
  886. # ###### Header 6
  887. #
  888. $text = preg_replace('{
  889. ^(\#{1,6}) # $1 = string of #\'s
  890. [ \t]*
  891. (.+?) # $2 = Header text
  892. [ \t]*
  893. \#* # optional closing #\'s (not counted)
  894. (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\}[ ]*)? # id attribute
  895. \n+
  896. }xme',
  897. "_HashBlock(
  898. '<h'.strlen('\\1'). ('\\3'? ' id=\"'._UnslashQuotes('\\3').'\"':'').'>'.
  899. _RunSpanGamut(_UnslashQuotes('\\2')).
  900. '</h'.strlen('\\1').'>'
  901. ) . '\n\n'",
  902. $text);
  903. return $text;
  904. }
  905. function _DoTables($text) {
  906. #
  907. # Form HTML tables.
  908. #
  909. global $md_tab_width;
  910. $less_than_tab = $md_tab_width - 1;
  911. #
  912. # Find tables with leading pipe.
  913. #
  914. # | Header 1 | Header 2
  915. # | -------- | --------
  916. # | Cell 1 | Cell 2
  917. # | Cell 3 | Cell 4
  918. #
  919. $text = preg_replace_callback('
  920. {
  921. ^ # Start of a line
  922. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  923. [|] # Optional leading pipe (present)
  924. (.+) \n # $1: Header row (at least one pipe)
  925. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  926. [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
  927. ( # $3: Cells
  928. (?:
  929. [ ]* # Allowed whitespace.
  930. [|] .* \n # Row content.
  931. )*
  932. )
  933. (?=\n|\Z) # Stop at final double newline.
  934. }xm',
  935. '_DoTable_LeadingPipe_callback', $text);
  936. #
  937. # Find tables without leading pipe.
  938. #
  939. # Header 1 | Header 2
  940. # -------- | --------
  941. # Cell 1 | Cell 2
  942. # Cell 3 | Cell 4
  943. #
  944. $text = preg_replace_callback('
  945. {
  946. ^ # Start of a line
  947. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  948. (\S.*[|].*) \n # $1: Header row (at least one pipe)
  949. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  950. ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
  951. ( # $3: Cells
  952. (?:
  953. .* [|] .* \n # Row content
  954. )*
  955. )
  956. (?=\n|\Z) # Stop at final double newline.
  957. }xm',
  958. '_DoTable_callback', $text);
  959. return $text;
  960. }
  961. function _DoTable_LeadingPipe_callback($matches) {
  962. $head = $matches[1];
  963. $underline = $matches[2];
  964. $content = $matches[3];
  965. # Remove leading pipe for each row.
  966. $content = preg_replace('/^ *[|]/m', '', $content);
  967. return _DoTable_callback(array($matches[0], $head, $underline, $content));
  968. }
  969. function _DoTable_callback($matches) {
  970. $head = $matches[1];
  971. $underline = $matches[2];
  972. $content = $matches[3];
  973. # Remove any tailing pipes for each line.
  974. $head = preg_replace('/[|] *$/m', '', $head);
  975. $underline = preg_replace('/[|] *$/m', '', $underline);
  976. $content = preg_replace('/[|] *$/m', '', $content);
  977. # Reading alignement from header underline.
  978. $separators = preg_split('/ *[|] */', $underline);
  979. foreach ($separators as $n => $s) {
  980. if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"';
  981. else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
  982. else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
  983. else $attr[$n] = '';
  984. }
  985. # Creating code spans before splitting the row is an easy way to
  986. # handle a code span containg pipes.
  987. $head = _DoCodeSpans($head);
  988. $headers = preg_split('/ *[|] */', $head);
  989. $col_count = count($headers);
  990. # Write column headers.
  991. $text = "<table>\n";
  992. $text .= "<thead>\n";
  993. $text .= "<tr>\n";
  994. foreach ($headers as $n => $header)
  995. $text .= " <th$attr[$n]>"._RunSpanGamut(trim($header))."</th>\n";
  996. $text .= "</tr>\n";
  997. $text .= "</thead>\n";
  998. # Split content by row.
  999. $rows = explode("\n", trim($content, "\n"));
  1000. $text .= "<tbody>\n";
  1001. foreach ($rows as $row) {
  1002. # Creating code spans before splitting the row is an easy way to
  1003. # handle a code span containg pipes.
  1004. $row = _DoCodeSpans($row);
  1005. # Split row by cell.
  1006. $row_cells = preg_split('/ *[|] */', $row, $col_count);
  1007. $row_cells = array_pad($row_cells, $col_count, '');
  1008. $text .= "<tr>\n";
  1009. foreach ($row_cells as $n => $cell)
  1010. $text .= " <td$attr[$n]>"._RunSpanGamut(trim($cell))."</td>\n";
  1011. $text .= "</tr>\n";
  1012. }
  1013. $text .= "</tbody>\n";
  1014. $text .= "</table>";
  1015. return _HashBlock($text) . "\n";
  1016. }
  1017. function _DoLists($text) {
  1018. #
  1019. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  1020. #
  1021. global $md_tab_width, $md_list_level;
  1022. $less_than_tab = $md_tab_width - 1;
  1023. # Re-usable patterns to match list item bullets and number markers:
  1024. $marker_ul = '[*+-]';
  1025. $marker_ol = '\d+[.]';
  1026. $marker_any = "(?:$marker_ul|$marker_ol)";
  1027. $markers = array($marker_ul, $marker_ol);
  1028. foreach ($markers as $marker) {
  1029. # Re-usable pattern to match any entirel ul or ol list:
  1030. $whole_list = '
  1031. ( # $1 = whole list
  1032. ( # $2
  1033. [ ]{0,'.$less_than_tab.'}
  1034. ('.$marker.') # $3 = first list item marker
  1035. [ \t]+
  1036. )
  1037. (?s:.+?)
  1038. ( # $4
  1039. \z
  1040. |
  1041. \n{2,}
  1042. (?=\S)
  1043. (?! # Negative lookahead for another list item marker
  1044. [ \t]*
  1045. '.$marker.'[ \t]+
  1046. )
  1047. )
  1048. )
  1049. '; // mx
  1050. # We use a different prefix before nested lists than top-level lists.
  1051. # See extended comment in _ProcessListItems().
  1052. if ($md_list_level) {
  1053. $text = preg_replace_callback('{
  1054. ^
  1055. '.$whole_list.'
  1056. }mx',
  1057. '_DoLists_callback', $text);
  1058. }
  1059. else {
  1060. $text = preg_replace_callback('{
  1061. (?:(?<=\n\n)|\A\n?)
  1062. '.$whole_list.'
  1063. }mx',
  1064. '_DoLists_callback', $text);
  1065. }
  1066. }
  1067. return $text;
  1068. }
  1069. function _DoLists_callback($matches) {
  1070. # Re-usable patterns to match list item bullets and number markers:
  1071. $marker_ul = '[*+-]';
  1072. $marker_ol = '\d+[.]';
  1073. $marker_any = "(?:$marker_ul|$marker_ol)";
  1074. $list = $matches[1];
  1075. $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
  1076. $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );
  1077. # Turn double returns into triple returns, so that we can make a
  1078. # paragraph for the last item in a list, if necessary:
  1079. $list = preg_replace("/\n{2,}/", "\n\n\n", $list);
  1080. $result = _ProcessListItems($list, $marker_any);
  1081. $result = "<$list_type>\n" . $result . "</$list_type>";
  1082. return "\n" . _HashBlock($result) . "\n\n";
  1083. }
  1084. function _ProcessListItems($list_str, $marker_any) {
  1085. #
  1086. # Process the contents of a single ordered or unordered list, splitting it
  1087. # into individual list items.
  1088. #
  1089. global $md_list_level;
  1090. # The $md_list_level global keeps track of when we're inside a list.
  1091. # Each time we enter a list, we increment it; when we leave a list,
  1092. # we decrement. If it's zero, we're not in a list anymore.
  1093. #
  1094. # We do this because when we're not inside a list, we want to treat
  1095. # something like this:
  1096. #
  1097. # I recommend upgrading to version
  1098. # 8. Oops, now this line is treated
  1099. # as a sub-list.
  1100. #
  1101. # As a single paragraph, despite the fact that the second line starts
  1102. # with a digit-period-space sequence.
  1103. #
  1104. # Whereas when we're inside a list (or sub-list), that line will be
  1105. # treated as the start of a sub-list. What a kludge, huh? This is
  1106. # an aspect of Markdown's syntax that's hard to parse perfectly
  1107. # without resorting to mind-reading. Perhaps the solution is to
  1108. # change the syntax rules such that sub-lists must start with a
  1109. # starting cardinal number; e.g. "1." or "a.".
  1110. $md_list_level++;
  1111. # trim trailing blank lines:
  1112. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  1113. $list_str = preg_replace_callback('{
  1114. (\n)? # leading line = $1
  1115. (^[ \t]*) # leading whitespace = $2
  1116. ('.$marker_any.') [ \t]+ # list marker = $3
  1117. ((?s:.+?) # list item text = $4
  1118. (\n{1,2}))
  1119. (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
  1120. }xm',
  1121. '_ProcessListItems_callback', $list_str);
  1122. $md_list_level--;
  1123. return $list_str;
  1124. }
  1125. function _ProcessListItems_callback($matches) {
  1126. $item = $matches[4];
  1127. $leading_line =& $matches[1];
  1128. $leading_space =& $matches[2];
  1129. if ($leading_line || preg_match('/\n{2,}/', $item)) {
  1130. $item = _RunBlockGamut(_Outdent($item));
  1131. }
  1132. else {
  1133. # Recursion for sub-lists:
  1134. $item = _DoLists(_Outdent($item));
  1135. $item = preg_replace('/\n+$/', '', $item);
  1136. $item = _RunSpanGamut($item);
  1137. }
  1138. return "<li>" . $item . "</li>\n";
  1139. }
  1140. function _DoDefLists($text) {
  1141. #
  1142. # Form HTML definition lists.
  1143. #
  1144. global $md_tab_width;
  1145. $less_than_tab = $md_tab_width - 1;
  1146. # Re-usable patterns to match list item bullets and number markers:
  1147. # Re-usable pattern to match any entire dl list:
  1148. $whole_list = '
  1149. ( # $1 = whole list
  1150. ( # $2
  1151. [ ]{0,'.$less_than_tab.'}
  1152. ((?>.*\S.*\n)+) # $3 = defined term
  1153. \n?
  1154. [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
  1155. )
  1156. (?s:.+?)
  1157. ( # $4
  1158. \z
  1159. |
  1160. \n{2,}
  1161. (?=\S)
  1162. (?! # Negative lookahead for another term
  1163. [ ]{0,'.$less_than_tab.'}
  1164. (?: \S.*\n )+? # defined term
  1165. \n?
  1166. [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
  1167. )
  1168. (?! # Negative lookahead for another definition
  1169. [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
  1170. )
  1171. )
  1172. )
  1173. '; // mx
  1174. $text = preg_replace_callback('{
  1175. (?:(?<=\n\n)|\A\n?)
  1176. '.$whole_list.'
  1177. }mx',
  1178. '_DoDefLists_callback', $text);
  1179. return $text;
  1180. }
  1181. function _DoDefLists_callback($matches) {
  1182. # Re-usable patterns to match list item bullets and number markers:
  1183. $list = $matches[1];
  1184. # Turn double returns into triple returns, so that we can make a
  1185. # paragraph for the last item in a list, if necessary:
  1186. $result = trim(_ProcessDefListItems($list));
  1187. $result = "<dl>\n" . $result . "\n</dl>";
  1188. return _HashBlock($result) . "\n\n";
  1189. }
  1190. function _ProcessDefListItems($list_str) {
  1191. #
  1192. # Process the contents of a single ordered or unordered list, splitting it
  1193. # into individual list items.
  1194. #
  1195. global $md_tab_width;
  1196. $less_than_tab = $md_tab_width - 1;
  1197. # trim trailing blank lines:
  1198. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  1199. # Process definition terms.
  1200. $list_str = preg_replace_callback('{
  1201. (?:\n\n+|\A\n?) # leading line
  1202. ( # definition terms = $1
  1203. [ ]{0,'.$less_than_tab.'} # leading whitespace
  1204. (?![:][ ]|[ ]) # negative lookahead for a definition
  1205. # mark (colon) or more whitespace.
  1206. (?: \S.* \n)+? # actual term (not whitespace).
  1207. )
  1208. (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
  1209. # with a definition mark.
  1210. }xm',
  1211. '_ProcessDefListItems_callback_dt', $list_str);
  1212. # Process actual definitions.
  1213. $list_str = preg_replace_callback('{
  1214. \n(\n+)? # leading line = $1
  1215. [ ]{0,'.$less_than_tab.'} # whitespace before colon
  1216. [:][ ]+ # definition mark (colon)
  1217. ((?s:.+?)) # definition text = $2
  1218. (?= \n+ # stop at next definition mark,
  1219. (?: # next term or end of text
  1220. [ ]{0,'.$less_than_tab.'} [:][ ] |
  1221. <dt> | \z
  1222. )
  1223. )
  1224. }xm',
  1225. '_ProcessDefListItems_callback_dd', $list_str);
  1226. return $list_str;
  1227. }
  1228. function _ProcessDefListItems_callback_dt($matches) {
  1229. $terms = explode("\n", trim($matches[1]));
  1230. $text = '';
  1231. foreach ($terms as $term) {
  1232. $term = _RunSpanGamut(trim($term));
  1233. $text .= "\n<dt>" . $term . "</dt>";
  1234. }
  1235. return $text . "\n";
  1236. }
  1237. function _ProcessDefListItems_callback_dd($matches) {
  1238. $leading_line = $matches[1];
  1239. $def = $matches[2];
  1240. if ($leading_line || preg_match('/\n{2,}/', $def)) {
  1241. $def = _RunBlockGamut(_Outdent($def . "\n\n"));
  1242. $def = "\n". $def ."\n";
  1243. }
  1244. else {
  1245. $def = rtrim($def);
  1246. $def = _RunSpanGamut(_Outdent($def));
  1247. }
  1248. return "\n<dd>" . $def . "</dd>\n";
  1249. }
  1250. function _DoCodeBlocks($text) {
  1251. #
  1252. # Process Markdown `<pre><code>` blocks.
  1253. #
  1254. global $md_tab_width;
  1255. $text = preg_replace_callback('{
  1256. (?:\n\n|\A)
  1257. ( # $1 = the code block -- one or more lines, starting with a space/tab
  1258. (?:
  1259. (?:[ ]{'.$md_tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces
  1260. .*\n+
  1261. )+
  1262. )
  1263. ((?=^[ ]{0,'.$md_tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
  1264. }xm',
  1265. '_DoCodeBlocks_callback', $text);
  1266. return $text;
  1267. }
  1268. function _DoCodeBlocks_callback($matches) {
  1269. $codeblock = $matches[1];
  1270. $codeblock = _EncodeCode(_Outdent($codeblock));
  1271. // $codeblock = _Detab($codeblock);
  1272. # trim leading newlines and trailing whitespace
  1273. $codeblock = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $codeblock);
  1274. $result = "<pre><code>" . $codeblock . "\n</code></pre>";
  1275. return "\n\n" . _HashBlock($result) . "\n\n";
  1276. }
  1277. function _DoCodeSpans($text) {
  1278. #
  1279. # * Backtick quotes are used for <code></code> spans.
  1280. #
  1281. # * You can use multiple backticks as the delimiters if you want to
  1282. # include literal backticks in the code span. So, this input:
  1283. #
  1284. # Just type ``foo `bar` baz`` at the prompt.
  1285. #
  1286. # Will translate to:
  1287. #
  1288. # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
  1289. #
  1290. # There's no arbitrary limit to the number of backticks you
  1291. # can use as delimters. If you need three consecutive backticks
  1292. # in your code, use four for delimiters, etc.
  1293. #
  1294. # * You can use spaces to get literal backticks at the edges:
  1295. #
  1296. # ... type `` `bar` `` ...
  1297. #
  1298. # Turns to:
  1299. #
  1300. # ... type <code>`bar`</code> ...
  1301. #
  1302. $text = preg_replace_callback('@
  1303. (?<!\\\) # Character before opening ` can\'t be a backslash
  1304. (`+) # $1 = Opening run of `
  1305. (.+?) # $2 = The code block
  1306. (?<!`)
  1307. \1 # Matching closer
  1308. (?!`)
  1309. @xs',
  1310. '_DoCodeSpans_callback', $text);
  1311. return $text;
  1312. }
  1313. function _DoCodeSpans_callback($matches) {
  1314. $c = $matches[2];
  1315. $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
  1316. $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
  1317. $c = _EncodeCode($c);
  1318. return "<code>$c</code>";
  1319. }
  1320. function _EncodeCode($_) {
  1321. #
  1322. # Encode/escape certain characters inside Markdown code runs.
  1323. # The point is that in code, these characters are literals,
  1324. # and lose their special Markdown meanings.
  1325. #
  1326. global $md_escape_table;
  1327. # Encode all ampersands; HTML entities are not
  1328. # entities within a Markdown code span.
  1329. $_ = str_replace('&', '&amp;', $_);
  1330. # Do the angle bracket song and dance:
  1331. $_ = str_replace(array('<', '>'),
  1332. array('&lt;', '&gt;'), $_);
  1333. # Now, escape characters that are magic in Markdown:
  1334. $_ = str_replace(array_keys($md_escape_table),
  1335. array_values($md_escape_table), $_);
  1336. return $_;
  1337. }
  1338. function _DoItalicsAndBold($text) {
  1339. # <strong> must go first:
  1340. $text = preg_replace(array(
  1341. '{
  1342. ( (?<!\w) __ ) # $1: Marker (not preceded by alphanum)
  1343. (?=\S) # Not followed by whitespace
  1344. (?!__) # or two others marker chars.
  1345. ( # $2: Content
  1346. (?>
  1347. [^_]+? # Anthing not em markers.
  1348. |
  1349. # Balence any regular _ emphasis inside.
  1350. (?<![a-zA-Z0-9])_ (?=\S) (?! _) (.+?)
  1351. (?<=\S) _ (?![a-zA-Z0-9])
  1352. )+?
  1353. )
  1354. (?<=\S) __ # End mark not preceded by whitespace.
  1355. (?!\w) # Not followed by alphanum.
  1356. }sx',
  1357. '{
  1358. ( (?<!\*\*) \*\* ) # $1: Marker (not preceded by two *)
  1359. (?=\S) # Not followed by whitespace
  1360. (?!\1) # or two others marker chars.
  1361. ( # $2: Content
  1362. (?>
  1363. [^*]+? # Anthing not em markers.
  1364. |
  1365. # Balence any regular * emphasis inside.
  1366. \* (?=\S) (?! \*) (.+?) (?<=\S) \*
  1367. )+?
  1368. )
  1369. (?<=\S) \*\* # End mark not preceded by whitespace.
  1370. }sx',
  1371. ),
  1372. '<strong>\2</strong>', $text);
  1373. # Then <em>:
  1374. $text = preg_replace(array(
  1375. '{ ( (?<!\w) _ ) (?=\S) (?! _) (.+?) (?<=\S) _ (?!\w) }sx',
  1376. '{ ( (?<!\*)\* ) (?=\S) (?! \*) (.+?) (?<=\S) \* }sx',
  1377. ),
  1378. '<em>\2</em>', $text);
  1379. return $text;
  1380. }
  1381. function _DoBlockQuotes($text) {
  1382. $text = preg_replace_callback('/
  1383. ( # Wrap whole match in $1
  1384. (
  1385. ^[ \t]*>[ \t]? # ">" at the start of a line
  1386. .+\n # rest of the first line
  1387. (.+\n)* # subsequent consecutive lines
  1388. \n* # blanks
  1389. )+
  1390. )
  1391. /xm',
  1392. '_DoBlockQuotes_callback', $text);
  1393. return $text;
  1394. }
  1395. function _DoBlockQuotes_callback($matches) {
  1396. $bq = $matches[1];
  1397. # trim one level of quoting - trim whitespace-only lines
  1398. $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
  1399. $bq = _RunBlockGamut($bq); # recurse
  1400. $bq = preg_replace('/^/m', " ", $bq);
  1401. # These leading spaces screw with <pre> content, so we need to fix that:
  1402. $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
  1403. '_DoBlockQuotes_callback2', $bq);
  1404. return _HashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
  1405. }
  1406. function _DoBlockQuotes_callback2($matches) {
  1407. $pre = $matches[1];
  1408. $pre = preg_replace('/^ /m', '', $pre);
  1409. return $pre;
  1410. }
  1411. function _FormParagraphs($text) {
  1412. #
  1413. # Params:
  1414. # $text - string to process with html <p> tags
  1415. #
  1416. global $md_html_blocks, $md_html_hashes;
  1417. # Strip leading and trailing lines:
  1418. $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
  1419. $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
  1420. #
  1421. # Wrap <p> tags and unhashify HTML blocks
  1422. #
  1423. foreach ($grafs as $key => $value) {
  1424. $value = trim(_RunSpanGamut($value));
  1425. # Check if this should be enclosed in a paragraph.
  1426. # Text equaling to a clean tag hash are not enclosed.
  1427. # Text starting with a block tag hash are not either.
  1428. $clean_key = $value;
  1429. $block_key = substr($value, 0, 32);
  1430. $is_p = (!isset($md_html_blocks[$block_key]) &&
  1431. !isset($md_html_hashes[$clean_key]));
  1432. if ($is_p) {
  1433. $value = "<p>$value</p>";
  1434. }
  1435. $grafs[$key] = $value;
  1436. }
  1437. # Join grafs in one text, then unhash HTML tags.
  1438. $text = implode("\n\n", $grafs);
  1439. # Finish by removing any tag hashes still present in $text.
  1440. $text = _UnhashTags($text);
  1441. return $text;
  1442. }
  1443. function _EncodeAmpsAndAngles($text) {
  1444. # Smart processing for ampersands and angle brackets that need to be encoded.
  1445. # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
  1446. # http://bumppo.net/projects/amputator/
  1447. $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
  1448. '&amp;', $text);;
  1449. # Encode naked <'s
  1450. $text = preg_replace('{<(?![a-z/?\$!])}i', '&lt;', $text);
  1451. return $text;
  1452. }
  1453. function _EncodeBackslashEscapes($text) {
  1454. #
  1455. # Parameter: String.
  1456. # Returns: The string, with after processing the following backslash
  1457. # escape sequences.
  1458. #
  1459. global $md_escape_table, $md_backslash_escape_table;
  1460. # Must process escaped backslashes first.
  1461. return str_replace(array_keys($md_backslash_escape_table),
  1462. array_values($md_backslash_escape_table), $text);
  1463. }
  1464. function _DoAutoLinks($text) {
  1465. $text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
  1466. '<a href="\1">\1</a>', $text);
  1467. # Email addresses: <address@domain.foo>
  1468. $text = preg_replace('{
  1469. <
  1470. (?:mailto:)?
  1471. (
  1472. [-.\w]+
  1473. \@
  1474. [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
  1475. )
  1476. >
  1477. }exi',
  1478. "_EncodeEmailAddress(_UnescapeSpecialChars(_UnslashQuotes('\\1')))",
  1479. $text);
  1480. return $text;
  1481. }
  1482. function _EncodeEmailAddress($addr) {
  1483. #
  1484. # Input: an email address, e.g. "foo@example.com"
  1485. #
  1486. # Output: the email address as a mailto link, with each character
  1487. # of the address encoded as either a decimal or hex entity, in
  1488. # the hopes of foiling most address harvesting spam bots. E.g.:
  1489. #
  1490. # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
  1491. # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
  1492. # &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
  1493. #
  1494. # Based by a filter by Matthew Wickline, posted to the BBEdit-Talk
  1495. # mailing list: <http://tinyurl.com/yu7ue>
  1496. #
  1497. $addr = "mailto:" . $addr;
  1498. $length = strlen($addr);
  1499. # leave ':' alone (to spot mailto: later)
  1500. $addr = preg_replace_callback('/([^\:])/',
  1501. '_EncodeEmailAddress_callback', $addr);
  1502. $addr = "<a href=\"$addr\">$addr</a>";
  1503. # strip the mailto: from the visible part
  1504. $addr = preg_replace('/">.+?:/', '">', $addr);
  1505. return $addr;
  1506. }
  1507. function _EncodeEmailAddress_callback($matches) {
  1508. $char = $matches[1];
  1509. $r = rand(0, 100);
  1510. # roughly 10% raw, 45% hex, 45% dec
  1511. # '@' *must* be encoded. I insist.
  1512. if ($r > 90 && $char != '@') return $char;
  1513. if ($r < 45) return '&#x'.dechex(ord($char)).';';
  1514. return '&#'.ord($char).';';
  1515. }
  1516. function _UnescapeSpecialChars($text) {
  1517. #
  1518. # Swap back in all the special characters we've hidden.
  1519. #
  1520. global $md_escape_table;
  1521. return str_replace(array_values($md_escape_table),
  1522. array_keys($md_escape_table), $text);
  1523. }
  1524. function _UnhashTags($text) {
  1525. #
  1526. # Swap back in all the tags hashed by _HashHTMLBlocks.
  1527. #
  1528. global $md_html_hashes;
  1529. return str_replace(array_keys($md_html_hashes),
  1530. array_values($md_html_hashes), $text);
  1531. }
  1532. # _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
  1533. # We only define it if it is not already defined.
  1534. if (!function_exists('_TokenizeHTML')) :
  1535. function _TokenizeHTML($str) {
  1536. #
  1537. # Parameter: String containing HTML markup.
  1538. # Returns: An array of the tokens comprising the input
  1539. # string. Each token is either a tag (possibly with nested,
  1540. # …

Large files files are truncated, but you can click here to view the full file