PageRenderTime 51ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/extensions/markdown/lib/markdown_extra.php

https://github.com/bauhouse/sym-spectrum
PHP | 1876 lines | 1089 code | 228 blank | 559 comment | 79 complexity | fdb161214b3433988f031554edb2ea98 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. #
  3. # PHP Markdown Extra - A text-to-HTML conversion tool for web writers
  4. #
  5. # Copyright (c) 2004-2005 Michel Fortin
  6. # <http://www.michelf.com/projects/php-markdown/>
  7. #
  8. # Based on Markdown
  9. # Copyright (c) 2004-2005 John Gruber
  10. # <http://daringfireball.net/projects/markdown/>
  11. #
  12. global $MarkdownPHPVersion, $MarkdownSyntaxVersion,
  13. $md_empty_element_suffix, $md_tab_width,
  14. $md_nested_brackets_depth, $md_nested_brackets,
  15. $md_escape_table, $md_backslash_escape_table,
  16. $md_list_level;
  17. $MarkdownPHPVersion = 'Extra 1.0.1'; # Fri 9 Dec 2005
  18. $MarkdownSyntaxVersion = '1.0.1'; # Sun 12 Dec 2004
  19. #
  20. # Global default settings:
  21. #
  22. $md_empty_element_suffix = " />"; # Change to ">" for HTML output
  23. $md_tab_width = 4;
  24. #
  25. # Globals:
  26. #
  27. # Regex to match balanced [brackets].
  28. # Needed to insert a maximum bracked depth while converting to PHP.
  29. $md_nested_brackets_depth = 6;
  30. $md_nested_brackets =
  31. str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
  32. str_repeat('\])*', $md_nested_brackets_depth);
  33. # Table of hash values for escaped characters:
  34. $md_escape_table = array(
  35. "\\" => md5("\\"),
  36. "`" => md5("`"),
  37. "*" => md5("*"),
  38. "_" => md5("_"),
  39. "{" => md5("{"),
  40. "}" => md5("}"),
  41. "[" => md5("["),
  42. "]" => md5("]"),
  43. "(" => md5("("),
  44. ")" => md5(")"),
  45. ">" => md5(">"),
  46. "#" => md5("#"),
  47. "+" => md5("+"),
  48. "-" => md5("-"),
  49. "." => md5("."),
  50. "!" => md5("!"),
  51. ":" => md5(":"),
  52. "|" => md5("|"),
  53. );
  54. # Create an identical table but for escaped characters.
  55. $md_backslash_escape_table;
  56. foreach ($md_escape_table as $key => $char)
  57. $md_backslash_escape_table["\\$key"] = $char;
  58. function Markdown($text) {
  59. #
  60. # Main function. The order in which other subs are called here is
  61. # essential. Link and image substitutions need to happen before
  62. # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  63. # and <img> tags get encoded.
  64. #
  65. # Clear the global hashes. If we don't clear these, you get conflicts
  66. # from other articles when generating a page which contains more than
  67. # one article (e.g. an index page that shows the N most recent
  68. # articles):
  69. global $md_urls, $md_titles, $md_html_blocks, $md_html_hashes;
  70. $md_urls = array();
  71. $md_titles = array();
  72. $md_html_blocks = array();
  73. $md_html_hashes = array();
  74. # Standardize line endings:
  75. # DOS to Unix and Mac to Unix
  76. $text = str_replace(array("\r\n", "\r"), "\n", $text);
  77. # Make sure $text ends with a couple of newlines:
  78. $text .= "\n\n";
  79. # Convert all tabs to spaces.
  80. $text = _Detab($text);
  81. # Turn block-level HTML blocks into hash entries
  82. $text = _HashHTMLBlocks($text);
  83. # Strip any lines consisting only of spaces and tabs.
  84. # This makes subsequent regexen easier to write, because we can
  85. # match consecutive blank lines with /\n+/ instead of something
  86. # contorted like /[ \t]*\n+/ .
  87. $text = preg_replace('/^[ \t]+$/m', '', $text);
  88. # Strip link definitions, store in hashes.
  89. $text = _StripLinkDefinitions($text);
  90. $text = _RunBlockGamut($text, FALSE);
  91. $text = _UnescapeSpecialChars($text);
  92. return $text . "\n";
  93. }
  94. function _StripLinkDefinitions($text) {
  95. #
  96. # Strips link definitions from text, stores the URLs and titles in
  97. # hash references.
  98. #
  99. global $md_tab_width;
  100. $less_than_tab = $md_tab_width - 1;
  101. # Link defs are in the form: ^[id]: url "optional title"
  102. $text = preg_replace_callback('{
  103. ^[ ]{0,'.$less_than_tab.'}\[(.+)\]: # id = $1
  104. [ \t]*
  105. \n? # maybe *one* newline
  106. [ \t]*
  107. <?(\S+?)>? # url = $2
  108. [ \t]*
  109. \n? # maybe one newline
  110. [ \t]*
  111. (?:
  112. (?<=\s) # lookbehind for whitespace
  113. ["(]
  114. (.+?) # title = $3
  115. [")]
  116. [ \t]*
  117. )? # title is optional
  118. (?:\n+|\Z)
  119. }xm',
  120. '_StripLinkDefinitions_callback',
  121. $text);
  122. return $text;
  123. }
  124. function _StripLinkDefinitions_callback($matches) {
  125. global $md_urls, $md_titles;
  126. $link_id = strtolower($matches[1]);
  127. $md_urls[$link_id] = _EncodeAmpsAndAngles($matches[2]);
  128. if(isset($matches[3]))
  129. $md_titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
  130. return ''; # String that will replace the block
  131. }
  132. function _HashHTMLBlocks($text) {
  133. #
  134. # Hashify HTML Blocks and "clean tags".
  135. #
  136. # We only want to do this for block-level HTML tags, such as headers,
  137. # lists, and tables. That's because we still want to wrap <p>s around
  138. # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  139. # phrase emphasis, and spans. The list of tags we're looking for is
  140. # hard-coded.
  141. #
  142. # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
  143. # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
  144. # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
  145. # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
  146. # These two functions are calling each other. It's recursive!
  147. #
  148. global $block_tags, $context_block_tags, $contain_span_tags,
  149. $clean_tags, $auto_close_tags;
  150. # Tags that are always treated as block tags:
  151. $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
  152. 'form|fieldset|iframe|hr|legend';
  153. # Tags treated as block tags only if the opening tag is alone on it's line:
  154. $context_block_tags = 'script|noscript|math|ins|del';
  155. # Tags where markdown="1" default to span mode:
  156. $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend';
  157. # Tags which must not have their contents modified, no matter where
  158. # they appear:
  159. $clean_tags = 'script|math';
  160. # Tags that do not need to be closed.
  161. $auto_close_tags = 'hr|img';
  162. # Regex to match any tag.
  163. global $tag_match;
  164. $tag_match =
  165. '{
  166. ( # $2: Capture hole tag.
  167. </? # Any opening or closing tag.
  168. [\w:$]+ # Tag name.
  169. \s* # Whitespace.
  170. (?:
  171. ".*?" | # Double quotes (can contain `>`)
  172. \'.*?\' | # Single quotes (can contain `>`)
  173. .+? # Anything but quotes and `>`.
  174. )*?
  175. > # End of tag.
  176. |
  177. <!-- .*? --> # HTML Comment
  178. |
  179. <\? .*? \?> # Processing instruction
  180. |
  181. <!\[CDATA\[.*?\]\]> # CData Block
  182. )
  183. }xs';
  184. #
  185. # Call the HTML-in-Markdown hasher.
  186. #
  187. list($text, ) = _HashHTMLBlocks_InMarkdown($text);
  188. return $text;
  189. }
  190. function _HashHTMLBlocks_InMarkdown($text, $indent = 0,
  191. $enclosing_tag = '', $md_span = false)
  192. {
  193. #
  194. # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
  195. #
  196. # * $indent is the number of space to be ignored when checking for code
  197. # blocks. This is important because if we don't take the indent into
  198. # account, something like this (which looks right) won't work as expected:
  199. #
  200. # <div>
  201. # <div markdown="1">
  202. # Hello World. <-- Is this a Markdown code block or text?
  203. # </div> <-- Is this a Markdown code block or a real tag?
  204. # <div>
  205. #
  206. # If you don't like this, just don't indent the tag on which
  207. # you apply the markdown="1" attribute.
  208. #
  209. # * If $enclosing_tag is not empty, stops at the first unmatched closing
  210. # tag with that name. Nested tags supported.
  211. #
  212. # * If $md_span is true, text inside must treated as span. So any double
  213. # newline will be replaced by a single newline so that it does not create
  214. # paragraphs.
  215. #
  216. # Returns an array of that form: ( processed text , remaining text )
  217. #
  218. global $block_tags, $context_block_tags, $clean_tags, $auto_close_tags,
  219. $tag_match;
  220. if($text === '') return array('', '');
  221. # Regex to check for the presense of newlines around a block tag.
  222. $newline_match_before = "/(?:^\n?|\n\n) *$/";
  223. $newline_match_after =
  224. '{
  225. ^ # Start of text following the tag.
  226. (?:[ ]*<!--.*?-->)? # Optional comment.
  227. [ ]*\n # Must be followed by newline.
  228. }xs';
  229. # Regex to match any tag.
  230. $block_tag_match =
  231. '{
  232. ( # $2: Capture hole tag.
  233. </? # Any opening or closing tag.
  234. (?: # Tag name.
  235. '.$block_tags.' |
  236. '.$context_block_tags.' |
  237. '.$clean_tags.' |
  238. (?!\s)'.$enclosing_tag.'
  239. )
  240. \s* # Whitespace.
  241. (?:
  242. ".*?" | # Double quotes (can contain `>`)
  243. \'.*?\' | # Single quotes (can contain `>`)
  244. .+? # Anything but quotes and `>`.
  245. )*?
  246. > # End of tag.
  247. |
  248. <!-- .*? --> # HTML Comment
  249. |
  250. <\? .*? \?> # Processing instruction
  251. |
  252. <!\[CDATA\[.*?\]\]> # CData Block
  253. )
  254. }xs';
  255. $depth = 0; # Current depth inside the tag tree.
  256. $parsed = ""; # Parsed text that will be returned.
  257. #
  258. # Loop through every tag until we find the closing tag of the parent
  259. # or loop until reaching the end of text if no parent tag specified.
  260. #
  261. do {
  262. #
  263. # Split the text using the first $tag_match pattern found.
  264. # Text before pattern will be first in the array, text after
  265. # pattern will be at the end, and between will be any catches made
  266. # by the pattern.
  267. #
  268. $parts = preg_split($block_tag_match, $text, 2,
  269. PREG_SPLIT_DELIM_CAPTURE);
  270. # If in Markdown span mode, replace any multiple newlines that would
  271. # trigger a new paragraph.
  272. if($md_span) {
  273. $parts[0] = preg_replace('/\n\n/', "\n", $parts[0]);
  274. }
  275. $parsed .= $parts[0]; # Text before current tag.
  276. # If end of $text has been reached. Stop loop.
  277. if(count($parts) < 3) {
  278. $text = "";
  279. break;
  280. }
  281. $tag = $parts[1]; # Tag to handle.
  282. $text = $parts[2]; # Remaining text after current tag.
  283. #
  284. # Check for: Tag inside code block or span
  285. #
  286. if(# Find current paragraph
  287. preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
  288. (
  289. # Then match in it either a code block...
  290. preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
  291. '(?!\n)$/', $matches[1], $x) ||
  292. # ...or unbalenced code span markers. (the regex matches balenced)
  293. !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
  294. $matches[1])
  295. ))
  296. {
  297. # Tag is in code block or span and may not be a tag at all. So we
  298. # simply skip the first char (should be a `<`).
  299. $parsed .= $tag{0};
  300. $text = substr($tag, 1) . $text; # Put back $tag minus first char.
  301. }
  302. #
  303. # Check for: Opening Block level tag or
  304. # Opening Content Block tag (like ins and del)
  305. # used as a block tag (tag is alone on it's line).
  306. #
  307. else if(preg_match("{^<(?:$block_tags)\b}", $tag) ||
  308. ( preg_match("{^<(?:$context_block_tags)\b}", $tag) &&
  309. preg_match($newline_match_before, $parsed) &&
  310. preg_match($newline_match_after, $text) )
  311. )
  312. {
  313. # Need to parse tag and following text using the HTML parser.
  314. list($block_text, $text) =
  315. _HashHTMLBlocks_InHTML($tag . $text,
  316. "_HashHTMLBlocks_HashBlock", TRUE);
  317. # Make sure it stays outside of any paragraph by adding newlines.
  318. $parsed .= "\n\n$block_text\n\n";
  319. }
  320. #
  321. # Check for: Clean tag (like script, math)
  322. # HTML Comments, processing instructions.
  323. #
  324. else if(preg_match("{^<(?:$clean_tags)\b}", $tag) ||
  325. $tag{1} == '!' || $tag{1} == '?')
  326. {
  327. # Need to parse tag and following text using the HTML parser.
  328. # (don't check for markdown attribute)
  329. list($block_text, $text) =
  330. _HashHTMLBlocks_InHTML($tag . $text,
  331. "_HashHTMLBlocks_HashClean", FALSE);
  332. $parsed .= $block_text;
  333. }
  334. #
  335. # Check for: Tag with same name as enclosing tag.
  336. #
  337. else if($enclosing_tag !== '' &&
  338. # Same name as enclosing tag.
  339. preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
  340. {
  341. #
  342. # Increase/decrease nested tag count.
  343. #
  344. if($tag{1} == '/') $depth--;
  345. else if($tag{strlen($tag)-2} != '/') $depth++;
  346. if($depth < 0) {
  347. #
  348. # Going out of parent element. Clean up and break so we
  349. # return to the calling function.
  350. #
  351. $text = $tag . $text;
  352. break;
  353. }
  354. $parsed .= $tag;
  355. }
  356. else {
  357. $parsed .= $tag;
  358. }
  359. } while ($depth >= 0);
  360. return array($parsed, $text);
  361. }
  362. function _HashHTMLBlocks_InHTML($text, $hash_function, $md_attr) {
  363. #
  364. # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
  365. #
  366. # * Calls $hash_function to convert any blocks.
  367. # * Stops when the first opening tag closes.
  368. # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
  369. # (it is not inside clean tags)
  370. #
  371. # Returns an array of that form: ( processed text , remaining text )
  372. #
  373. global $auto_close_tags, $contain_span_tags, $tag_match;
  374. if($text === '') return array('', '');
  375. # Regex to match `markdown` attribute inside of a tag.
  376. $markdown_attr_match = '
  377. {
  378. \s* # Eat whitespace before the `markdown` attribute
  379. markdown
  380. \s*=\s*
  381. (["\']) # $1: quote delimiter
  382. (.*?) # $2: attribute value
  383. \1 # matching delimiter
  384. }xs';
  385. $original_text = $text; # Save original text in case of faliure.
  386. $depth = 0; # Current depth inside the tag tree.
  387. $block_text = ""; # Temporary text holder for current text.
  388. $parsed = ""; # Parsed text that will be returned.
  389. #
  390. # Get the name of the starting tag.
  391. #
  392. if(preg_match("/^<([\w:$]*)\b/", $text, $matches))
  393. $base_tag_name = $matches[1];
  394. #
  395. # Loop through every tag until we find the corresponding closing tag.
  396. #
  397. do {
  398. #
  399. # Split the text using the first $tag_match pattern found.
  400. # Text before pattern will be first in the array, text after
  401. # pattern will be at the end, and between will be any catches made
  402. # by the pattern.
  403. #
  404. $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
  405. if(count($parts) < 3) {
  406. #
  407. # End of $text reached with unbalenced tag(s).
  408. # In that case, we return original text unchanged and pass the
  409. # first character as filtered to prevent an infinite loop in the
  410. # parent function.
  411. #
  412. return array($original_text{0}, substr($original_text, 1));
  413. }
  414. $block_text .= $parts[0]; # Text before current tag.
  415. $tag = $parts[1]; # Tag to handle.
  416. $text = $parts[2]; # Remaining text after current tag.
  417. #
  418. # Check for: Auto-close tag (like <hr/>)
  419. # Comments and Processing Instructions.
  420. #
  421. if(preg_match("{^</?(?:$auto_close_tags)\b}", $tag) ||
  422. $tag{1} == '!' || $tag{1} == '?')
  423. {
  424. # Just add the tag to the block as if it was text.
  425. $block_text .= $tag;
  426. }
  427. else {
  428. #
  429. # Increase/decrease nested tag count. Only do so if
  430. # the tag's name match base tag's.
  431. #
  432. if(preg_match("{^</?$base_tag_name\b}", $tag)) {
  433. if($tag{1} == '/') $depth--;
  434. else if($tag{strlen($tag)-2} != '/') $depth++;
  435. }
  436. #
  437. # Check for `markdown="1"` attribute and handle it.
  438. #
  439. if($md_attr &&
  440. preg_match($markdown_attr_match, $tag, $attr_matches) &&
  441. preg_match('/^(?:1|block|span)$/', $attr_matches[2]))
  442. {
  443. # Remove `markdown` attribute from opening tag.
  444. $tag = preg_replace($markdown_attr_match, '', $tag);
  445. # Check if text inside this tag must be parsed in span mode.
  446. $md_mode = $attr_matches[2];
  447. $span_mode = $md_mode == 'span' || $md_mode != 'block' &&
  448. preg_match("{^<(?:$contain_span_tags)\b}", $tag);
  449. # Calculate indent before tag.
  450. preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
  451. $indent = strlen($matches[1]);
  452. # End preceding block with this tag.
  453. $block_text .= $tag;
  454. $parsed .= $hash_function($block_text, $span_mode);
  455. # Get enclosing tag name for the ParseMarkdown function.
  456. preg_match('/^<([\w:$]*)\b/', $tag, $matches);
  457. $tag_name = $matches[1];
  458. # Parse the content using the HTML-in-Markdown parser.
  459. list ($block_text, $text)
  460. = _HashHTMLBlocks_InMarkdown($text, $indent,
  461. $tag_name, $span_mode);
  462. # Outdent markdown text.
  463. if($indent > 0) {
  464. $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
  465. $block_text);
  466. }
  467. # Append tag content to parsed text.
  468. if(!$span_mode) $parsed .= "\n\n$block_text\n\n";
  469. else $parsed .= "$block_text";
  470. # Start over a new block.
  471. $block_text = "";
  472. }
  473. else $block_text .= $tag;
  474. }
  475. } while ($depth > 0);
  476. #
  477. # Hash last block text that wasn't processed inside the loop.
  478. #
  479. $parsed .= $hash_function($block_text);
  480. return array($parsed, $text);
  481. }
  482. function _HashHTMLBlocks_HashBlock($text) {
  483. global $md_html_hashes, $md_html_blocks;
  484. $key = md5($text);
  485. $md_html_hashes[$key] = $text;
  486. $md_html_blocks[$key] = $text;
  487. return $key; # String that will replace the tag.
  488. }
  489. function _HashHTMLBlocks_HashClean($text) {
  490. global $md_html_hashes;
  491. $key = md5($text);
  492. $md_html_hashes[$key] = $text;
  493. return $key; # String that will replace the clean tag.
  494. }
  495. function _HashBlock($text) {
  496. #
  497. # Called whenever a tag must be hashed. When a function insert a block-level
  498. # tag in $text, it pass through this function and is automaticaly escaped,
  499. # which remove the need to call _HashHTMLBlocks at every step.
  500. #
  501. # Swap back any tag hash found in $text so we do not have to _UnhashTags
  502. # multiple times at the end. Must do this because of
  503. $text = _UnhashTags($text);
  504. # Then hash the block as normal.
  505. return _HashHTMLBlocks_HashBlock($text);
  506. }
  507. function _RunBlockGamut($text, $hash_html_blocks = TRUE) {
  508. #
  509. # These are all the transformations that form block-level
  510. # tags like paragraphs, headers, and list items.
  511. #
  512. if($hash_html_blocks) {
  513. # We need to escape raw HTML in Markdown source before doing anything
  514. # else. This need to be done for each block, and not only at the
  515. # begining in the Markdown function since hashed blocks can be part of
  516. # a list item and could have been indented. Indented blocks would have
  517. # been seen as a code block in previous pass of _HashHTMLBlocks.
  518. $text = _HashHTMLBlocks($text);
  519. }
  520. $text = _DoHeaders($text);
  521. $text = _DoTables($text);
  522. # Do Horizontal Rules:
  523. global $md_empty_element_suffix;
  524. $text = preg_replace(
  525. array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}emx',
  526. '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}emx',
  527. '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}emx'),
  528. "_HashBlock('\n<hr$md_empty_element_suffix\n')",
  529. $text);
  530. $text = _DoLists($text);
  531. $text = _DoDefLists($text);
  532. $text = _DoCodeBlocks($text);
  533. $text = _DoBlockQuotes($text);
  534. $text = _FormParagraphs($text);
  535. return $text;
  536. }
  537. function _RunSpanGamut($text) {
  538. #
  539. # These are all the transformations that occur *within* block-level
  540. # tags like paragraphs, headers, and list items.
  541. #
  542. global $md_empty_element_suffix;
  543. $text = _DoCodeSpans($text);
  544. $text = _EscapeSpecialChars($text);
  545. # Process anchor and image tags. Images must come first,
  546. # because ![foo][f] looks like an anchor.
  547. $text = _DoImages($text);
  548. $text = _DoAnchors($text);
  549. # Make links out of things like `<http://example.com/>`
  550. # Must come after _DoAnchors(), because you can use < and >
  551. # delimiters in inline links like [this](<url>).
  552. $text = _DoAutoLinks($text);
  553. $text = _EncodeAmpsAndAngles($text);
  554. $text = _DoItalicsAndBold($text);
  555. # Do hard breaks:
  556. $text = preg_replace('/ {2,}\n/', "<br$md_empty_element_suffix\n", $text);
  557. return $text;
  558. }
  559. function _EscapeSpecialChars($text) {
  560. global $md_escape_table;
  561. $tokens = _TokenizeHTML($text);
  562. $text = ''; # rebuild $text from the tokens
  563. # $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
  564. # $tags_to_skip = "!<(/?)(?:pre|code|kbd|script|math)[\s>]!";
  565. foreach ($tokens as $cur_token) {
  566. if($cur_token[0] == 'tag') {
  567. # Within tags, encode * and _ so they don't conflict
  568. # with their use in Markdown for italics and strong.
  569. # We're replacing each such character with its
  570. # corresponding MD5 checksum value; this is likely
  571. # overkill, but it should prevent us from colliding
  572. # with the escape values by accident.
  573. $cur_token[1] = str_replace(array('*', '_'),
  574. array($md_escape_table['*'], $md_escape_table['_']),
  575. $cur_token[1]);
  576. $text .= $cur_token[1];
  577. } else {
  578. $t = $cur_token[1];
  579. $t = _EncodeBackslashEscapes($t);
  580. $text .= $t;
  581. }
  582. }
  583. return $text;
  584. }
  585. function _DoAnchors($text) {
  586. #
  587. # Turn Markdown link shortcuts into XHTML <a> tags.
  588. #
  589. global $md_nested_brackets;
  590. #
  591. # First, handle reference-style links: [link text] [id]
  592. #
  593. $text = preg_replace_callback("{
  594. ( # wrap whole match in $1
  595. \\[
  596. ($md_nested_brackets) # link text = $2
  597. \\]
  598. [ ]? # one optional space
  599. (?:\\n[ ]*)? # one optional newline followed by spaces
  600. \\[
  601. (.*?) # id = $3
  602. \\]
  603. )
  604. }xs",
  605. '_DoAnchors_reference_callback', $text);
  606. #
  607. # Next, inline-style links: [link text](url "optional title")
  608. #
  609. $text = preg_replace_callback("{
  610. ( # wrap whole match in $1
  611. \\[
  612. ($md_nested_brackets) # link text = $2
  613. \\]
  614. \\( # literal paren
  615. [ \\t]*
  616. <?(.*?)>? # href = $3
  617. [ \\t]*
  618. ( # $4
  619. (['\"]) # quote char = $5
  620. (.*?) # Title = $6
  621. \\5 # matching quote
  622. )? # title is optional
  623. \\)
  624. )
  625. }xs",
  626. '_DoAnchors_inline_callback', $text);
  627. return $text;
  628. }
  629. function _DoAnchors_reference_callback($matches) {
  630. global $md_urls, $md_titles, $md_escape_table;
  631. $whole_match = $matches[1];
  632. $link_text = $matches[2];
  633. $link_id = strtolower($matches[3]);
  634. if($link_id == "") {
  635. $link_id = strtolower($link_text); # for shortcut links like [this][].
  636. }
  637. if(isset($md_urls[$link_id])) {
  638. $url = $md_urls[$link_id];
  639. # We've got to encode these to avoid conflicting with italics/bold.
  640. $url = str_replace(array('*', '_'),
  641. array($md_escape_table['*'], $md_escape_table['_']),
  642. $url);
  643. $result = "<a href=\"$url\"";
  644. if( isset( $md_titles[$link_id] ) ) {
  645. $title = $md_titles[$link_id];
  646. $title = str_replace(array('*', '_'),
  647. array($md_escape_table['*'],
  648. $md_escape_table['_']), $title);
  649. $result .= " title=\"$title\"";
  650. }
  651. $result .= ">$link_text</a>";
  652. }
  653. else {
  654. $result = $whole_match;
  655. }
  656. return $result;
  657. }
  658. function _DoAnchors_inline_callback($matches) {
  659. global $md_escape_table;
  660. $whole_match = $matches[1];
  661. $link_text = $matches[2];
  662. $url = $matches[3];
  663. $title =& $matches[6];
  664. # We've got to encode these to avoid conflicting with italics/bold.
  665. $url = str_replace(array('*', '_'),
  666. array($md_escape_table['*'], $md_escape_table['_']),
  667. $url);
  668. $result = "<a href=\"$url\"";
  669. if(isset($title)) {
  670. $title = str_replace('"', '&quot;', $title);
  671. $title = str_replace(array('*', '_'),
  672. array($md_escape_table['*'], $md_escape_table['_']),
  673. $title);
  674. $result .= " title=\"$title\"";
  675. }
  676. $result .= ">$link_text</a>";
  677. return $result;
  678. }
  679. function _DoImages($text) {
  680. #
  681. # Turn Markdown image shortcuts into <img> tags.
  682. #
  683. global $md_nested_brackets;
  684. #
  685. # First, handle reference-style labeled images: ![alt text][id]
  686. #
  687. $text = preg_replace_callback('{
  688. ( # wrap whole match in $1
  689. !\[
  690. ('.$md_nested_brackets.') # alt text = $2
  691. \]
  692. [ ]? # one optional space
  693. (?:\n[ ]*)? # one optional newline followed by spaces
  694. \[
  695. (.*?) # id = $3
  696. \]
  697. )
  698. }xs',
  699. '_DoImages_reference_callback', $text);
  700. #
  701. # Next, handle inline images: ![alt text](url "optional title")
  702. # Don't forget: encode * and _
  703. $text = preg_replace_callback('{
  704. ( # wrap whole match in $1
  705. !\[
  706. ('.$md_nested_brackets.') # alt text = $2
  707. \]
  708. \( # literal paren
  709. [ \t]*
  710. <?(\S+?)>? # src url = $3
  711. [ \t]*
  712. ( # $4
  713. ([\'"]) # quote char = $5
  714. (.*?) # title = $6
  715. \5 # matching quote
  716. [ \t]*
  717. )? # title is optional
  718. \)
  719. )
  720. }xs',
  721. '_DoImages_inline_callback', $text);
  722. return $text;
  723. }
  724. function _DoImages_reference_callback($matches) {
  725. global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;
  726. $whole_match = $matches[1];
  727. $alt_text = $matches[2];
  728. $link_id = strtolower($matches[3]);
  729. if($link_id == "") {
  730. $link_id = strtolower($alt_text); # for shortcut links like ![this][].
  731. }
  732. $alt_text = str_replace('"', '&quot;', $alt_text);
  733. if(isset($md_urls[$link_id])) {
  734. $url = $md_urls[$link_id];
  735. # We've got to encode these to avoid conflicting with italics/bold.
  736. $url = str_replace(array('*', '_'),
  737. array($md_escape_table['*'], $md_escape_table['_']),
  738. $url);
  739. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  740. if(isset($md_titles[$link_id])) {
  741. $title = $md_titles[$link_id];
  742. $title = str_replace(array('*', '_'),
  743. array($md_escape_table['*'],
  744. $md_escape_table['_']), $title);
  745. $result .= " title=\"$title\"";
  746. }
  747. $result .= $md_empty_element_suffix;
  748. }
  749. else {
  750. # If there's no such link ID, leave intact:
  751. $result = $whole_match;
  752. }
  753. return $result;
  754. }
  755. function _DoImages_inline_callback($matches) {
  756. global $md_empty_element_suffix, $md_escape_table;
  757. $whole_match = $matches[1];
  758. $alt_text = $matches[2];
  759. $url = $matches[3];
  760. $title = '';
  761. if(isset($matches[6])) {
  762. $title = $matches[6];
  763. }
  764. $alt_text = str_replace('"', '&quot;', $alt_text);
  765. $title = str_replace('"', '&quot;', $title);
  766. # We've got to encode these to avoid conflicting with italics/bold.
  767. $url = str_replace(array('*', '_'),
  768. array($md_escape_table['*'], $md_escape_table['_']),
  769. $url);
  770. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  771. if(isset($title)) {
  772. $title = str_replace(array('*', '_'),
  773. array($md_escape_table['*'], $md_escape_table['_']),
  774. $title);
  775. $result .= " title=\"$title\""; # $title already quoted
  776. }
  777. $result .= $md_empty_element_suffix;
  778. return $result;
  779. }
  780. function _DoHeaders($text) {
  781. # Setext-style headers:
  782. # Header 1
  783. # ========
  784. #
  785. # Header 2
  786. # --------
  787. #
  788. $text = preg_replace(
  789. array('{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n=+[ \t]*\n+ }emx',
  790. '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n-+[ \t]*\n+ }emx'),
  791. array("_HashBlock('<h1'. ('\\2'? ' id=\"'._UnslashQuotes('\\2').'\"':'').
  792. '>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h1>'
  793. ) . '\n\n'",
  794. "_HashBlock('<h2'. ('\\2'? ' id=\"'._UnslashQuotes('\\2').'\"':'').
  795. '>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h2>'
  796. ) . '\n\n'"),
  797. $text);
  798. # atx-style headers:
  799. # # Header 1
  800. # ## Header 2
  801. # ## Header 2 with closing hashes ##
  802. # ...
  803. # ###### Header 6
  804. #
  805. $text = preg_replace('{
  806. ^(\#{1,6}) # $1 = string of #\'s
  807. [ \t]*
  808. (.+?) # $2 = Header text
  809. [ \t]*
  810. \#* # optional closing #\'s (not counted)
  811. (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\}[ ]*)? # id attribute
  812. \n+
  813. }xme',
  814. "_HashBlock(
  815. '<h'.strlen('\\1'). ('\\3'? ' id=\"'._UnslashQuotes('\\3').'\"':'').'>'.
  816. _RunSpanGamut(_UnslashQuotes('\\2')).
  817. '</h'.strlen('\\1').'>'
  818. ) . '\n\n'",
  819. $text);
  820. return $text;
  821. }
  822. function _DoTables($text) {
  823. #
  824. # Form HTML tables.
  825. #
  826. global $md_tab_width;
  827. $less_than_tab = $md_tab_width - 1;
  828. #
  829. # Find tables with leading pipe.
  830. #
  831. # | Header 1 | Header 2
  832. # | -------- | --------
  833. # | Cell 1 | Cell 2
  834. # | Cell 3 | Cell 4
  835. #
  836. $text = preg_replace_callback('
  837. {
  838. ^ # Start of a line
  839. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  840. [|] # Optional leading pipe (present)
  841. (.+) \n # $1: Header row (at least one pipe)
  842. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  843. [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
  844. ( # $3: Cells
  845. (?:
  846. [ ]* # Allowed whitespace.
  847. [|] .* \n # Row content.
  848. )*
  849. )
  850. (?=\n|\Z) # Stop at final double newline.
  851. }xm',
  852. '_DoTable_LeadingPipe_callback', $text);
  853. #
  854. # Find tables without leading pipe.
  855. #
  856. # Header 1 | Header 2
  857. # -------- | --------
  858. # Cell 1 | Cell 2
  859. # Cell 3 | Cell 4
  860. #
  861. $text = preg_replace_callback('
  862. {
  863. ^ # Start of a line
  864. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  865. (\S.*[|].*) \n # $1: Header row (at least one pipe)
  866. [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
  867. ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
  868. ( # $3: Cells
  869. (?:
  870. .* [|] .* \n # Row content
  871. )*
  872. )
  873. (?=\n|\Z) # Stop at final double newline.
  874. }xm',
  875. '_DoTable_callback', $text);
  876. return $text;
  877. }
  878. function _DoTable_LeadingPipe_callback($matches) {
  879. $head = $matches[1];
  880. $underline = $matches[2];
  881. $content = $matches[3];
  882. # Remove leading pipe for each row.
  883. $content = preg_replace('/^ *[|]/m', '', $content);
  884. return _DoTable_callback(array($matches[0], $head, $underline, $content));
  885. }
  886. function _DoTable_callback($matches) {
  887. $head = $matches[1];
  888. $underline = $matches[2];
  889. $content = $matches[3];
  890. # Remove any tailing pipes for each line.
  891. $head = preg_replace('/[|] *$/m', '', $head);
  892. $underline = preg_replace('/[|] *$/m', '', $underline);
  893. $content = preg_replace('/[|] *$/m', '', $content);
  894. # Reading alignement from header underline.
  895. $separators = preg_split('/ *[|] */', $underline);
  896. foreach ($separators as $n => $s) {
  897. if(preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"';
  898. else if(preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
  899. else if(preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
  900. else $attr[$n] = '';
  901. }
  902. # Creating code spans before splitting the row is an easy way to
  903. # handle a code span containg pipes.
  904. $head = _DoCodeSpans($head);
  905. $headers = preg_split('/ *[|] */', $head);
  906. $col_count = count($headers);
  907. # Write column headers.
  908. $text = "<table>\n";
  909. $text .= "<thead>\n";
  910. $text .= "<tr>\n";
  911. foreach ($headers as $n => $header)
  912. $text .= " <th$attr[$n]>"._RunSpanGamut(trim($header))."</th>\n";
  913. $text .= "</tr>\n";
  914. $text .= "</thead>\n";
  915. # Split content by row.
  916. $rows = explode("\n", trim($content, "\n"));
  917. $text .= "<tbody>\n";
  918. foreach ($rows as $row) {
  919. # Creating code spans before splitting the row is an easy way to
  920. # handle a code span containg pipes.
  921. $row = _DoCodeSpans($row);
  922. # Split row by cell.
  923. $row_cells = preg_split('/ *[|] */', $row, $col_count);
  924. $row_cells = array_pad($row_cells, $col_count, '');
  925. $text .= "<tr>\n";
  926. foreach ($row_cells as $n => $cell)
  927. $text .= " <td$attr[$n]>"._RunSpanGamut(trim($cell))."</td>\n";
  928. $text .= "</tr>\n";
  929. }
  930. $text .= "</tbody>\n";
  931. $text .= "</table>";
  932. return _HashBlock($text) . "\n";
  933. }
  934. function _DoLists($text) {
  935. #
  936. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  937. #
  938. global $md_tab_width, $md_list_level;
  939. $less_than_tab = $md_tab_width - 1;
  940. # Re-usable patterns to match list item bullets and number markers:
  941. $marker_ul = '[*+-]';
  942. $marker_ol = '\d+[.]';
  943. $marker_any = "(?:$marker_ul|$marker_ol)";
  944. $markers = array($marker_ul, $marker_ol);
  945. foreach ($markers as $marker) {
  946. # Re-usable pattern to match any entirel ul or ol list:
  947. $whole_list = '
  948. ( # $1 = whole list
  949. ( # $2
  950. [ ]{0,'.$less_than_tab.'}
  951. ('.$marker.') # $3 = first list item marker
  952. [ \t]+
  953. )
  954. (?s:.+?)
  955. ( # $4
  956. \z
  957. |
  958. \n{2,}
  959. (?=\S)
  960. (?! # Negative lookahead for another list item marker
  961. [ \t]*
  962. '.$marker.'[ \t]+
  963. )
  964. )
  965. )
  966. '; // mx
  967. # We use a different prefix before nested lists than top-level lists.
  968. # See extended comment in _ProcessListItems().
  969. if($md_list_level) {
  970. $text = preg_replace_callback('{
  971. ^
  972. '.$whole_list.'
  973. }mx',
  974. '_DoLists_callback', $text);
  975. }
  976. else {
  977. $text = preg_replace_callback('{
  978. (?:(?<=\n\n)|\A\n?)
  979. '.$whole_list.'
  980. }mx',
  981. '_DoLists_callback', $text);
  982. }
  983. }
  984. return $text;
  985. }
  986. function _DoLists_callback($matches) {
  987. # Re-usable patterns to match list item bullets and number markers:
  988. $marker_ul = '[*+-]';
  989. $marker_ol = '\d+[.]';
  990. $marker_any = "(?:$marker_ul|$marker_ol)";
  991. $list = $matches[1];
  992. $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
  993. $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );
  994. # Turn double returns into triple returns, so that we can make a
  995. # paragraph for the last item in a list, if necessary:
  996. $list = preg_replace("/\n{2,}/", "\n\n\n", $list);
  997. $result = _ProcessListItems($list, $marker_any);
  998. $result = "<$list_type>\n" . $result . "</$list_type>";
  999. return "\n" . _HashBlock($result) . "\n\n";
  1000. }
  1001. function _ProcessListItems($list_str, $marker_any) {
  1002. #
  1003. # Process the contents of a single ordered or unordered list, splitting it
  1004. # into individual list items.
  1005. #
  1006. global $md_list_level;
  1007. # The $md_list_level global keeps track of when we're inside a list.
  1008. # Each time we enter a list, we increment it; when we leave a list,
  1009. # we decrement. If it's zero, we're not in a list anymore.
  1010. #
  1011. # We do this because when we're not inside a list, we want to treat
  1012. # something like this:
  1013. #
  1014. # I recommend upgrading to version
  1015. # 8. Oops, now this line is treated
  1016. # as a sub-list.
  1017. #
  1018. # As a single paragraph, despite the fact that the second line starts
  1019. # with a digit-period-space sequence.
  1020. #
  1021. # Whereas when we're inside a list (or sub-list), that line will be
  1022. # treated as the start of a sub-list. What a kludge, huh? This is
  1023. # an aspect of Markdown's syntax that's hard to parse perfectly
  1024. # without resorting to mind-reading. Perhaps the solution is to
  1025. # change the syntax rules such that sub-lists must start with a
  1026. # starting cardinal number; e.g. "1." or "a.".
  1027. $md_list_level++;
  1028. # trim trailing blank lines:
  1029. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  1030. $list_str = preg_replace_callback('{
  1031. (\n)? # leading line = $1
  1032. (^[ \t]*) # leading whitespace = $2
  1033. ('.$marker_any.') [ \t]+ # list marker = $3
  1034. ((?s:.+?) # list item text = $4
  1035. (\n{1,2}))
  1036. (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
  1037. }xm',
  1038. '_ProcessListItems_callback', $list_str);
  1039. $md_list_level--;
  1040. return $list_str;
  1041. }
  1042. function _ProcessListItems_callback($matches) {
  1043. $item = $matches[4];
  1044. $leading_line =& $matches[1];
  1045. $leading_space =& $matches[2];
  1046. if($leading_line || preg_match('/\n{2,}/', $item)) {
  1047. $item = _RunBlockGamut(_Outdent($item));
  1048. }
  1049. else {
  1050. # Recursion for sub-lists:
  1051. $item = _DoLists(_Outdent($item));
  1052. $item = preg_replace('/\n+$/', '', $item);
  1053. $item = _RunSpanGamut($item);
  1054. }
  1055. return "<li>" . $item . "</li>\n";
  1056. }
  1057. function _DoDefLists($text) {
  1058. #
  1059. # Form HTML definition lists.
  1060. #
  1061. global $md_tab_width;
  1062. $less_than_tab = $md_tab_width - 1;
  1063. # Re-usable patterns to match list item bullets and number markers:
  1064. # Re-usable pattern to match any entire dl list:
  1065. $whole_list = '
  1066. ( # $1 = whole list
  1067. ( # $2
  1068. [ ]{0,'.$less_than_tab.'}
  1069. ((?>.*\S.*\n)+) # $3 = defined term
  1070. \n?
  1071. [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
  1072. )
  1073. (?s:.+?)
  1074. ( # $4
  1075. \z
  1076. |
  1077. \n{2,}
  1078. (?=\S)
  1079. (?! # Negative lookahead for another term
  1080. [ ]{0,'.$less_than_tab.'}
  1081. (?: \S.*\n )+? # defined term
  1082. \n?
  1083. [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
  1084. )
  1085. (?! # Negative lookahead for another definition
  1086. [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
  1087. )
  1088. )
  1089. )
  1090. '; // mx
  1091. $text = preg_replace_callback('{
  1092. (?:(?<=\n\n)|\A\n?)
  1093. '.$whole_list.'
  1094. }mx',
  1095. '_DoDefLists_callback', $text);
  1096. return $text;
  1097. }
  1098. function _DoDefLists_callback($matches) {
  1099. # Re-usable patterns to match list item bullets and number markers:
  1100. $list = $matches[1];
  1101. # Turn double returns into triple returns, so that we can make a
  1102. # paragraph for the last item in a list, if necessary:
  1103. $result = trim(_ProcessDefListItems($list));
  1104. $result = "<dl>\n" . $result . "\n</dl>";
  1105. return _HashBlock($result) . "\n\n";
  1106. }
  1107. function _ProcessDefListItems($list_str) {
  1108. #
  1109. # Process the contents of a single ordered or unordered list, splitting it
  1110. # into individual list items.
  1111. #
  1112. global $md_tab_width;
  1113. $less_than_tab = $md_tab_width - 1;
  1114. # trim trailing blank lines:
  1115. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  1116. # Process definition terms.
  1117. $list_str = preg_replace_callback('{
  1118. (?:\n\n+|\A\n?) # leading line
  1119. ( # definition terms = $1
  1120. [ ]{0,'.$less_than_tab.'} # leading whitespace
  1121. (?![:][ ]|[ ]) # negative lookahead for a definition
  1122. # mark (colon) or more whitespace.
  1123. (?: \S.* \n)+? # actual term (not whitespace).
  1124. )
  1125. (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
  1126. # with a definition mark.
  1127. }xm',
  1128. '_ProcessDefListItems_callback_dt', $list_str);
  1129. # Process actual definitions.
  1130. $list_str = preg_replace_callback('{
  1131. \n(\n+)? # leading line = $1
  1132. [ ]{0,'.$less_than_tab.'} # whitespace before colon
  1133. [:][ ]+ # definition mark (colon)
  1134. ((?s:.+?)) # definition text = $2
  1135. (?= \n+ # stop at next definition mark,
  1136. (?: # next term or end of text
  1137. [ ]{0,'.$less_than_tab.'} [:][ ] |
  1138. <dt> | \z
  1139. )
  1140. )
  1141. }xm',
  1142. '_ProcessDefListItems_callback_dd', $list_str);
  1143. return $list_str;
  1144. }
  1145. function _ProcessDefListItems_callback_dt($matches) {
  1146. $terms = explode("\n", trim($matches[1]));
  1147. $text = '';
  1148. foreach ($terms as $term) {
  1149. $term = _RunSpanGamut(trim($term));
  1150. $text .= "\n<dt>" . $term . "</dt>";
  1151. }
  1152. return $text . "\n";
  1153. }
  1154. function _ProcessDefListItems_callback_dd($matches) {
  1155. $leading_line = $matches[1];
  1156. $def = $matches[2];
  1157. if($leading_line || preg_match('/\n{2,}/', $def)) {
  1158. $def = _RunBlockGamut(_Outdent($def . "\n\n"));
  1159. $def = "\n". $def ."\n";
  1160. }
  1161. else {
  1162. $def = rtrim($def);
  1163. $def = _RunSpanGamut(_Outdent($def));
  1164. }
  1165. return "\n<dd>" . $def . "</dd>\n";
  1166. }
  1167. function _DoCodeBlocks($text) {
  1168. #
  1169. # Process Markdown `<pre><code>` blocks.
  1170. #
  1171. global $md_tab_width;
  1172. $text = preg_replace_callback('{
  1173. (?:\n\n|\A)
  1174. ( # $1 = the code block -- one or more lines, starting with a space/tab
  1175. (?:
  1176. (?:[ ]{'.$md_tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces
  1177. .*\n+
  1178. )+
  1179. )
  1180. ((?=^[ ]{0,'.$md_tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
  1181. }xm',
  1182. '_DoCodeBlocks_callback', $text);
  1183. return $text;
  1184. }
  1185. function _DoCodeBlocks_callback($matches) {
  1186. $codeblock = $matches[1];
  1187. $codeblock = _EncodeCode(_Outdent($codeblock));
  1188. // $codeblock = _Detab($codeblock);
  1189. # trim leading newlines and trailing whitespace
  1190. $codeblock = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $codeblock);
  1191. $result = "<pre><code>" . $codeblock . "\n</code></pre>";
  1192. return "\n\n" . _HashBlock($result) . "\n\n";
  1193. }
  1194. function _DoCodeSpans($text) {
  1195. #
  1196. # * Backtick quotes are used for <code></code> spans.
  1197. #
  1198. # * You can use multiple backticks as the delimiters if you want to
  1199. # include literal backticks in the code span. So, this input:
  1200. #
  1201. # Just type ``foo `bar` baz`` at the prompt.
  1202. #
  1203. # Will translate to:
  1204. #
  1205. # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
  1206. #
  1207. # There's no arbitrary limit to the number of backticks you
  1208. # can use as delimters. If you need three consecutive backticks
  1209. # in your code, use four for delimiters, etc.
  1210. #
  1211. # * You can use spaces to get literal backticks at the edges:
  1212. #
  1213. # ... type `` `bar` `` ...
  1214. #
  1215. # Turns to:
  1216. #
  1217. # ... type <code>`bar`</code> ...
  1218. #
  1219. $text = preg_replace_callback('@
  1220. (?<!\\\) # Character before opening ` can\'t be a backslash
  1221. (`+) # $1 = Opening run of `
  1222. (.+?) # $2 = The code block
  1223. (?<!`)
  1224. \1 # Matching closer
  1225. (?!`)
  1226. @xs',
  1227. '_DoCodeSpans_callback', $text);
  1228. return $text;
  1229. }
  1230. function _DoCodeSpans_callback($matches) {
  1231. $c = $matches[2];
  1232. $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
  1233. $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
  1234. $c = _EncodeCode($c);
  1235. return "<code>$c</code>";
  1236. }
  1237. function _EncodeCode($_) {
  1238. #
  1239. # Encode/escape certain characters inside Markdown code runs.
  1240. # The point is that in code, these characters are literals,
  1241. # and lose their special Markdown meanings.
  1242. #
  1243. global $md_escape_table;
  1244. # Encode all ampersands; HTML entities are not
  1245. # entities within a Markdown code span.
  1246. $_ = str_replace('&', '&amp;', $_);
  1247. # Do the angle bracket song and dance:
  1248. $_ = str_replace(array('<', '>'),
  1249. array('&lt;', '&gt;'), $_);
  1250. # Now, escape characters that are magic in Markdown:
  1251. $_ = str_replace(array_keys($md_escape_table),
  1252. array_values($md_escape_table), $_);
  1253. return $_;
  1254. }
  1255. function _DoItalicsAndBold($text) {
  1256. # <strong> must go first:
  1257. $text = preg_replace(array(
  1258. '{
  1259. ( (?<!\w) __ ) # $1: Marker (not preceded by alphanum)
  1260. (?=\S) # Not followed by whitespace
  1261. (?!__) # or two others marker chars.
  1262. ( # $2: Content
  1263. (?>
  1264. [^_]+? # Anthing not em markers.
  1265. |
  1266. # Balence any regular _ emphasis inside.
  1267. (?<![a-zA-Z0-9])_ (?=\S) (?! _) (.+?)
  1268. (?<=\S) _ (?![a-zA-Z0-9])
  1269. )+?
  1270. )
  1271. (?<=\S) __ # End mark not preceded by whitespace.
  1272. (?!\w) # Not followed by alphanum.
  1273. }sx',
  1274. '{
  1275. ( (?<!\*\*) \*\* ) # $1: Marker (not preceded by two *)
  1276. (?=\S) # Not followed by whitespace
  1277. (?!\1) # or two others marker chars.
  1278. ( # $2: Content
  1279. (?>
  1280. [^*]+? # Anthing not em markers.
  1281. |
  1282. # Balence any regular * emphasis inside.
  1283. \* (?=\S) (?! \*) (.+?) (?<=\S) \*
  1284. )+?
  1285. )
  1286. (?<=\S) \*\* # End mark not preceded by whitespace.
  1287. }sx',
  1288. ),
  1289. '<strong>\2</strong>', $text);
  1290. # Then <em>:
  1291. $text = preg_replace(array(
  1292. '{ ( (?<!\w) _ ) (?=\S) (?! _) (.+?) (?<=\S) _ (?!\w) }sx',
  1293. '{ ( (?<!\*)\* ) (?=\S) (?! \*) (.+?) (?<=\S) \* }sx',
  1294. ),
  1295. '<em>\2</em>', $text);
  1296. return $text;
  1297. }
  1298. function _DoBlockQuotes($text) {
  1299. $text = preg_replace_callback('/
  1300. ( # Wrap whole match in $1
  1301. (
  1302. ^[ \t]*>[ \t]? # ">" at the start of a line
  1303. .+\n # rest of the first line
  1304. (.+\n)* # subsequent consecutive lines
  1305. \n* # blanks
  1306. )+
  1307. )
  1308. /xm',
  1309. '_DoBlockQuotes_callback', $text);
  1310. return $text;
  1311. }
  1312. function _DoBlockQuotes_callback($matches) {
  1313. $bq = $matches[1];
  1314. # trim one level of quoting - trim whitespace-only lines
  1315. $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
  1316. $bq = _RunBlockGamut($bq); # recurse
  1317. $bq = preg_replace('/^/m', " ", $bq);
  1318. # These leading spaces screw with <pre> content, so we need to fix that:
  1319. $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
  1320. '_DoBlockQuotes_callback2', $bq);
  1321. return _HashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
  1322. }
  1323. function _DoBlockQuotes_callback2($matches) {
  1324. $pre = $matches[1];
  1325. $pre = preg_replace('/^ /m', '', $pre);
  1326. return $pre;
  1327. }
  1328. function _FormParagraphs($text) {
  1329. #
  1330. # Params:
  1331. # $text - string to process with html <p> tags
  1332. #
  1333. global $md_html_blocks, $md_html_hashes;
  1334. # Strip leading and trailing lines:
  1335. $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
  1336. $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
  1337. #
  1338. # Wrap <p> tags and unhashify HTML blocks
  1339. #
  1340. foreach ($grafs as $key => $value) {
  1341. $value = trim(_RunSpanGamut($value));
  1342. # Check if this should be enclosed in a paragraph.
  1343. # Text equaling to a clean tag hash are not enclosed.
  1344. # Text starting with a block tag hash are not either.
  1345. $clean_key = $value;
  1346. $block_key = substr($value, 0, 32);
  1347. $is_p = (!isset($md_html_blocks[$block_key]) &&
  1348. !isset($md_html_hashes[$clean_key]));
  1349. if($is_p) {
  1350. $value = "<p>$value</p>";
  1351. }
  1352. $grafs[$key] = $value;
  1353. }
  1354. # Join grafs in one text, then unhash HTML tags.
  1355. $text = implode("\n\n", $grafs);
  1356. # Finish by removing any tag hashes still present in $text.
  1357. $text = _UnhashTags($text);
  1358. return $text;
  1359. }
  1360. function _EncodeAmpsAndAngles($text) {
  1361. # Smart processing for ampersands and angle brackets that need to be encoded.
  1362. # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
  1363. # http://bumppo.net/projects/amputator/
  1364. $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
  1365. '&amp;', $text);;
  1366. # Encode naked <'s
  1367. $text = preg_replace('{<(?![a-z/?\$!])}i', '&lt;', $text);
  1368. return $text;
  1369. }
  1370. function _EncodeBackslashEscapes($text) {
  1371. #
  1372. # Parameter: String.
  1373. # Returns: The string, with after processing the following backslash
  1374. # escape sequences.
  1375. #
  1376. global $md_escape_table, $md_backslash_escape_table;
  1377. # Must process escaped backslashes first.
  1378. return str_replace(array_keys($md_backslash_escape_table),
  1379. array_values($md_backslash_escape_table), $text);
  1380. }
  1381. function _DoAutoLinks($text) {
  1382. $text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
  1383. '<a href="\1">\1</a>', $text);
  1384. # Email addresses: <address@domain.foo>
  1385. $text = preg_replace('{
  1386. <
  1387. (?:mailto:)?
  1388. (
  1389. [-.\w]+
  1390. \@
  1391. [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
  1392. )
  1393. >
  1394. }exi',
  1395. "_EncodeEmailAddress(_UnescapeSpecialChars(_UnslashQuotes('\\1')))",
  1396. $text);
  1397. return $text;
  1398. }
  1399. function _EncodeEmailAddress($addr) {
  1400. #
  1401. # Input: an email address, e.g. "foo@example.com"
  1402. #
  1403. # Output: the email address as a mailto link, with each character
  1404. # of the address encoded as either a decimal or hex entity, in
  1405. # the hopes of foiling most address harvesting spam bots. E.g.:
  1406. #
  1407. # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
  1408. # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
  1409. # &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
  1410. #
  1411. # Based by a filter by Matthew Wickline, posted to the BBEdit-Talk
  1412. # mailing list: <http://tinyurl.com/yu7ue>
  1413. #
  1414. $addr = "mailto:" . $addr;
  1415. $length = strlen($addr);
  1416. # leave ':' alone (to spot mailto: later)
  1417. $addr = preg_replace_callback('/([^\:])/',
  1418. '_EncodeEmailAddress_callback', $addr);
  1419. $addr = "<a href=\"$addr\">$addr</a>";
  1420. # strip the mailto: from the visible part
  1421. $addr = preg_replace('/">.+?:/', '">', $addr);
  1422. return $addr;
  1423. }
  1424. function _EncodeEmailAddress_callback($matches) {
  1425. $char = $matches[1];
  1426. $r = rand(0, 100);
  1427. # roughly 10% raw, 45% hex, 45% dec
  1428. # '@' *must* be encoded. I insist.
  1429. if($r > 90 && $char != '@') return $char;
  1430. if($r < 45) return '&#x'.dechex(ord($char)).';';
  1431. return '&#'.ord($char).';';
  1432. }
  1433. function _UnescapeSpecialChars($text) {
  1434. #
  1435. # Swap back in all the special characters we've hidden.
  1436. #
  1437. global $md_escape_table;
  1438. return str_replace(array_values($md_escape_table),
  1439. array_keys($md_escape_table), $text);
  1440. }
  1441. function _UnhashTags($text) {
  1442. #
  1443. # Swap back in all the tags hashed by _HashHTMLBlocks.
  1444. #
  1445. global $md_html_hashes;
  1446. return str_replace(array_keys($md_html_hashes),
  1447. array_values($md_html_hashes), $text);
  1448. }
  1449. # _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
  1450. # We only define it if it is not already defined.
  1451. if(!function_exists('_TokenizeHTML')) :
  1452. function _TokenizeHTML($str) {
  1453. #
  1454. # Parameter: String containing HTML markup.
  1455. # Returns: An array of the tokens comprising the input
  1456. # string. Each token is either a tag (possibly with nested,
  1457. # tags contained therein, such as <a href="<MTFoo>">, or a
  1458. # run of text between tags. Each element of the array is a
  1459. # two-element array; the first is either 'tag' or 'text';
  1460. # the second is the actual value.
  1461. #
  1462. #
  1463. # Regular expression derived from the _tokenize() subroutine in
  1464. # Brad Choate's MTRegex plugin.
  1465. # <http://www.bradchoate.com/past/mtregex.php>
  1466. #
  1467. $index = 0;
  1468. $tokens = array();
  1469. $match = '(?s:<!(?:--.*?--\s*)+>)|'. # comment
  1470. '(?s:<\?.*?\?>)|'. # processing instruction
  1471. # regular tags
  1472. '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
  1473. $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  1474. foreach ($parts as $part) {
  1475. if(++$index % 2 && $part != '')
  1476. $tokens[] = array('text', $part);
  1477. else
  1478. $tokens[] = array('tag', $part);
  1479. }
  1480. return $tokens;
  1481. }
  1482. endif;
  1483. function _Outdent($text) {
  1484. #
  1485. # Remove one level of line-leading tabs or spaces
  1486. #
  1487. global $md_tab_width;
  1488. return preg_replace("/^(\\t|[ ]{1,$md_tab_width})/m", "", $text);
  1489. }
  1490. function _Detab($text) {
  1491. #
  1492. # Replace tabs with the appropriate amount of space.
  1493. #
  1494. global $md_tab_width;
  1495. # For each line we separate the line in blocks delemited by
  1496. # tab characters. Then we reconstruct every line by adding the
  1497. # appropriate number of space between each blocks.
  1498. $lines = explode("\n", $text);
  1499. $text = "";
  1500. foreach ($lines as $line) {
  1501. # Split in blocks.
  1502. $blocks = explode("\t", $line);
  1503. # Add each blocks to the line.
  1504. $line = $blocks[0];
  1505. unset($blocks[0]); # Do not add first block twice.
  1506. foreach ($blocks as $block) {
  1507. # Calculate amount of space, insert spaces, insert block.
  1508. $amount = $md_tab_width - strlen($line) % $md_tab_width;
  1509. $line .= str_repeat(" ", $amount) . $block;
  1510. }
  1511. $text .= "$line\n";
  1512. }
  1513. return $text;
  1514. }
  1515. function _UnslashQuotes($text) {
  1516. #
  1517. # This function is useful to remove automaticaly slashed double quotes
  1518. # when using preg_replace and evaluating an expression.
  1519. # Parameter: String.
  1520. # Returns: The string with any slash-double-quote (\") sequence replaced
  1521. # by a single double quote.
  1522. #
  1523. return str_replace('\"', '"', $text);
  1524. }
  1525. /*
  1526. PHP Markdown Extra
  1527. ==================
  1528. Description
  1529. -----------
  1530. This is a PHP translation of the original Markdown formatter written in
  1531. Perl by John Gruber. This special version of PHP Markdown also includes
  1532. syntax additions of my own.
  1533. Markdown is a text-to-HTML filter; it translates an easy-to-read /
  1534. easy-to-write structured text format into HTML. Markdown's text format
  1535. is most similar to that of plain text email, and supports features such
  1536. as headers, *emphasis*, code blocks, blockquotes, and links.
  1537. Markdown's syntax is designed not as a generic markup language, but
  1538. specifically to serve as a front-end to (X)HTML. You can use span-level
  1539. HTML tags anywhere in a Markdown document, and you can use block-level
  1540. HTML tags (like <div> and <table>) as well.
  1541. For more information about Markdown's syntax, see:
  1542. <http://daringfireball.net/projects/markdown/>
  1543. Bugs
  1544. ----
  1545. To file bug reports please send email to:
  1546. <michel.fortin@michelf.com>
  1547. Please include with your report: (1) the example input; (2) the output you
  1548. expected; (3) the output Markdown actually produced.
  1549. Version History
  1550. ---------------
  1551. See Readme file for details.
  1552. Extra 1.0.1 - 9 December 2005
  1553. Extra 1.0 - 5 September 2005
  1554. Extra 1.0b4 - 1 August 2005
  1555. Extra 1.0b3 - 29 July 2005
  1556. Extra 1.0b2 - 26 July 2005
  1557. Extra 1.0b1 - 25 July 2005
  1558. Author & Contributors
  1559. ---------------------
  1560. Original Markdown in Perl by John Gruber
  1561. <http://daringfireball.net/>
  1562. PHP port and extras by Michel Fortin
  1563. <http://www.michelf.com/…

Large files files are truncated, but you can click here to view the full file