PageRenderTime 64ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/contrib/markdown.php

https://github.com/matthiask/swisdk2
PHP | 2734 lines | 1564 code | 356 blank | 814 comment | 128 complexity | 428f2512b964ad740c19aea35e722d9f MD5 | raw file
Possible License(s): GPL-2.0

Large files files are truncated, but you can click here to view the full file

  1. <?php
  2. #
  3. # Markdown Extra - A text-to-HTML conversion tool for web writers
  4. #
  5. # PHP Markdown & Extra
  6. # Copyright (c) 2004-2006 Michel Fortin
  7. # <http://www.michelf.com/projects/php-markdown/>
  8. #
  9. # Original Markdown
  10. # Copyright (c) 2004-2006 John Gruber
  11. # <http://daringfireball.net/projects/markdown/>
  12. #
  13. define( 'MARKDOWN_VERSION', "1.0.1e" ); # Thu 28 Dec 2006
  14. define( 'MARKDOWNEXTRA_VERSION', "1.1.1" ); # Thu 28 Dec 2006
  15. #
  16. # Global default settings:
  17. #
  18. # Change to ">" for HTML output
  19. define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
  20. # Define the width of a tab for code blocks.
  21. define( 'MARKDOWN_TAB_WIDTH', 4 );
  22. # Optional title attribute for footnote links and backlinks.
  23. define( 'MARKDOWN_FN_LINK_TITLE', "" );
  24. define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
  25. # Optional class attribute for footnote links and backlinks.
  26. define( 'MARKDOWN_FN_LINK_CLASS', "" );
  27. define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
  28. #
  29. # WordPress settings:
  30. #
  31. # Change to false to remove Markdown from posts and/or comments.
  32. define( 'MARKDOWN_WP_POSTS', true );
  33. define( 'MARKDOWN_WP_COMMENTS', true );
  34. ### Standard Function Interface ###
  35. define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );
  36. function Markdown($text) {
  37. #
  38. # Initialize the parser and return the result of its transform method.
  39. #
  40. # Setup static parser variable.
  41. static $parser;
  42. if (!isset($parser)) {
  43. $parser_class = MARKDOWN_PARSER_CLASS;
  44. $parser = new $parser_class;
  45. }
  46. # Transform text using parser.
  47. return $parser->transform($text);
  48. }
  49. ### WordPress Plugin Interface ###
  50. /*
  51. Plugin Name: Markdown Extra
  52. Plugin URI: http://www.michelf.com/projects/php-markdown/
  53. Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
  54. Version: 1.1.1
  55. Author: Michel Fortin
  56. Author URI: http://www.michelf.com/
  57. */
  58. if (isset($wp_version)) {
  59. # More details about how it works here:
  60. # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
  61. # Post content and excerpts
  62. # - Remove WordPress paragraph generator.
  63. # - Run Markdown on excerpt, then remove all tags.
  64. # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
  65. if (MARKDOWN_WP_POSTS) {
  66. remove_filter('the_content', 'wpautop');
  67. remove_filter('the_excerpt', 'wpautop');
  68. add_filter('the_content', 'Markdown', 6);
  69. add_filter('get_the_excerpt', 'Markdown', 6);
  70. add_filter('get_the_excerpt', 'trim', 7);
  71. add_filter('the_excerpt', 'mdwp_add_p');
  72. add_filter('the_excerpt_rss', 'mdwp_strip_p');
  73. remove_filter('content_save_pre', 'balanceTags', 50);
  74. remove_filter('excerpt_save_pre', 'balanceTags', 50);
  75. add_filter('the_content', 'balanceTags', 50);
  76. add_filter('get_the_excerpt', 'balanceTags', 9);
  77. }
  78. # Comments
  79. # - Remove WordPress paragraph generator.
  80. # - Remove WordPress auto-link generator.
  81. # - Scramble important tags before passing them to the kses filter.
  82. # - Run Markdown on excerpt then remove paragraph tags.
  83. if (MARKDOWN_WP_COMMENTS) {
  84. remove_filter('comment_text', 'wpautop');
  85. remove_filter('comment_text', 'make_clickable');
  86. add_filter('pre_comment_content', 'Markdown', 6);
  87. add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
  88. add_filter('pre_comment_content', 'mdwp_show_tags', 12);
  89. add_filter('get_comment_text', 'Markdown', 6);
  90. add_filter('get_comment_excerpt', 'Markdown', 6);
  91. add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
  92. global $markdown_hidden_tags;
  93. $markdown_hidden_tags = array(
  94. '<p>' => md5('<p>'), '</p>' => md5('</p>'),
  95. '<pre>' => md5('<pre>'), '</pre>'=> md5('</pre>'),
  96. '<ol>' => md5('<ol>'), '</ol>' => md5('</ol>'),
  97. '<ul>' => md5('<ul>'), '</ul>' => md5('</ul>'),
  98. '<li>' => md5('<li>'), '</li>' => md5('</li>'),
  99. );
  100. }
  101. function mdwp_add_p($text) {
  102. if (strlen($text) == 0) return;
  103. if (strcasecmp(substr($text, -3), '<p>') == 0) return $text;
  104. return '<p>'.$text.'</p>';
  105. }
  106. function mdwp_strip_p($t) { return preg_replace('{</?[pP]>}', '', $t); }
  107. function mdwp_hide_tags($text) {
  108. global $markdown_hidden_tags;
  109. return str_replace(array_keys($markdown_hidden_tags),
  110. array_values($markdown_hidden_tags), $text);
  111. }
  112. function mdwp_show_tags($text) {
  113. global $markdown_hidden_tags;
  114. return str_replace(array_values($markdown_hidden_tags),
  115. array_keys($markdown_hidden_tags), $text);
  116. }
  117. }
  118. ### bBlog Plugin Info ###
  119. function identify_modifier_markdown() {
  120. return array(
  121. 'name' => 'markdown',
  122. 'type' => 'modifier',
  123. 'nicename' => 'PHP Markdown Extra',
  124. 'description' => 'A text-to-HTML conversion tool for web writers',
  125. 'authors' => 'Michel Fortin and John Gruber',
  126. 'licence' => 'GPL',
  127. 'version' => MARKDOWNEXTRA_VERSION,
  128. 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
  129. );
  130. }
  131. ### Smarty Modifier Interface ###
  132. function smarty_modifier_markdown($text) {
  133. return Markdown($text);
  134. }
  135. ### Textile Compatibility Mode ###
  136. # Rename this file to "classTextile.php" and it can replace Textile everywhere.
  137. if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
  138. # Try to include PHP SmartyPants. Should be in the same directory.
  139. @include_once 'smartypants.php';
  140. # Fake Textile class. It calls Markdown instead.
  141. class Textile {
  142. function TextileThis($text, $lite='', $encode='') {
  143. if ($lite == '' && $encode == '') $text = Markdown($text);
  144. if (function_exists('SmartyPants')) $text = SmartyPants($text);
  145. return $text;
  146. }
  147. # Workaround to ensure compatibility with TextPattern 4.0.3.
  148. function blockLite($text) { return $text; }
  149. }
  150. }
  151. #
  152. # Markdown Parser Class
  153. #
  154. class Markdown_Parser {
  155. # Regex to match balanced [brackets].
  156. # Needed to insert a maximum bracked depth while converting to PHP.
  157. var $nested_brackets_depth = 6;
  158. var $nested_brackets;
  159. # Table of hash values for escaped characters:
  160. var $escape_chars = '\`*_{}[]()>#+-.!';
  161. var $escape_table = array();
  162. var $backslash_escape_table = array();
  163. # Change to ">" for HTML output.
  164. var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
  165. var $tab_width = MARKDOWN_TAB_WIDTH;
  166. function Markdown_Parser() {
  167. #
  168. # Constructor function. Initialize appropriate member variables.
  169. #
  170. $this->_initDetab();
  171. $this->nested_brackets =
  172. str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  173. str_repeat('\])*', $this->nested_brackets_depth);
  174. # Create an identical table but for escaped characters.
  175. foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) {
  176. $hash = md5($char);
  177. $this->escape_table[$char] = $hash;
  178. $this->backslash_escape_table["\\$char"] = $hash;
  179. }
  180. # Sort document, block, and span gamut in ascendent priority order.
  181. asort($this->document_gamut);
  182. asort($this->block_gamut);
  183. asort($this->span_gamut);
  184. }
  185. # Internal hashes used during transformation.
  186. var $urls = array();
  187. var $titles = array();
  188. var $html_blocks = array();
  189. var $html_hashes = array(); # Contains both blocks and span hashes.
  190. function transform($text) {
  191. #
  192. # Main function. The order in which other subs are called here is
  193. # essential. Link and image substitutions need to happen before
  194. # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  195. # and <img> tags get encoded.
  196. #
  197. # Clear the global hashes. If we don't clear these, you get conflicts
  198. # from other articles when generating a page which contains more than
  199. # one article (e.g. an index page that shows the N most recent
  200. # articles):
  201. $this->urls = array();
  202. $this->titles = array();
  203. $this->html_blocks = array();
  204. $this->html_hashes = array();
  205. # Standardize line endings:
  206. # DOS to Unix and Mac to Unix
  207. $text = str_replace(array("\r\n", "\r"), "\n", $text);
  208. # Make sure $text ends with a couple of newlines:
  209. $text .= "\n\n";
  210. # Convert all tabs to spaces.
  211. $text = $this->detab($text);
  212. # Turn block-level HTML blocks into hash entries
  213. $text = $this->hashHTMLBlocks($text);
  214. # Strip any lines consisting only of spaces and tabs.
  215. # This makes subsequent regexen easier to write, because we can
  216. # match consecutive blank lines with /\n+/ instead of something
  217. # contorted like /[ \t]*\n+/ .
  218. $text = preg_replace('/^[ \t]+$/m', '', $text);
  219. # Run document gamut methods.
  220. foreach ($this->document_gamut as $method => $priority) {
  221. $text = $this->$method($text);
  222. }
  223. return $text . "\n";
  224. }
  225. var $document_gamut = array(
  226. # Strip link definitions, store in hashes.
  227. "stripLinkDefinitions" => 20,
  228. "runBasicBlockGamut" => 30,
  229. "unescapeSpecialChars" => 90,
  230. );
  231. function stripLinkDefinitions($text) {
  232. #
  233. # Strips link definitions from text, stores the URLs and titles in
  234. # hash references.
  235. #
  236. $less_than_tab = $this->tab_width - 1;
  237. # Link defs are in the form: ^[id]: url "optional title"
  238. $text = preg_replace_callback('{
  239. ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
  240. [ \t]*
  241. \n? # maybe *one* newline
  242. [ \t]*
  243. <?(\S+?)>? # url = $2
  244. [ \t]*
  245. \n? # maybe one newline
  246. [ \t]*
  247. (?:
  248. (?<=\s) # lookbehind for whitespace
  249. ["(]
  250. (.*?) # title = $3
  251. [")]
  252. [ \t]*
  253. )? # title is optional
  254. (?:\n+|\Z)
  255. }xm',
  256. array(&$this, '_stripLinkDefinitions_callback'),
  257. $text);
  258. return $text;
  259. }
  260. function _stripLinkDefinitions_callback($matches) {
  261. $link_id = strtolower($matches[1]);
  262. $this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
  263. if (isset($matches[3]))
  264. $this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
  265. return ''; # String that will replace the block
  266. }
  267. function hashHTMLBlocks($text) {
  268. $less_than_tab = $this->tab_width - 1;
  269. # Hashify HTML blocks:
  270. # We only want to do this for block-level HTML tags, such as headers,
  271. # lists, and tables. That's because we still want to wrap <p>s around
  272. # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  273. # phrase emphasis, and spans. The list of tags we're looking for is
  274. # hard-coded:
  275. $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
  276. 'script|noscript|form|fieldset|iframe|math|ins|del';
  277. $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
  278. 'script|noscript|form|fieldset|iframe|math';
  279. # Regular expression for the content of a block tag.
  280. $nested_tags_level = 4;
  281. $attr = '
  282. (?> # optional tag attributes
  283. \s # starts with whitespace
  284. (?>
  285. [^>"/]+ # text outside quotes
  286. |
  287. /+(?!>) # slash not followed by ">"
  288. |
  289. "[^"]*" # text inside double quotes (tolerate ">")
  290. |
  291. \'[^\']*\' # text inside single quotes (tolerate ">")
  292. )*
  293. )?
  294. ';
  295. $content =
  296. str_repeat('
  297. (?>
  298. [^<]+ # content without tag
  299. |
  300. <\2 # nested opening tag
  301. '.$attr.' # attributes
  302. (?:
  303. />
  304. |
  305. >', $nested_tags_level). # end of opening tag
  306. '.*?'. # last level nested tag content
  307. str_repeat('
  308. </\2\s*> # closing nested tag
  309. )
  310. |
  311. <(?!/\2\s*> # other tags with a different name
  312. )
  313. )*',
  314. $nested_tags_level);
  315. # First, look for nested blocks, e.g.:
  316. # <div>
  317. # <div>
  318. # tags for inner block must be indented.
  319. # </div>
  320. # </div>
  321. #
  322. # The outermost tags must start at the left margin for this to match, and
  323. # the inner nested divs must be indented.
  324. # We need to do this before the next, more liberal match, because the next
  325. # match will start at the first `<div>` and stop at the first `</div>`.
  326. $text = preg_replace_callback('{
  327. ( # save in $1
  328. ^ # start of line (with /m)
  329. <('.$block_tags_a.')# start tag = $2
  330. '.$attr.'>\n # attributes followed by > and \n
  331. '.$content.' # content, support nesting
  332. </\2> # the matching end tag
  333. [ \t]* # trailing spaces/tabs
  334. (?=\n+|\Z) # followed by a newline or end of document
  335. )
  336. }xm',
  337. array(&$this, '_hashHTMLBlocks_callback'),
  338. $text);
  339. #
  340. # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between.
  341. #
  342. $text = preg_replace_callback('{
  343. ( # save in $1
  344. ^ # start of line (with /m)
  345. <('.$block_tags_b.')# start tag = $2
  346. '.$attr.'> # attributes followed by >
  347. '.$content.' # content, support nesting
  348. </\2> # the matching end tag
  349. [ \t]* # trailing spaces/tabs
  350. (?=\n+|\Z) # followed by a newline or end of document
  351. )
  352. }xm',
  353. array(&$this, '_hashHTMLBlocks_callback'),
  354. $text);
  355. # Special case just for <hr />. It was easier to make a special case than
  356. # to make the other regex more complicated.
  357. $text = preg_replace_callback('{
  358. (?:
  359. (?<=\n\n) # Starting after a blank line
  360. | # or
  361. \A\n? # the beginning of the doc
  362. )
  363. ( # save in $1
  364. [ ]{0,'.$less_than_tab.'}
  365. <(hr) # start tag = $2
  366. \b # word break
  367. ([^<>])*? #
  368. /?> # the matching end tag
  369. [ \t]*
  370. (?=\n{2,}|\Z) # followed by a blank line or end of document
  371. )
  372. }x',
  373. array(&$this, '_hashHTMLBlocks_callback'),
  374. $text);
  375. # Special case for standalone HTML comments:
  376. $text = preg_replace_callback('{
  377. (?:
  378. (?<=\n\n) # Starting after a blank line
  379. | # or
  380. \A\n? # the beginning of the doc
  381. )
  382. ( # save in $1
  383. [ ]{0,'.$less_than_tab.'}
  384. (?s:
  385. <!-- .*? -->
  386. )
  387. [ \t]*
  388. (?=\n{2,}|\Z) # followed by a blank line or end of document
  389. )
  390. }x',
  391. array(&$this, '_hashHTMLBlocks_callback'),
  392. $text);
  393. # PHP and ASP-style processor instructions (<? and <%...%>)
  394. $text = preg_replace_callback('{
  395. (?:
  396. (?<=\n\n) # Starting after a blank line
  397. | # or
  398. \A\n? # the beginning of the doc
  399. )
  400. ( # save in $1
  401. [ ]{0,'.$less_than_tab.'}
  402. (?s:
  403. <([?%]) # $2
  404. .*?
  405. \2>
  406. )
  407. [ \t]*
  408. (?=\n{2,}|\Z) # followed by a blank line or end of document
  409. )
  410. }x',
  411. array(&$this, '_hashHTMLBlocks_callback'),
  412. $text);
  413. return $text;
  414. }
  415. function _hashHTMLBlocks_callback($matches) {
  416. $text = $matches[1];
  417. $key = $this->hashBlock($text);
  418. return "\n\n$key\n\n";
  419. }
  420. function hashBlock($text) {
  421. #
  422. # Called whenever a tag must be hashed when a function insert a block-level
  423. # tag in $text, it pass through this function and is automaticaly escaped,
  424. # which remove the need to call _HashHTMLBlocks at every step.
  425. #
  426. # Swap back any tag hash found in $text so we do not have to `unhash`
  427. # multiple times at the end.
  428. $text = $this->unhash($text);
  429. # Then hash the block.
  430. $key = md5($text);
  431. $this->html_hashes[$key] = $text;
  432. $this->html_blocks[$key] = $text;
  433. return $key; # String that will replace the tag.
  434. }
  435. function hashSpan($text) {
  436. #
  437. # Called whenever a tag must be hashed when a function insert a span-level
  438. # element in $text, it pass through this function and is automaticaly
  439. # escaped, blocking invalid nested overlap.
  440. #
  441. # Swap back any tag hash found in $text so we do not have to `unhash`
  442. # multiple times at the end.
  443. $text = $this->unhash($text);
  444. # Then hash the span.
  445. $key = md5($text);
  446. $this->html_hashes[$key] = $text;
  447. return $key; # String that will replace the span tag.
  448. }
  449. var $block_gamut = array(
  450. #
  451. # These are all the transformations that form block-level
  452. # tags like paragraphs, headers, and list items.
  453. #
  454. "doHeaders" => 10,
  455. "doHorizontalRules" => 20,
  456. "doLists" => 40,
  457. "doCodeBlocks" => 50,
  458. "doBlockQuotes" => 60,
  459. );
  460. function runBlockGamut($text) {
  461. #
  462. # Run block gamut tranformations.
  463. #
  464. # We need to escape raw HTML in Markdown source before doing anything
  465. # else. This need to be done for each block, and not only at the
  466. # begining in the Markdown function since hashed blocks can be part of
  467. # list items and could have been indented. Indented blocks would have
  468. # been seen as a code block in a previous pass of hashHTMLBlocks.
  469. $text = $this->hashHTMLBlocks($text);
  470. return $this->runBasicBlockGamut($text);
  471. }
  472. function runBasicBlockGamut($text) {
  473. #
  474. # Run block gamut tranformations, without hashing HTML blocks. This is
  475. # useful when HTML blocks are known to be already hashed, like in the first
  476. # whole-document pass.
  477. #
  478. foreach ($this->block_gamut as $method => $priority) {
  479. $text = $this->$method($text);
  480. }
  481. # Finally form paragraph and restore hashed blocks.
  482. $text = $this->formParagraphs($text);
  483. return $text;
  484. }
  485. function doHorizontalRules($text) {
  486. # Do Horizontal Rules:
  487. return preg_replace(
  488. array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
  489. '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
  490. '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
  491. "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
  492. $text);
  493. }
  494. var $span_gamut = array(
  495. #
  496. # These are all the transformations that occur *within* block-level
  497. # tags like paragraphs, headers, and list items.
  498. #
  499. "escapeSpecialCharsWithinTagAttributes" => -20,
  500. "doCodeSpans" => -10,
  501. "encodeBackslashEscapes" => -5,
  502. # Process anchor and image tags. Images must come first,
  503. # because ![foo][f] looks like an anchor.
  504. "doImages" => 10,
  505. "doAnchors" => 20,
  506. # Make links out of things like `<http://example.com/>`
  507. # Must come after doAnchors, because you can use < and >
  508. # delimiters in inline links like [this](<url>).
  509. "doAutoLinks" => 30,
  510. "encodeAmpsAndAngles" => 40,
  511. "doItalicsAndBold" => 50,
  512. "doHardBreaks" => 60,
  513. );
  514. function runSpanGamut($text) {
  515. #
  516. # Run span gamut tranformations.
  517. #
  518. foreach ($this->span_gamut as $method => $priority) {
  519. $text = $this->$method($text);
  520. }
  521. return $text;
  522. }
  523. function doHardBreaks($text) {
  524. # Do hard breaks:
  525. $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n");
  526. return preg_replace('/ {2,}\n/', $br_tag, $text);
  527. }
  528. function escapeSpecialCharsWithinTagAttributes($text) {
  529. #
  530. # Within tags -- meaning between < and > -- encode [\ ` * _] so they
  531. # don't conflict with their use in Markdown for code, italics and strong.
  532. # We're replacing each such character with its corresponding MD5 checksum
  533. # value; this is likely overkill, but it should prevent us from colliding
  534. # with the escape values by accident.
  535. #
  536. $tokens = $this->tokenizeHTML($text);
  537. $text = ''; # rebuild $text from the tokens
  538. foreach ($tokens as $cur_token) {
  539. if ($cur_token[0] == 'tag') {
  540. $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
  541. $cur_token[1] = str_replace(array('`'), $this->escape_table['`'], $cur_token[1]);
  542. $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
  543. $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
  544. }
  545. $text .= $cur_token[1];
  546. }
  547. return $text;
  548. }
  549. function doAnchors($text) {
  550. #
  551. # Turn Markdown link shortcuts into XHTML <a> tags.
  552. #
  553. #
  554. # First, handle reference-style links: [link text] [id]
  555. #
  556. $text = preg_replace_callback('{
  557. ( # wrap whole match in $1
  558. \[
  559. ('.$this->nested_brackets.') # link text = $2
  560. \]
  561. [ ]? # one optional space
  562. (?:\n[ ]*)? # one optional newline followed by spaces
  563. \[
  564. (.*?) # id = $3
  565. \]
  566. )
  567. }xs',
  568. array(&$this, '_doAnchors_reference_callback'), $text);
  569. #
  570. # Next, inline-style links: [link text](url "optional title")
  571. #
  572. $text = preg_replace_callback('{
  573. ( # wrap whole match in $1
  574. \[
  575. ('.$this->nested_brackets.') # link text = $2
  576. \]
  577. \( # literal paren
  578. [ \t]*
  579. <?(.*?)>? # href = $3
  580. [ \t]*
  581. ( # $4
  582. ([\'"]) # quote char = $5
  583. (.*?) # Title = $6
  584. \5 # matching quote
  585. [ \t]* # ignore any spaces/tabs between closing quote and )
  586. )? # title is optional
  587. \)
  588. )
  589. }xs',
  590. array(&$this, '_DoAnchors_inline_callback'), $text);
  591. #
  592. # Last, handle reference-style shortcuts: [link text]
  593. # These must come last in case you've also got [link test][1]
  594. # or [link test](/foo)
  595. #
  596. // $text = preg_replace_callback('{
  597. // ( # wrap whole match in $1
  598. // \[
  599. // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
  600. // \]
  601. // )
  602. // }xs',
  603. // array(&$this, '_doAnchors_reference_callback'), $text);
  604. return $text;
  605. }
  606. function _doAnchors_reference_callback($matches) {
  607. $whole_match = $matches[1];
  608. $link_text = $matches[2];
  609. $link_id =& $matches[3];
  610. if ($link_id == "") {
  611. # for shortcut links like [this][] or [this].
  612. $link_id = $link_text;
  613. }
  614. # lower-case and turn embedded newlines into spaces
  615. $link_id = strtolower($link_id);
  616. $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
  617. if (isset($this->urls[$link_id])) {
  618. $url = $this->urls[$link_id];
  619. $url = $this->encodeAmpsAndAngles($url);
  620. $result = "<a href=\"$url\"";
  621. if ( isset( $this->titles[$link_id] ) ) {
  622. $title = $this->titles[$link_id];
  623. $title = $this->encodeAmpsAndAngles($title);
  624. $result .= " title=\"$title\"";
  625. }
  626. $link_text = $this->runSpanGamut($link_text);
  627. $result .= ">$link_text</a>";
  628. $result = $this->hashSpan($result);
  629. }
  630. else {
  631. $result = $whole_match;
  632. }
  633. return $result;
  634. }
  635. function _doAnchors_inline_callback($matches) {
  636. $whole_match = $matches[1];
  637. $link_text = $this->runSpanGamut($matches[2]);
  638. $url = $matches[3];
  639. $title =& $matches[6];
  640. $url = $this->encodeAmpsAndAngles($url);
  641. $result = "<a href=\"$url\"";
  642. if (isset($title)) {
  643. $title = str_replace('"', '&quot;', $title);
  644. $title = $this->encodeAmpsAndAngles($title);
  645. $result .= " title=\"$title\"";
  646. }
  647. $link_text = $this->runSpanGamut($link_text);
  648. $result .= ">$link_text</a>";
  649. return $this->hashSpan($result);
  650. }
  651. function doImages($text) {
  652. #
  653. # Turn Markdown image shortcuts into <img> tags.
  654. #
  655. #
  656. # First, handle reference-style labeled images: ![alt text][id]
  657. #
  658. $text = preg_replace_callback('{
  659. ( # wrap whole match in $1
  660. !\[
  661. ('.$this->nested_brackets.') # alt text = $2
  662. \]
  663. [ ]? # one optional space
  664. (?:\n[ ]*)? # one optional newline followed by spaces
  665. \[
  666. (.*?) # id = $3
  667. \]
  668. )
  669. }xs',
  670. array(&$this, '_doImages_reference_callback'), $text);
  671. #
  672. # Next, handle inline images: ![alt text](url "optional title")
  673. # Don't forget: encode * and _
  674. #
  675. $text = preg_replace_callback('{
  676. ( # wrap whole match in $1
  677. !\[
  678. ('.$this->nested_brackets.') # alt text = $2
  679. \]
  680. \s? # One optional whitespace character
  681. \( # literal paren
  682. [ \t]*
  683. <?(\S+?)>? # src url = $3
  684. [ \t]*
  685. ( # $4
  686. ([\'"]) # quote char = $5
  687. (.*?) # title = $6
  688. \5 # matching quote
  689. [ \t]*
  690. )? # title is optional
  691. \)
  692. )
  693. }xs',
  694. array(&$this, '_doImages_inline_callback'), $text);
  695. return $text;
  696. }
  697. function _doImages_reference_callback($matches) {
  698. $whole_match = $matches[1];
  699. $alt_text = $matches[2];
  700. $link_id = strtolower($matches[3]);
  701. if ($link_id == "") {
  702. $link_id = strtolower($alt_text); # for shortcut links like ![this][].
  703. }
  704. $alt_text = str_replace('"', '&quot;', $alt_text);
  705. if (isset($this->urls[$link_id])) {
  706. $url = $this->urls[$link_id];
  707. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  708. if (isset($this->titles[$link_id])) {
  709. $title = $this->titles[$link_id];
  710. $result .= " title=\"$title\"";
  711. }
  712. $result .= $this->empty_element_suffix;
  713. $result = $this->hashSpan($result);
  714. }
  715. else {
  716. # If there's no such link ID, leave intact:
  717. $result = $whole_match;
  718. }
  719. return $result;
  720. }
  721. function _doImages_inline_callback($matches) {
  722. $whole_match = $matches[1];
  723. $alt_text = $matches[2];
  724. $url = $matches[3];
  725. $title =& $matches[6];
  726. $alt_text = str_replace('"', '&quot;', $alt_text);
  727. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  728. if (isset($title)) {
  729. $title = str_replace('"', '&quot;', $title);
  730. $result .= " title=\"$title\""; # $title already quoted
  731. }
  732. $result .= $this->empty_element_suffix;
  733. return $this->hashSpan($result);
  734. }
  735. function doHeaders($text) {
  736. # Setext-style headers:
  737. # Header 1
  738. # ========
  739. #
  740. # Header 2
  741. # --------
  742. #
  743. $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
  744. array(&$this, '_doHeaders_callback_setext_h1'), $text);
  745. $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
  746. array(&$this, '_doHeaders_callback_setext_h2'), $text);
  747. # atx-style headers:
  748. # # Header 1
  749. # ## Header 2
  750. # ## Header 2 with closing hashes ##
  751. # ...
  752. # ###### Header 6
  753. #
  754. $text = preg_replace_callback('{
  755. ^(\#{1,6}) # $1 = string of #\'s
  756. [ \t]*
  757. (.+?) # $2 = Header text
  758. [ \t]*
  759. \#* # optional closing #\'s (not counted)
  760. \n+
  761. }xm',
  762. array(&$this, '_doHeaders_callback_atx'), $text);
  763. return $text;
  764. }
  765. function _doHeaders_callback_setext_h1($matches) {
  766. return $this->hashBlock("<h1>".$this->runSpanGamut($matches[1])."</h1>")."\n\n";
  767. }
  768. function _doHeaders_callback_setext_h2($matches) {
  769. return $this->hashBlock("<h2>".$this->runSpanGamut($matches[1])."</h2>")."\n\n";
  770. }
  771. function _doHeaders_callback_atx($matches) {
  772. $level = strlen($matches[1]);
  773. return $this->hashBlock("<h$level>".$this->runSpanGamut($matches[2])."</h$level>")."\n\n";
  774. }
  775. function doLists($text) {
  776. #
  777. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  778. #
  779. $less_than_tab = $this->tab_width - 1;
  780. # Re-usable patterns to match list item bullets and number markers:
  781. $marker_ul = '[*+-]';
  782. $marker_ol = '\d+[.]';
  783. $marker_any = "(?:$marker_ul|$marker_ol)";
  784. $markers = array($marker_ul, $marker_ol);
  785. foreach ($markers as $marker) {
  786. # Re-usable pattern to match any entirel ul or ol list:
  787. $whole_list = '
  788. ( # $1 = whole list
  789. ( # $2
  790. [ ]{0,'.$less_than_tab.'}
  791. ('.$marker.') # $3 = first list item marker
  792. [ \t]+
  793. )
  794. (?s:.+?)
  795. ( # $4
  796. \z
  797. |
  798. \n{2,}
  799. (?=\S)
  800. (?! # Negative lookahead for another list item marker
  801. [ \t]*
  802. '.$marker.'[ \t]+
  803. )
  804. )
  805. )
  806. '; // mx
  807. # We use a different prefix before nested lists than top-level lists.
  808. # See extended comment in _ProcessListItems().
  809. if ($this->list_level) {
  810. $text = preg_replace_callback('{
  811. ^
  812. '.$whole_list.'
  813. }mx',
  814. array(&$this, '_doLists_callback'), $text);
  815. }
  816. else {
  817. $text = preg_replace_callback('{
  818. (?:(?<=\n)\n|\A\n?) # Must eat the newline
  819. '.$whole_list.'
  820. }mx',
  821. array(&$this, '_doLists_callback'), $text);
  822. }
  823. }
  824. return $text;
  825. }
  826. function _doLists_callback($matches) {
  827. # Re-usable patterns to match list item bullets and number markers:
  828. $marker_ul = '[*+-]';
  829. $marker_ol = '\d+[.]';
  830. $marker_any = "(?:$marker_ul|$marker_ol)";
  831. $list = $matches[1];
  832. $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
  833. $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );
  834. # Turn double returns into triple returns, so that we can make a
  835. # paragraph for the last item in a list, if necessary:
  836. $list = preg_replace("/\n{2,}/", "\n\n\n", $list);
  837. $result = $this->processListItems($list, $marker_any);
  838. $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
  839. return "\n". $result ."\n\n";
  840. }
  841. var $list_level = 0;
  842. function processListItems($list_str, $marker_any) {
  843. #
  844. # Process the contents of a single ordered or unordered list, splitting it
  845. # into individual list items.
  846. #
  847. # The $this->list_level global keeps track of when we're inside a list.
  848. # Each time we enter a list, we increment it; when we leave a list,
  849. # we decrement. If it's zero, we're not in a list anymore.
  850. #
  851. # We do this because when we're not inside a list, we want to treat
  852. # something like this:
  853. #
  854. # I recommend upgrading to version
  855. # 8. Oops, now this line is treated
  856. # as a sub-list.
  857. #
  858. # As a single paragraph, despite the fact that the second line starts
  859. # with a digit-period-space sequence.
  860. #
  861. # Whereas when we're inside a list (or sub-list), that line will be
  862. # treated as the start of a sub-list. What a kludge, huh? This is
  863. # an aspect of Markdown's syntax that's hard to parse perfectly
  864. # without resorting to mind-reading. Perhaps the solution is to
  865. # change the syntax rules such that sub-lists must start with a
  866. # starting cardinal number; e.g. "1." or "a.".
  867. $this->list_level++;
  868. # trim trailing blank lines:
  869. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  870. $list_str = preg_replace_callback('{
  871. (\n)? # leading line = $1
  872. (^[ \t]*) # leading whitespace = $2
  873. ('.$marker_any.') [ \t]+ # list marker = $3
  874. ((?s:.+?) # list item text = $4
  875. (\n{1,2}))
  876. (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
  877. }xm',
  878. array(&$this, '_processListItems_callback'), $list_str);
  879. $this->list_level--;
  880. return $list_str;
  881. }
  882. function _processListItems_callback($matches) {
  883. $item = $matches[4];
  884. $leading_line =& $matches[1];
  885. $leading_space =& $matches[2];
  886. if ($leading_line || preg_match('/\n{2,}/', $item)) {
  887. $item = $this->runBlockGamut($this->outdent($item));
  888. }
  889. else {
  890. # Recursion for sub-lists:
  891. $item = $this->doLists($this->outdent($item));
  892. $item = preg_replace('/\n+$/', '', $item);
  893. $item = $this->runSpanGamut($item);
  894. }
  895. return "<li>" . $item . "</li>\n";
  896. }
  897. function doCodeBlocks($text) {
  898. #
  899. # Process Markdown `<pre><code>` blocks.
  900. #
  901. $text = preg_replace_callback('{
  902. (?:\n\n|\A)
  903. ( # $1 = the code block -- one or more lines, starting with a space/tab
  904. (?:
  905. (?:[ ]{'.$this->tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces
  906. .*\n+
  907. )+
  908. )
  909. ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
  910. }xm',
  911. array(&$this, '_doCodeBlocks_callback'), $text);
  912. return $text;
  913. }
  914. function _doCodeBlocks_callback($matches) {
  915. $codeblock = $matches[1];
  916. $codeblock = $this->encodeCode($this->outdent($codeblock));
  917. // $codeblock = $this->detab($codeblock);
  918. # trim leading newlines and trailing whitespace
  919. $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock);
  920. $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n";
  921. return $result;
  922. }
  923. function doCodeSpans($text) {
  924. #
  925. # * Backtick quotes are used for <code></code> spans.
  926. #
  927. # * You can use multiple backticks as the delimiters if you want to
  928. # include literal backticks in the code span. So, this input:
  929. #
  930. # Just type ``foo `bar` baz`` at the prompt.
  931. #
  932. # Will translate to:
  933. #
  934. # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
  935. #
  936. # There's no arbitrary limit to the number of backticks you
  937. # can use as delimters. If you need three consecutive backticks
  938. # in your code, use four for delimiters, etc.
  939. #
  940. # * You can use spaces to get literal backticks at the edges:
  941. #
  942. # ... type `` `bar` `` ...
  943. #
  944. # Turns to:
  945. #
  946. # ... type <code>`bar`</code> ...
  947. #
  948. $text = preg_replace_callback('@
  949. (?<!\\\) # Character before opening ` can\'t be a backslash
  950. (`+) # $1 = Opening run of `
  951. (.+?) # $2 = The code block
  952. (?<!`)
  953. \1 # Matching closer
  954. (?!`)
  955. @xs',
  956. array(&$this, '_doCodeSpans_callback'), $text);
  957. return $text;
  958. }
  959. function _doCodeSpans_callback($matches) {
  960. $c = $matches[2];
  961. $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
  962. $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
  963. $c = $this->encodeCode($c);
  964. return $this->hashSpan("<code>$c</code>");
  965. }
  966. function encodeCode($_) {
  967. #
  968. # Encode/escape certain characters inside Markdown code runs.
  969. # The point is that in code, these characters are literals,
  970. # and lose their special Markdown meanings.
  971. #
  972. # Encode all ampersands; HTML entities are not
  973. # entities within a Markdown code span.
  974. $_ = str_replace('&', '&amp;', $_);
  975. # Do the angle bracket song and dance:
  976. $_ = str_replace(array('<', '>'),
  977. array('&lt;', '&gt;'), $_);
  978. # Now, escape characters that are magic in Markdown:
  979. // $_ = str_replace(array_keys($this->escape_table),
  980. // array_values($this->escape_table), $_);
  981. return $_;
  982. }
  983. function doItalicsAndBold($text) {
  984. # <strong> must go first:
  985. $text = preg_replace_callback('{
  986. ( # $1: Marker
  987. (?<!\*\*) \* | # (not preceded by two chars of
  988. (?<!__) _ # the same marker)
  989. )
  990. \1
  991. (?=\S) # Not followed by whitespace
  992. (?!\1\1) # or two others marker chars.
  993. ( # $2: Content
  994. (?:
  995. [^*_]+? # Anthing not em markers.
  996. |
  997. # Balence any regular emphasis inside.
  998. \1 (?=\S) .+? (?<=\S) \1
  999. |
  1000. (?! \1 ) . # Allow unbalenced * and _.
  1001. )+?
  1002. )
  1003. (?<=\S) \1\1 # End mark not preceded by whitespace.
  1004. }sx',
  1005. array(&$this, '_doItalicAndBold_strong_callback'), $text);
  1006. # Then <em>:
  1007. $text = preg_replace_callback(
  1008. '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',
  1009. array(&$this, '_doItalicAndBold_em_callback'), $text);
  1010. return $text;
  1011. }
  1012. function _doItalicAndBold_em_callback($matches) {
  1013. $text = $matches[2];
  1014. $text = $this->runSpanGamut($text);
  1015. return $this->hashSpan("<em>$text</em>");
  1016. }
  1017. function _doItalicAndBold_strong_callback($matches) {
  1018. $text = $matches[2];
  1019. $text = $this->runSpanGamut($text);
  1020. return $this->hashSpan("<strong>$text</strong>");
  1021. }
  1022. function doBlockQuotes($text) {
  1023. $text = preg_replace_callback('/
  1024. ( # Wrap whole match in $1
  1025. (
  1026. ^[ \t]*>[ \t]? # ">" at the start of a line
  1027. .+\n # rest of the first line
  1028. (.+\n)* # subsequent consecutive lines
  1029. \n* # blanks
  1030. )+
  1031. )
  1032. /xm',
  1033. array(&$this, '_doBlockQuotes_callback'), $text);
  1034. return $text;
  1035. }
  1036. function _doBlockQuotes_callback($matches) {
  1037. $bq = $matches[1];
  1038. # trim one level of quoting - trim whitespace-only lines
  1039. $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
  1040. $bq = $this->runBlockGamut($bq); # recurse
  1041. $bq = preg_replace('/^/m', " ", $bq);
  1042. # These leading spaces cause problem with <pre> content,
  1043. # so we need to fix that:
  1044. $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
  1045. array(&$this, '_DoBlockQuotes_callback2'), $bq);
  1046. return $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
  1047. }
  1048. function _doBlockQuotes_callback2($matches) {
  1049. $pre = $matches[1];
  1050. $pre = preg_replace('/^ /m', '', $pre);
  1051. return $pre;
  1052. }
  1053. function formParagraphs($text) {
  1054. #
  1055. # Params:
  1056. # $text - string to process with html <p> tags
  1057. #
  1058. # Strip leading and trailing lines:
  1059. $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
  1060. $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
  1061. #
  1062. # Wrap <p> tags.
  1063. #
  1064. foreach ($grafs as $key => $value) {
  1065. if (!isset( $this->html_blocks[$value] )) {
  1066. $value = $this->runSpanGamut($value);
  1067. $value = preg_replace('/^([ \t]*)/', "<p>", $value);
  1068. $value .= "</p>";
  1069. $grafs[$key] = $this->unhash($value);
  1070. }
  1071. }
  1072. #
  1073. # Unhashify HTML blocks
  1074. #
  1075. foreach ($grafs as $key => $graf) {
  1076. # Modify elements of @grafs in-place...
  1077. if (isset($this->html_blocks[$graf])) {
  1078. $block = $this->html_blocks[$graf];
  1079. $graf = $block;
  1080. // if (preg_match('{
  1081. // \A
  1082. // ( # $1 = <div> tag
  1083. // <div \s+
  1084. // [^>]*
  1085. // \b
  1086. // markdown\s*=\s* ([\'"]) # $2 = attr quote char
  1087. // 1
  1088. // \2
  1089. // [^>]*
  1090. // >
  1091. // )
  1092. // ( # $3 = contents
  1093. // .*
  1094. // )
  1095. // (</div>) # $4 = closing tag
  1096. // \z
  1097. // }xs', $block, $matches))
  1098. // {
  1099. // list(, $div_open, , $div_content, $div_close) = $matches;
  1100. //
  1101. // # We can't call Markdown(), because that resets the hash;
  1102. // # that initialization code should be pulled into its own sub, though.
  1103. // $div_content = $this->hashHTMLBlocks($div_content);
  1104. //
  1105. // # Run document gamut methods on the content.
  1106. // foreach ($this->document_gamut as $method => $priority) {
  1107. // $div_content = $this->$method($div_content);
  1108. // }
  1109. //
  1110. // $div_open = preg_replace(
  1111. // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
  1112. //
  1113. // $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
  1114. // }
  1115. $grafs[$key] = $graf;
  1116. }
  1117. }
  1118. return implode("\n\n", $grafs);
  1119. }
  1120. function encodeAmpsAndAngles($text) {
  1121. # Smart processing for ampersands and angle brackets that need to be encoded.
  1122. # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
  1123. # http://bumppo.net/projects/amputator/
  1124. $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
  1125. '&amp;', $text);;
  1126. # Encode naked <'s
  1127. $text = preg_replace('{<(?![a-z/?\$!])}i', '&lt;', $text);
  1128. return $text;
  1129. }
  1130. function encodeBackslashEscapes($text) {
  1131. #
  1132. # Parameter: String.
  1133. # Returns: The string, with after processing the following backslash
  1134. # escape sequences.
  1135. #
  1136. # Must process escaped backslashes first.
  1137. return str_replace(array_keys($this->backslash_escape_table),
  1138. array_values($this->backslash_escape_table), $text);
  1139. }
  1140. function doAutoLinks($text) {
  1141. $text = preg_replace('{<((https?|ftp|dict):[^\'">\s]+)>}',
  1142. '<a href="\1">\1</a>', $text);
  1143. # Email addresses: <address@domain.foo>
  1144. $text = preg_replace_callback('{
  1145. <
  1146. (?:mailto:)?
  1147. (
  1148. [-.\w\x80-\xFF]+
  1149. \@
  1150. [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
  1151. )
  1152. >
  1153. }xi',
  1154. array(&$this, '_doAutoLinks_callback'), $text);
  1155. return $text;
  1156. }
  1157. function _doAutoLinks_callback($matches) {
  1158. $address = $matches[1];
  1159. $address = $this->unescapeSpecialChars($address);
  1160. $address = $this->encodeEmailAddress($address);
  1161. return $this->hashSpan($address);
  1162. }
  1163. function encodeEmailAddress($addr) {
  1164. #
  1165. # Input: an email address, e.g. "foo@example.com"
  1166. #
  1167. # Output: the email address as a mailto link, with each character
  1168. # of the address encoded as either a decimal or hex entity, in
  1169. # the hopes of foiling most address harvesting spam bots. E.g.:
  1170. #
  1171. # <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
  1172. # &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
  1173. # &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
  1174. # &#101;&#46;&#x63;&#111;&#x6d;</a></p>
  1175. #
  1176. # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
  1177. # With some optimizations by Milian Wolff.
  1178. #
  1179. $addr = "mailto:" . $addr;
  1180. $chars = preg_split('/(?<!^)(?!$)/', $addr);
  1181. $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
  1182. foreach ($chars as $key => $char) {
  1183. $ord = ord($char);
  1184. # Ignore non-ascii chars.
  1185. if ($ord < 128) {
  1186. $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
  1187. # roughly 10% raw, 45% hex, 45% dec
  1188. # '@' *must* be encoded. I insist.
  1189. if ($r > 90 && $char != '@') /* do nothing */;
  1190. else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
  1191. else $chars[$key] = '&#'.$ord.';';
  1192. }
  1193. }
  1194. $addr = implode('', $chars);
  1195. $text = implode('', array_slice($chars, 7)); # text without `mailto:`
  1196. $addr = "<a href=\"$addr\">$text</a>";
  1197. return $addr;
  1198. }
  1199. function unescapeSpecialChars($text) {
  1200. #
  1201. # Swap back in all the special characters we've hidden.
  1202. #
  1203. return str_replace(array_values($this->escape_table),
  1204. array_keys($this->escape_table), $text);
  1205. }
  1206. function tokenizeHTML($str) {
  1207. #
  1208. # Parameter: String containing HTML + Markdown markup.
  1209. # Returns: An array of the tokens comprising the input
  1210. # string. Each token is either a tag or a run of text
  1211. # between tags. Each element of the array is a
  1212. # two-element array; the first is either 'tag' or 'text';
  1213. # the second is the actual value.
  1214. # Note: Markdown code spans are taken into account: no tag token is
  1215. # generated within a code span.
  1216. #
  1217. $tokens = array();
  1218. while ($str != "") {
  1219. #
  1220. # Each loop iteration seach for either the next tag or the next
  1221. # openning code span marker. If a code span marker is found, the
  1222. # code span is extracted in entierty and will result in an extra
  1223. # text token.
  1224. #
  1225. $parts = preg_split('{
  1226. (
  1227. (?<![`\\\\])
  1228. `+ # code span marker
  1229. |
  1230. <!-- .*? --> # comment
  1231. |
  1232. <\?.*?\?> | <%.*?%> # processing instruction
  1233. |
  1234. <[/!$]?[-a-zA-Z0-9:]+ # regular tags
  1235. (?:
  1236. \s
  1237. (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
  1238. )?
  1239. >
  1240. )
  1241. }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE);
  1242. # Create token from text preceding tag.
  1243. if ($parts[0] != "") {
  1244. $tokens[] = array('text', $parts[0]);
  1245. }
  1246. # Check if we reach the end.
  1247. if (count($parts) < 3) {
  1248. break;
  1249. }
  1250. # Create token from tag or code span.
  1251. if ($parts[1]{0} == "`") {
  1252. $tokens[] = array('text', $parts[1]);
  1253. $str = $parts[2];
  1254. # Skip the whole code span, pass as text token.
  1255. if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/',
  1256. $str, $matches))
  1257. {
  1258. $tokens[] = array('text', $matches[1]);
  1259. $str = $matches[2];
  1260. }
  1261. } else {
  1262. $tokens[] = array('tag', $parts[1]);
  1263. $str = $parts[2];
  1264. }
  1265. }
  1266. return $tokens;
  1267. }
  1268. function outdent($text) {
  1269. #
  1270. # Remove one level of line-leading tabs or spaces
  1271. #
  1272. return preg_replace("/^(\\t|[ ]{1,$this->tab_width})/m", "", $text);
  1273. }
  1274. # Strlen function that will be used by detab. _initDetab will create a
  1275. # function to hanlde UTF-8 if the default function does not exist.
  1276. var $utf8_strlen = 'mb_strlen';
  1277. function detab($text) {
  1278. #
  1279. # Replace tabs with the appropriate amount of space.
  1280. #
  1281. # For each line we separate the line in blocks delemited by
  1282. # tab characters. Then we reconstruct every line by adding the
  1283. # appropriate number of space between each blocks.
  1284. $strlen = $this->utf8_strlen; # best strlen function for UTF-8.
  1285. $lines = explode("\n", $text);
  1286. $text = "";
  1287. foreach ($lines as $line) {
  1288. # Split in blocks.
  1289. $blocks = explode("\t", $line);
  1290. # Add each blocks to the line.
  1291. $line = $blocks[0];
  1292. unset($blocks[0]); # Do not add first block twice.
  1293. foreach ($blocks as $block) {
  1294. # Calculate amount of space, insert spaces, insert block.
  1295. $amount = $this->tab_width -
  1296. $strlen($line, 'UTF-8') % $this->tab_width;
  1297. $line .= str_repeat(" ", $amount) . $block;
  1298. }
  1299. $text .= "$line\n";
  1300. }
  1301. return $text;
  1302. }
  1303. function _initDetab() {
  1304. #
  1305. # Check for the availability of the function in the `utf8_strlen` property
  1306. # (probably `mb_strlen`). If the function is not available, create a
  1307. # function that will loosely count the number of UTF-8 characters with a
  1308. # regular expression.
  1309. #
  1310. if (function_exists($this->utf8_strlen)) return;
  1311. $this->utf8_strlen = 'Markdown_UTF8_strlen';
  1312. if (function_exists($this->utf8_strlen)) return;
  1313. function Markdown_UTF8_strlen($text) {
  1314. return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
  1315. $text, $m);
  1316. }
  1317. }
  1318. function unhash($text) {
  1319. #
  1320. # Swap back in all the tags hashed by _HashHTMLBlocks.
  1321. #
  1322. return str_replace(array_keys($this->html_hashes),
  1323. array_values($this->html_hashes), $text);
  1324. }
  1325. }
  1326. #
  1327. # Markdown Extra Parser Class
  1328. #
  1329. class MarkdownExtra_Parser extends Markdown_Parser {
  1330. # Prefix for footnote ids.
  1331. var $fn_id_prefix = "";
  1332. # Optional title attribute for footnote links and backlinks.
  1333. var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
  1334. var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
  1335. # Optional class attribute for footnote links and backlinks.
  1336. var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
  1337. var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
  1338. function MarkdownExtra_Parser() {
  1339. #
  1340. # Constructor function. Initialize the parser object.
  1341. #
  1342. # Add extra escapable characters before parent constructor
  1343. # initialize the table.
  1344. $this->escape_chars .= ':|';
  1345. # Insert extra document, block, and span transformations.
  1346. # Parent constructor will do the sorting.
  1347. $this->document_gamut += array(
  1348. "stripFootnotes" => 15,
  1349. "stripAbbreviations" => 25,
  1350. "appendFootnotes" => 50,
  1351. );
  1352. $this->block_gamut += array(
  1353. "doTables" => 15,
  1354. "doDefLists" => 45,
  1355. );
  1356. $this->span_gamut += array(
  1357. "doFootnotes" => 4,
  1358. "doAbbreviations" => 5,
  1359. );
  1360. parent::Markdown_Parser();
  1361. }
  1362. # Extra hashes used during extra transformations.
  1363. var $footnotes = array();
  1364. var $footnotes_ordered = array();
  1365. var $abbr_desciptions = array();
  1366. var $abbr_matches = array();
  1367. var $html_cleans = array();
  1368. function transform($text) {
  1369. #
  1370. # Added clear to the new $html_hashes, reordered `hashHTMLBlocks` before
  1371. # blank line stripping and added extra parameter to `runBlockGamut`.
  1372. #
  1373. # Clear the global hashes. If we don't clear these, you get conflicts
  1374. # from other articles when generating a page which contains more than
  1375. # one article (e.g. an index page that shows the N most recent
  1376. # articles):
  1377. $this->footnotes = array();
  1378. $this->footnotes_ordered = array();
  1379. $this->abbr_desciptions = array();
  1380. $this->abbr_matches = array();
  1381. $this->html_cleans = array();
  1382. return parent::transform($text);
  1383. }
  1384. ### HTML Block Parser ###
  1385. # Tags that are always treated as block tags:
  1386. var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
  1387. # Tags treated as block tags only if the opening tag is alone on it's line:
  1388. var $context_block_tags = 'script|noscript|math|ins|del';
  1389. # Tags where markdown="1" default to span mode:
  1390. var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
  1391. # Tags which must not have their contents modified, no matter where
  1392. # they appear:
  1393. var $clean_tags = 'script|math';
  1394. # Tags that do not need to be closed.
  1395. var $auto_close_tags = 'hr|img';
  1396. function hashHTMLBlocks($text) {
  1397. #
  1398. # Hashify HTML Blocks and "clean tags".
  1399. #
  1400. # We only want to do this for block-level HTML tags, such as headers,
  1401. # lists, and tables. That's because we still want to wrap <p>s around
  1402. # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  1403. # phrase emphasis, and spans. The list of tags we're looking for is
  1404. # hard-coded.
  1405. #
  1406. # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
  1407. # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
  1408. # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
  1409. # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
  1410. # These two functions are calling each other. It's recursive!
  1411. #
  1412. #
  1413. # Call the HTML-in-Markdown hasher.
  1414. #
  1415. list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
  1416. return $text;
  1417. }
  1418. function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
  1419. $enclosing_tag = '', $span = false)
  1420. {
  1421. #
  1422. # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
  1423. #
  1424. # * $indent is the number of space to be ignored when checking for code
  1425. # blocks. This is important because if we don't take the indent into
  1426. # account, something like this (which looks right) won't work as expected:
  1427. #
  1428. # <div>
  1429. # <div markdown="1">
  1430. # Hello World. <-- Is this a Markdown code block or text?
  1431. # </div> <-- Is this a Markdown code block or a real tag?
  1432. # <div>
  1433. #
  1434. # If you don't like this, just don't indent the tag on which
  1435. # you apply the markdown="1" attribute.
  1436. #
  1437. # * If $enclosing_tag is not empty, stops at the first unmatched closing
  1438. # tag with that name. Nested tags supported.
  1439. #
  1440. # * If $span is true, text inside must treated as span. So any double
  1441. # newline will be replaced by a single newline so that it does not create
  1442. # paragraphs.
  1443. #
  1444. # Returns an array of that form: ( processed text , remaining text )
  1445. #
  1446. if ($text === '') return array('', '');
  1447. # Regex to check for the presense of newlines around a block tag.
  1448. $newline_match_before = '/(?:^\n?|\n\n)*$/';
  1449. $newline_match_after =
  1450. '{
  1451. ^ # Start of text following the tag.
  1452. (?:[ ]*<!--.*?-->)? # Optional comment.
  1453. [ ]*\n # Must be followed by newline.
  1454. }xs';
  1455. # Regex to match any tag.
  1456. $block_tag_match =
  1457. '{
  1458. ( # $2: Capture hole tag.
  1459. </? # Any opening or closing tag.
  1460. (?: # Tag name.
  1461. '.$this->block_tags.' |
  1462. '.$this->context_block_tags.' |
  1463. '.$this->clean_tags.' |
  1464. (?!\s)'.$enclosing_tag.'
  1465. )
  1466. \s* # Whitespace.
  1467. (?:
  1468. ".*?" | # Double quotes (can contain `>`)
  1469. \'.*?\' | # Single quotes (can contain `>`)
  1470. .+? # Anything but quotes and `>`.
  1471. )*?
  1472. > # End of tag.
  1473. |
  1474. <!-- .*? --> # HTML Comment
  1475. |
  1476. <\?.*?\?> | <%.*?%> # Processing instruction
  1477. |
  1478. <!\[CDATA\[.*?\]\]> # CData Block
  1479. )
  1480. }xs';
  1481. $depth = 0; # Current depth inside the tag tree.
  1482. $parsed = ""; # Parsed text that will be returned.
  1483. #
  1484. # Loop through every tag until we find the closing tag of the parent
  1485. # or loop until reaching the end of text if no parent tag specified.
  1486. #
  1487. do {
  1488. #
  1489. # Split the text using the first $tag_match pattern found.
  1490. # Text before pattern will be first in the array, text after
  1491. # pattern will be at the end, and between will be any catches made
  1492. # by the pattern.
  1493. #
  1494. $parts = preg_split($block_tag_match, $text, 2,
  1495. PREG_SPLIT_DELIM_CAPTU

Large files files are truncated, but you can click here to view the full file