PageRenderTime 60ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/application/libraries/Markdown.php

https://github.com/MHordecki/milionkostek
PHP | 2789 lines | 1608 code | 364 blank | 817 comment | 134 complexity | 897c99061ba8b17bc0b39a8f64382ca8 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. #
  3. # Markdown Extra - A text-to-HTML conversion tool for web writers
  4. #
  5. # PHP Markdown & Extra
  6. # Copyright (c) 2004-2007 Michel Fortin
  7. # <http://www.michelf.com/projects/php-markdown/>
  8. #
  9. # Original Markdown
  10. # Copyright (c) 2004-2006 John Gruber
  11. # <http://daringfireball.net/projects/markdown/>
  12. #
  13. define( 'MARKDOWN_VERSION', "1.0.1h" ); # Fri 3 Aug 2007
  14. define( 'MARKDOWNEXTRA_VERSION', "1.1.4" ); # Fri 3 Aug 2007
  15. #
  16. # Global default settings:
  17. #
  18. # Change to ">" for HTML output
  19. define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
  20. # Define the width of a tab for code blocks.
  21. define( 'MARKDOWN_TAB_WIDTH', 4 );
  22. # Optional title attribute for footnote links and backlinks.
  23. define( 'MARKDOWN_FN_LINK_TITLE', "" );
  24. define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
  25. # Optional class attribute for footnote links and backlinks.
  26. define( 'MARKDOWN_FN_LINK_CLASS', "" );
  27. define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
  28. #
  29. # WordPress settings:
  30. #
  31. # Change to false to remove Markdown from posts and/or comments.
  32. define( 'MARKDOWN_WP_POSTS', true );
  33. define( 'MARKDOWN_WP_COMMENTS', true );
  34. ### Standard Function Interface ###
  35. define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );
  36. function Markdown($text) {
  37. #
  38. # Initialize the parser and return the result of its transform method.
  39. #
  40. # Setup static parser variable.
  41. static $parser;
  42. if (!isset($parser)) {
  43. $parser_class = MARKDOWN_PARSER_CLASS;
  44. $parser = new $parser_class;
  45. }
  46. # Transform text using parser.
  47. return $parser->transform($text);
  48. }
  49. ### WordPress Plugin Interface ###
  50. /*
  51. Plugin Name: Markdown Extra
  52. Plugin URI: http://www.michelf.com/projects/php-markdown/
  53. Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
  54. Version: 1.1.4
  55. Author: Michel Fortin
  56. Author URI: http://www.michelf.com/
  57. */
# Everything in this block, including the mdwp_* helper functions, is only
# set up when $wp_version is set.
if (isset($wp_version)) {
    # More details about how it works here:
    # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>

    # Post content and excerpts
    # - Remove WordPress paragraph generator.
    # - Run Markdown on excerpt, then remove all tags.
    # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
    if (MARKDOWN_WP_POSTS) {
        remove_filter('the_content', 'wpautop');
        remove_filter('the_content_rss', 'wpautop');
        remove_filter('the_excerpt', 'wpautop');
        # Markdown is registered at priority 6 on content and excerpts.
        add_filter('the_content', 'Markdown', 6);
        add_filter('the_content_rss', 'Markdown', 6);
        add_filter('get_the_excerpt', 'Markdown', 6);
        add_filter('get_the_excerpt', 'trim', 7);
        add_filter('the_excerpt', 'mdwp_add_p');
        add_filter('the_excerpt_rss', 'mdwp_strip_p');

        # balanceTags is unregistered from the *_save_pre hooks and
        # registered on the display hooks instead.
        remove_filter('content_save_pre', 'balanceTags', 50);
        remove_filter('excerpt_save_pre', 'balanceTags', 50);
        add_filter('the_content', 'balanceTags', 50);
        add_filter('get_the_excerpt', 'balanceTags', 9);
    }

    # Comments
    # - Remove WordPress paragraph generator.
    # - Remove WordPress auto-link generator.
    # - Scramble important tags before passing them to the kses filter.
    # - Run Markdown on excerpt then remove paragraph tags.
    if (MARKDOWN_WP_COMMENTS) {
        remove_filter('comment_text', 'wpautop', 30);
        remove_filter('comment_text', 'make_clickable');
        # On pre_comment_content: Markdown (6), then hide tags (8), then
        # show them again (12).
        add_filter('pre_comment_content', 'Markdown', 6);
        add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
        add_filter('pre_comment_content', 'mdwp_show_tags', 12);
        add_filter('get_comment_text', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);

        # Parallel lists: the i-th tag is swapped with the i-th placeholder
        # by mdwp_hide_tags() and swapped back by mdwp_show_tags().
        global $mdwp_hidden_tags, $mdwp_placeholders;
        $mdwp_hidden_tags = explode(' ',
            '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
        # The placeholder strings are stored ROT13-encoded and decoded here.
        $mdwp_placeholders = explode(' ', str_rot13(
            'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
            'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
    }

    # Wrap $text in <p>...</p>, turning runs of two or more newlines into
    # paragraph breaks, unless it is empty or already starts with one of
    # the listed block-level tags.
    function mdwp_add_p($text) {
        if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
            $text = '<p>'.$text.'</p>';
            $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
        }
        return $text;
    }

    # Remove every <p> and </p> tag (case-insensitive) from $t.
    function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }

    # Replace each tag in $mdwp_hidden_tags with its placeholder.
    function mdwp_hide_tags($text) {
        global $mdwp_hidden_tags, $mdwp_placeholders;
        return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
    }

    # Inverse of mdwp_hide_tags(): replace each placeholder with its tag.
    function mdwp_show_tags($text) {
        global $mdwp_hidden_tags, $mdwp_placeholders;
        return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
    }
}
  118. ### bBlog Plugin Info ###
  119. function identify_modifier_markdown() {
  120. return array(
  121. 'name' => 'markdown',
  122. 'type' => 'modifier',
  123. 'nicename' => 'PHP Markdown Extra',
  124. 'description' => 'A text-to-HTML conversion tool for web writers',
  125. 'authors' => 'Michel Fortin and John Gruber',
  126. 'licence' => 'GPL',
  127. 'version' => MARKDOWNEXTRA_VERSION,
  128. 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
  129. );
  130. }
  131. ### Smarty Modifier Interface ###
  132. function smarty_modifier_markdown($text) {
  133. return Markdown($text);
  134. }
  135. ### Textile Compatibility Mode ###
  136. # Rename this file to "classTextile.php" and it can replace Textile everywhere.
  137. if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
  138. # Try to include PHP SmartyPants. Should be in the same directory.
  139. @include_once 'smartypants.php';
  140. # Fake Textile class. It calls Markdown instead.
  141. class Textile {
  142. function TextileThis($text, $lite='', $encode='') {
  143. if ($lite == '' && $encode == '') $text = Markdown($text);
  144. if (function_exists('SmartyPants')) $text = SmartyPants($text);
  145. return $text;
  146. }
  147. # Fake restricted version: restrictions are not supported for now.
  148. function TextileRestricted($text, $lite='', $noimage='') {
  149. return $this->TextileThis($text, $lite);
  150. }
  151. # Workaround to ensure compatibility with TextPattern 4.0.3.
  152. function blockLite($text) { return $text; }
  153. }
  154. }
  155. #
  156. # Markdown Parser Class
  157. #
  158. class Markdown_Parser {
  159. # Regex to match balanced [brackets].
  160. # Needed to insert a maximum bracket depth while converting to PHP.
  161. var $nested_brackets_depth = 6;
  162. var $nested_brackets;
  163. var $nested_url_parenthesis_depth = 4;
  164. var $nested_url_parenthesis;
  165. # Table of hash values for escaped characters:
  166. var $escape_chars = '\`*_{}[]()>#+-.!';
  167. // var $escape_table = array();
  168. var $backslash_escape_table = array();
  169. # Change to ">" for HTML output.
  170. var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
  171. var $tab_width = MARKDOWN_TAB_WIDTH;
  172. # Change to `true` to disallow markup or entities.
  173. var $no_markup = false;
  174. var $no_entities = false;
  175. function Markdown_Parser() {
  176. #
  177. # Constructor function. Initialize appropriate member variables.
  178. #
  179. $this->_initDetab();
  180. $this->nested_brackets =
  181. str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  182. str_repeat('\])*', $this->nested_brackets_depth);
  183. $this->nested_url_parenthesis =
  184. str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
  185. str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
  186. # Create an identical table but for escaped characters.
  187. foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) {
  188. $entity = "&#". ord($char). ";";
  189. // $this->escape_table[$char] = $entity;
  190. $this->backslash_escape_table["\\$char"] = $entity;
  191. }
  192. # Sort document, block, and span gamut in ascendent priority order.
  193. asort($this->document_gamut);
  194. asort($this->block_gamut);
  195. asort($this->span_gamut);
  196. }
  197. # Internal hashes used during transformation.
  198. var $urls = array();
  199. var $titles = array();
  200. var $html_blocks = array();
  201. var $html_hashes = array(); # Contains both blocks and span hashes.
  202. # Status flag to avoid invalid nesting.
  203. var $in_anchor = false;
  204. function transform($text) {
  205. #
  206. # Main function. The order in which other subs are called here is
  207. # essential. Link and image substitutions need to happen before
  208. # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  209. # and <img> tags get encoded.
  210. #
  211. # Clear the global hashes. If we don't clear these, you get conflicts
  212. # from other articles when generating a page which contains more than
  213. # one article (e.g. an index page that shows the N most recent
  214. # articles):
  215. $this->urls = array();
  216. $this->titles = array();
  217. $this->html_blocks = array();
  218. $this->html_hashes = array();
  219. # Standardize line endings:
  220. # DOS to Unix and Mac to Unix
  221. $text = str_replace(array("\r\n", "\r"), "\n", $text);
  222. # Make sure $text ends with a couple of newlines:
  223. $text .= "\n\n";
  224. # Convert all tabs to spaces.
  225. $text = $this->detab($text);
  226. # Turn block-level HTML blocks into hash entries
  227. $text = $this->hashHTMLBlocks($text);
  228. # Strip any lines consisting only of spaces and tabs.
  229. # This makes subsequent regexen easier to write, because we can
  230. # match consecutive blank lines with /\n+/ instead of something
  231. # contorted like /[ ]*\n+/ .
  232. $text = preg_replace('/^[ ]+$/m', '', $text);
  233. # Run document gamut methods.
  234. foreach ($this->document_gamut as $method => $priority) {
  235. $text = $this->$method($text);
  236. }
  237. return $text . "\n";
  238. }
  239. var $document_gamut = array(
  240. # Strip link definitions, store in hashes.
  241. "stripLinkDefinitions" => 20,
  242. "runBasicBlockGamut" => 30,
  243. );
function stripLinkDefinitions($text) {
    #
    # Strips link definitions from text, stores the URLs and titles in
    # hash references.
    #
    # Every match is handed to _stripLinkDefinitions_callback(), and the
    # text with the matched definitions replaced is returned.
    #
    # A definition may be indented by fewer spaces than one tab width.
    $less_than_tab = $this->tab_width - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    # The URL may be wrapped in <angle brackets>; the title may be wrapped
    # in double quotes or parentheses and may sit on the following line.
    $text = preg_replace_callback('{
^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
[ ]*
\n? # maybe *one* newline
[ ]*
<?(\S+?)>? # url = $2
[ ]*
\n? # maybe one newline
[ ]*
(?:
(?<=\s) # lookbehind for whitespace
["(]
(.*?) # title = $3
[")]
[ ]*
)? # title is optional
(?:\n+|\Z)
}xm',
        array(&$this, '_stripLinkDefinitions_callback'),
        $text);
    return $text;
}
  273. function _stripLinkDefinitions_callback($matches) {
  274. $link_id = strtolower($matches[1]);
  275. $this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
  276. if (isset($matches[3]))
  277. $this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
  278. return ''; # String that will replace the block
  279. }
function hashHTMLBlocks($text) {
    #
    # Replace block-level raw HTML in $text with hash keys so later
    # Markdown passes leave it alone. Five passes are made in order:
    # nested block tags, block tags in general, <hr>, standalone HTML
    # comments, and <? ... ?> / <% ... %> processor instructions. Each
    # match is replaced through _hashHTMLBlocks_callback().
    #
    # Returns $text unchanged when $this->no_markup is set.
    #
    if ($this->no_markup) return $text;
    $less_than_tab = $this->tab_width - 1;

    # Hashify HTML blocks:
    # We only want to do this for block-level HTML tags, such as headers,
    # lists, and tables. That's because we still want to wrap <p>s around
    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    # phrase emphasis, and spans. The list of tags we're looking for is
    # hard-coded:
    # List "a" (used by the first pass) additionally contains ins and del.
    $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
        'script|noscript|form|fieldset|iframe|math|ins|del';
    $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
        'script|noscript|form|fieldset|iframe|math';

    # Regular expression for the content of a block tag.
    # The content pattern below is repeated this many times.
    $nested_tags_level = 4;

    # Pattern fragment matching the (optional) attributes of an opening tag.
    $attr = '
(?> # optional tag attributes
\s # starts with whitespace
(?>
[^>"/]+ # text outside quotes
|
/+(?!>) # slash not followed by ">"
|
"[^"]*" # text inside double quotes (tolerate ">")
|
\'[^\']*\' # text inside single quotes (tolerate ">")
)*
)?
';
    # Pattern fragment for tag content. It is assembled from an opening
    # part repeated $nested_tags_level times, a lazy ".*?" for the
    # innermost level, and a closing part repeated the same number of
    # times. \2 refers to the tag name captured by the enclosing pattern.
    $content =
        str_repeat('
(?>
[^<]+ # content without tag
|
<\2 # nested opening tag
'.$attr.' # attributes
(?:
/>
|
>', $nested_tags_level). # end of opening tag
        '.*?'. # last level nested tag content
        str_repeat('
</\2\s*> # closing nested tag
)
|
<(?!/\2\s*> # other tags with a different name
)
)*',
        $nested_tags_level);

    # First, look for nested blocks, e.g.:
    # <div>
    # <div>
    # tags for inner block must be indented.
    # </div>
    # </div>
    #
    # The outermost tags must start at the left margin for this to match, and
    # the inner nested divs must be indented.
    # We need to do this before the next, more liberal match, because the next
    # match will start at the first `<div>` and stop at the first `</div>`.
    $text = preg_replace_callback('{
( # save in $1
^ # start of line (with /m)
<('.$block_tags_a.')# start tag = $2
'.$attr.'>\n # attributes followed by > and \n
'.$content.' # content, support nesting
</\2> # the matching end tag
[ ]* # trailing spaces/tabs
(?=\n+|\Z) # followed by a newline or end of document
)
}xmi',
        array(&$this, '_hashHTMLBlocks_callback'),
        $text);

    #
    # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between.
    #
    # Unlike the first pass, the opening tag need not be followed
    # directly by a newline, and the tag list is $block_tags_b.
    $text = preg_replace_callback('{
( # save in $1
^ # start of line (with /m)
<('.$block_tags_b.')# start tag = $2
'.$attr.'> # attributes followed by >
'.$content.' # content, support nesting
</\2> # the matching end tag
[ ]* # trailing spaces/tabs
(?=\n+|\Z) # followed by a newline or end of document
)
}xmi',
        array(&$this, '_hashHTMLBlocks_callback'),
        $text);

    # Special case just for <hr />. It was easier to make a special case than
    # to make the other regex more complicated.
    $text = preg_replace_callback('{
(?:
(?<=\n\n) # Starting after a blank line
| # or
\A\n? # the beginning of the doc
)
( # save in $1
[ ]{0,'.$less_than_tab.'}
<(hr) # start tag = $2
\b # word break
([^<>])*? #
/?> # the matching end tag
[ ]*
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
}xi',
        array(&$this, '_hashHTMLBlocks_callback'),
        $text);

    # Special case for standalone HTML comments:
    $text = preg_replace_callback('{
(?:
(?<=\n\n) # Starting after a blank line
| # or
\A\n? # the beginning of the doc
)
( # save in $1
[ ]{0,'.$less_than_tab.'}
(?s:
<!-- .*? -->
)
[ ]*
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
}x',
        array(&$this, '_hashHTMLBlocks_callback'),
        $text);

    # PHP and ASP-style processor instructions (<? and <%)
    # The closing delimiter must use the same character as the opening
    # one (back-reference \2).
    $text = preg_replace_callback('{
(?:
(?<=\n\n) # Starting after a blank line
| # or
\A\n? # the beginning of the doc
)
( # save in $1
[ ]{0,'.$less_than_tab.'}
(?s:
<([?%]) # $2
.*?
\2>
)
[ ]*
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
}x',
        array(&$this, '_hashHTMLBlocks_callback'),
        $text);

    return $text;
}
  429. function _hashHTMLBlocks_callback($matches) {
  430. $text = $matches[1];
  431. $key = $this->hashBlock($text);
  432. return "\n\n$key\n\n";
  433. }
  434. function hashBlock($text) {
  435. #
  436. # Called whenever a tag must be hashed when a function insert a block-level
  437. # tag in $text, it pass through this function and is automaticaly escaped,
  438. # which remove the need to call _HashHTMLBlocks at every step.
  439. #
  440. # Swap back any tag hash found in $text so we do not have to `unhash`
  441. # multiple times at the end.
  442. $text = $this->unhash($text);
  443. # Then hash the block.
  444. $key = "B\x1A". md5($text);
  445. $this->html_hashes[$key] = $text;
  446. $this->html_blocks[$key] = $text;
  447. return $key; # String that will replace the tag.
  448. }
  449. function hashSpan($text, $word_separator = false) {
  450. #
  451. # Called whenever a tag must be hashed when a function insert a span-level
  452. # element in $text, it pass through this function and is automaticaly
  453. # escaped, blocking invalid nested overlap. If optional argument
  454. # $word_separator is true, surround the hash value by spaces.
  455. #
  456. # Swap back any tag hash found in $text so we do not have to `unhash`
  457. # multiple times at the end.
  458. $text = $this->unhash($text);
  459. # Then hash the span.
  460. $key = "S\x1A". md5($text);
  461. if ($word_separator) $key = ":$key:";
  462. $this->html_hashes[$key] = $text;
  463. return $key; # String that will replace the span tag.
  464. }
  465. var $block_gamut = array(
  466. #
  467. # These are all the transformations that form block-level
  468. # tags like paragraphs, headers, and list items.
  469. #
  470. "doHeaders" => 10,
  471. "doHorizontalRules" => 20,
  472. "doLists" => 40,
  473. "doCodeBlocks" => 50,
  474. "doBlockQuotes" => 60,
  475. );
  476. function runBlockGamut($text) {
  477. #
  478. # Run block gamut tranformations.
  479. #
  480. # We need to escape raw HTML in Markdown source before doing anything
  481. # else. This need to be done for each block, and not only at the
  482. # begining in the Markdown function since hashed blocks can be part of
  483. # list items and could have been indented. Indented blocks would have
  484. # been seen as a code block in a previous pass of hashHTMLBlocks.
  485. $text = $this->hashHTMLBlocks($text);
  486. return $this->runBasicBlockGamut($text);
  487. }
  488. function runBasicBlockGamut($text) {
  489. #
  490. # Run block gamut tranformations, without hashing HTML blocks. This is
  491. # useful when HTML blocks are known to be already hashed, like in the first
  492. # whole-document pass.
  493. #
  494. foreach ($this->block_gamut as $method => $priority) {
  495. $text = $this->$method($text);
  496. }
  497. # Finally form paragraph and restore hashed blocks.
  498. $text = $this->formParagraphs($text);
  499. return $text;
  500. }
  501. function doHorizontalRules($text) {
  502. # Do Horizontal Rules:
  503. return preg_replace(
  504. array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ ]*$}mx',
  505. '{^[ ]{0,2}([ ]? -[ ]?){3,}[ ]*$}mx',
  506. '{^[ ]{0,2}([ ]? _[ ]?){3,}[ ]*$}mx'),
  507. "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
  508. $text);
  509. }
  510. var $span_gamut = array(
  511. #
  512. # These are all the transformations that occur *within* block-level
  513. # tags like paragraphs, headers, and list items.
  514. #
  515. "escapeSpecialCharsWithinTagAttributes" => -20,
  516. "doCodeSpans" => -10,
  517. "encodeBackslashEscapes" => -5,
  518. # Process anchor and image tags. Images must come first,
  519. # because ![foo][f] looks like an anchor.
  520. "doImages" => 10,
  521. "doAnchors" => 20,
  522. # Make links out of things like `<http://example.com/>`
  523. # Must come after doAnchors, because you can use < and >
  524. # delimiters in inline links like [this](<url>).
  525. "doAutoLinks" => 30,
  526. "encodeAmpsAndAngles" => 40,
  527. "doItalicsAndBold" => 50,
  528. "doHardBreaks" => 60,
  529. );
  530. function runSpanGamut($text) {
  531. #
  532. # Run span gamut tranformations.
  533. #
  534. foreach ($this->span_gamut as $method => $priority) {
  535. $text = $this->$method($text);
  536. }
  537. return $text;
  538. }
  539. function doHardBreaks($text) {
  540. # Do hard breaks:
  541. $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n");
  542. return preg_replace('/ {2,}\n/', $br_tag, $text);
  543. }
  544. function escapeSpecialCharsWithinTagAttributes($text) {
  545. #
  546. # Within tags -- meaning between < and > -- encode [\ ` * _] so they
  547. # don't conflict with their use in Markdown for code, italics and strong.
  548. # We're replacing each such character with its corresponding MD5 checksum
  549. # value; this is likely overkill, but it should prevent us from colliding
  550. # with the escape values by accident.
  551. #
  552. if ($this->no_markup) return $text;
  553. $tokens = $this->tokenizeHTML($text);
  554. $text = ''; # rebuild $text from the tokens
  555. foreach ($tokens as $cur_token) {
  556. if ($cur_token[0] == 'tag') {
  557. // $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
  558. // $cur_token[1] = str_replace('`', $this->escape_table['`'], $cur_token[1]);
  559. // $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
  560. // $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
  561. $cur_token[1] = $this->hashSpan($cur_token[1]);
  562. }
  563. $text .= $cur_token[1];
  564. }
  565. return $text;
  566. }
  567. function doAnchors($text) {
  568. #
  569. # Turn Markdown link shortcuts into XHTML <a> tags.
  570. #
  571. if ($this->in_anchor) return $text;
  572. $this->in_anchor = true;
  573. #
  574. # First, handle reference-style links: [link text] [id]
  575. #
  576. $text = preg_replace_callback('{
  577. ( # wrap whole match in $1
  578. \[
  579. ('.$this->nested_brackets.') # link text = $2
  580. \]
  581. [ ]? # one optional space
  582. (?:\n[ ]*)? # one optional newline followed by spaces
  583. \[
  584. (.*?) # id = $3
  585. \]
  586. )
  587. }xs',
  588. array(&$this, '_doAnchors_reference_callback'), $text);
  589. #
  590. # Next, inline-style links: [link text](url "optional title")
  591. #
  592. $text = preg_replace_callback('{
  593. ( # wrap whole match in $1
  594. \[
  595. ('.$this->nested_brackets.') # link text = $2
  596. \]
  597. \( # literal paren
  598. [ ]*
  599. (?:
  600. <(\S*)> # href = $3
  601. |
  602. ('.$this->nested_url_parenthesis.') # href = $4
  603. )
  604. [ ]*
  605. ( # $5
  606. ([\'"]) # quote char = $6
  607. (.*?) # Title = $7
  608. \6 # matching quote
  609. [ ]* # ignore any spaces/tabs between closing quote and )
  610. )? # title is optional
  611. \)
  612. )
  613. }xs',
  614. array(&$this, '_DoAnchors_inline_callback'), $text);
  615. #
  616. # Last, handle reference-style shortcuts: [link text]
  617. # These must come last in case you've also got [link test][1]
  618. # or [link test](/foo)
  619. #
  620. // $text = preg_replace_callback('{
  621. // ( # wrap whole match in $1
  622. // \[
  623. // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
  624. // \]
  625. // )
  626. // }xs',
  627. // array(&$this, '_doAnchors_reference_callback'), $text);
  628. $this->in_anchor = false;
  629. return $text;
  630. }
  631. function _doAnchors_reference_callback($matches) {
  632. $whole_match = $matches[1];
  633. $link_text = $matches[2];
  634. $link_id =& $matches[3];
  635. if ($link_id == "") {
  636. # for shortcut links like [this][] or [this].
  637. $link_id = $link_text;
  638. }
  639. # lower-case and turn embedded newlines into spaces
  640. $link_id = strtolower($link_id);
  641. $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
  642. if (isset($this->urls[$link_id])) {
  643. $url = $this->urls[$link_id];
  644. $url = $this->encodeAmpsAndAngles($url);
  645. $result = "<a href=\"$url\"";
  646. if ( isset( $this->titles[$link_id] ) ) {
  647. $title = $this->titles[$link_id];
  648. $title = $this->encodeAmpsAndAngles($title);
  649. $result .= " title=\"$title\"";
  650. }
  651. $link_text = $this->runSpanGamut($link_text);
  652. $result .= ">$link_text</a>";
  653. $result = $this->hashSpan($result);
  654. }
  655. else {
  656. $result = $whole_match;
  657. }
  658. return $result;
  659. }
  660. function _doAnchors_inline_callback($matches) {
  661. $whole_match = $matches[1];
  662. $link_text = $this->runSpanGamut($matches[2]);
  663. $url = $matches[3] == '' ? $matches[4] : $matches[3];
  664. $title =& $matches[7];
  665. $url = $this->encodeAmpsAndAngles($url);
  666. $result = "<a href=\"$url\"";
  667. if (isset($title)) {
  668. $title = str_replace('"', '&quot;', $title);
  669. $title = $this->encodeAmpsAndAngles($title);
  670. $result .= " title=\"$title\"";
  671. }
  672. $link_text = $this->runSpanGamut($link_text);
  673. $result .= ">$link_text</a>";
  674. return $this->hashSpan($result);
  675. }
function doImages($text) {
    #
    # Turn Markdown image shortcuts into <img> tags.
    #
    # Two passes are made: reference-style images first, then inline
    # images. Each match is replaced by the matching _doImages_*_callback().
    #

    #
    # First, handle reference-style labeled images: ![alt text][id]
    #
    $text = preg_replace_callback('{
( # wrap whole match in $1
!\[
('.$this->nested_brackets.') # alt text = $2
\]
[ ]? # one optional space
(?:\n[ ]*)? # one optional newline followed by spaces
\[
(.*?) # id = $3
\]
)
}xs',
        array(&$this, '_doImages_reference_callback'), $text);

    #
    # Next, handle inline images: ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    # The URL is either wrapped in <angle brackets> ($3) or bare ($4).
    $text = preg_replace_callback('{
( # wrap whole match in $1
!\[
('.$this->nested_brackets.') # alt text = $2
\]
\s? # One optional whitespace character
\( # literal paren
[ ]*
(?:
<(\S*)> # src url = $3
|
('.$this->nested_url_parenthesis.') # src url = $4
)
[ ]*
( # $5
([\'"]) # quote char = $6
(.*?) # title = $7
\6 # matching quote
[ ]*
)? # title is optional
\)
)
}xs',
        array(&$this, '_doImages_inline_callback'), $text);

    return $text;
}
  726. function _doImages_reference_callback($matches) {
  727. $whole_match = $matches[1];
  728. $alt_text = $matches[2];
  729. $link_id = strtolower($matches[3]);
  730. if ($link_id == "") {
  731. $link_id = strtolower($alt_text); # for shortcut links like ![this][].
  732. }
  733. $alt_text = str_replace('"', '&quot;', $alt_text);
  734. if (isset($this->urls[$link_id])) {
  735. $url = $this->urls[$link_id];
  736. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  737. if (isset($this->titles[$link_id])) {
  738. $title = $this->titles[$link_id];
  739. $result .= " title=\"$title\"";
  740. }
  741. $result .= $this->empty_element_suffix;
  742. $result = $this->hashSpan($result);
  743. }
  744. else {
  745. # If there's no such link ID, leave intact:
  746. $result = $whole_match;
  747. }
  748. return $result;
  749. }
  750. function _doImages_inline_callback($matches) {
  751. $whole_match = $matches[1];
  752. $alt_text = $matches[2];
  753. $url = $matches[3] == '' ? $matches[4] : $matches[3];
  754. $title =& $matches[7];
  755. $alt_text = str_replace('"', '&quot;', $alt_text);
  756. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  757. if (isset($title)) {
  758. $title = str_replace('"', '&quot;', $title);
  759. $result .= " title=\"$title\""; # $title already quoted
  760. }
  761. $result .= $this->empty_element_suffix;
  762. return $this->hashSpan($result);
  763. }
  764. function doHeaders($text) {
  765. # Setext-style headers:
  766. # Header 1
  767. # ========
  768. #
  769. # Header 2
  770. # --------
  771. #
  772. $text = preg_replace_callback('{ ^(.+?)[ ]*\n=+[ ]*\n+ }mx',
  773. array(&$this, '_doHeaders_callback_setext_h1'), $text);
  774. $text = preg_replace_callback('{ ^(.+?)[ ]*\n-+[ ]*\n+ }mx',
  775. array(&$this, '_doHeaders_callback_setext_h2'), $text);
  776. # atx-style headers:
  777. # # Header 1
  778. # ## Header 2
  779. # ## Header 2 with closing hashes ##
  780. # ...
  781. # ###### Header 6
  782. #
  783. $text = preg_replace_callback('{
  784. ^(\#{1,6}) # $1 = string of #\'s
  785. [ ]*
  786. (.+?) # $2 = Header text
  787. [ ]*
  788. \#* # optional closing #\'s (not counted)
  789. \n+
  790. }xm',
  791. array(&$this, '_doHeaders_callback_atx'), $text);
  792. return $text;
  793. }
  794. function _doHeaders_callback_setext_h1($matches) {
  795. $block = "<h1>".$this->runSpanGamut($matches[1])."</h1>";
  796. return "\n" . $this->hashBlock($block) . "\n\n";
  797. }
  798. function _doHeaders_callback_setext_h2($matches) {
  799. $block = "<h2>".$this->runSpanGamut($matches[1])."</h2>";
  800. return "\n" . $this->hashBlock($block) . "\n\n";
  801. }
  802. function _doHeaders_callback_atx($matches) {
  803. $level = strlen($matches[1]);
  804. $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
  805. return "\n" . $this->hashBlock($block) . "\n\n";
  806. }
function doLists($text) {
    #
    # Form HTML ordered (numbered) and unordered (bulleted) lists.
    #
    # The text is scanned once per marker kind (bullets first, then
    # numbers); every whole list found is replaced by _doLists_callback().
    #
    $less_than_tab = $this->tab_width - 1;

    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul = '[*+-]';
    $marker_ol = '\d+[.]';
    # NOTE(review): $marker_any is not used again in this function.
    $marker_any = "(?:$marker_ul|$marker_ol)";

    $markers = array($marker_ul, $marker_ol);

    foreach ($markers as $marker) {
        # Re-usable pattern to match any entire ul or ol list:
        $whole_list = '
( # $1 = whole list
( # $2
[ ]{0,'.$less_than_tab.'}
('.$marker.') # $3 = first list item marker
[ ]+
)
(?s:.+?)
( # $4
\z
|
\n{2,}
(?=\S)
(?! # Negative lookahead for another list item marker
[ ]*
'.$marker.'[ ]+
)
)
)
'; // mx

        # We use a different prefix before nested lists than top-level lists.
        # See extended comment in _ProcessListItems().
        # Inside a list (list_level non-zero) a list may start at any
        # line start; at top level it must follow a blank line or be at
        # the beginning of the text.
        if ($this->list_level) {
            $text = preg_replace_callback('{
^
'.$whole_list.'
}mx',
                array(&$this, '_doLists_callback'), $text);
        }
        else {
            $text = preg_replace_callback('{
(?:(?<=\n)\n|\A\n?) # Must eat the newline
'.$whole_list.'
}mx',
                array(&$this, '_doLists_callback'), $text);
        }
    }

    return $text;
}
  858. function _doLists_callback($matches) {
  859. # Re-usable patterns to match list item bullets and number markers:
  860. $marker_ul = '[*+-]';
  861. $marker_ol = '\d+[.]';
  862. $marker_any = "(?:$marker_ul|$marker_ol)";
  863. $list = $matches[1];
  864. $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
  865. $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );
  866. $list .= "\n";
  867. $result = $this->processListItems($list, $marker_any);
  868. $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
  869. return "\n". $result ."\n\n";
  870. }
  871. var $list_level = 0;
  872. function processListItems($list_str, $marker_any) {
  873. #
  874. # Process the contents of a single ordered or unordered list, splitting it
  875. # into individual list items.
  876. #
  877. # The $this->list_level global keeps track of when we're inside a list.
  878. # Each time we enter a list, we increment it; when we leave a list,
  879. # we decrement. If it's zero, we're not in a list anymore.
  880. #
  881. # We do this because when we're not inside a list, we want to treat
  882. # something like this:
  883. #
  884. # I recommend upgrading to version
  885. # 8. Oops, now this line is treated
  886. # as a sub-list.
  887. #
  888. # As a single paragraph, despite the fact that the second line starts
  889. # with a digit-period-space sequence.
  890. #
  891. # Whereas when we're inside a list (or sub-list), that line will be
  892. # treated as the start of a sub-list. What a kludge, huh? This is
  893. # an aspect of Markdown's syntax that's hard to parse perfectly
  894. # without resorting to mind-reading. Perhaps the solution is to
  895. # change the syntax rules such that sub-lists must start with a
  896. # starting cardinal number; e.g. "1." or "a.".
  897. $this->list_level++;
  898. # trim trailing blank lines:
  899. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  900. $list_str = preg_replace_callback('{
  901. (\n)? # leading line = $1
  902. (^[ ]*) # leading whitespace = $2
  903. ('.$marker_any.') [ ]+ # list marker = $3
  904. ((?s:.+?)) # list item text = $4
  905. (?:(\n+(?=\n))|\n) # tailing blank line = $5
  906. (?= \n* (\z | \2 ('.$marker_any.') [ ]+))
  907. }xm',
  908. array(&$this, '_processListItems_callback'), $list_str);
  909. $this->list_level--;
  910. return $list_str;
  911. }
  912. function _processListItems_callback($matches) {
  913. $item = $matches[4];
  914. $leading_line =& $matches[1];
  915. $leading_space =& $matches[2];
  916. $tailing_blank_line =& $matches[5];
  917. if ($leading_line || $tailing_blank_line ||
  918. preg_match('/\n{2,}/', $item))
  919. {
  920. $item = $this->runBlockGamut($this->outdent($item)."\n");
  921. }
  922. else {
  923. # Recursion for sub-lists:
  924. $item = $this->doLists($this->outdent($item));
  925. $item = preg_replace('/\n+$/', '', $item);
  926. $item = $this->runSpanGamut($item);
  927. }
  928. return "<li>" . $item . "</li>\n";
  929. }
  930. function doCodeBlocks($text) {
  931. #
  932. # Process Markdown `<pre><code>` blocks.
  933. #
  934. $text = preg_replace_callback('{
  935. (?:\n\n|\A)
  936. ( # $1 = the code block -- one or more lines, starting with a space/tab
  937. (?:
  938. (?:[ ]{'.$this->tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces
  939. .*\n+
  940. )+
  941. )
  942. ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
  943. }xm',
  944. array(&$this, '_doCodeBlocks_callback'), $text);
  945. return $text;
  946. }
  947. function _doCodeBlocks_callback($matches) {
  948. $codeblock = $matches[1];
  949. $codeblock = $this->encodeCode($this->outdent($codeblock));
  950. // $codeblock = $this->detab($codeblock);
  951. # trim leading newlines and trailing whitespace
  952. $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock);
  953. $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n";
  954. return $result;
  955. }
  956. function doCodeSpans($text) {
  957. #
  958. # * Backtick quotes are used for <code></code> spans.
  959. #
  960. # * You can use multiple backticks as the delimiters if you want to
  961. # include literal backticks in the code span. So, this input:
  962. #
  963. # Just type ``foo `bar` baz`` at the prompt.
  964. #
  965. # Will translate to:
  966. #
  967. # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
  968. #
  969. # There's no arbitrary limit to the number of backticks you
  970. # can use as delimters. If you need three consecutive backticks
  971. # in your code, use four for delimiters, etc.
  972. #
  973. # * You can use spaces to get literal backticks at the edges:
  974. #
  975. # ... type `` `bar` `` ...
  976. #
  977. # Turns to:
  978. #
  979. # ... type <code>`bar`</code> ...
  980. #
  981. $text = preg_replace_callback('@
  982. (?<!\\\) # Character before opening ` can\'t be a backslash
  983. (`+) # $1 = Opening run of `
  984. (.+?) # $2 = The code block
  985. (?<!`)
  986. \1 # Matching closer
  987. (?!`)
  988. @xs',
  989. array(&$this, '_doCodeSpans_callback'), $text);
  990. return $text;
  991. }
  992. function _doCodeSpans_callback($matches) {
  993. $c = $matches[2];
  994. $c = preg_replace('/^[ ]*/', '', $c); # leading whitespace
  995. $c = preg_replace('/[ ]*$/', '', $c); # trailing whitespace
  996. $c = $this->encodeCode($c);
  997. return $this->hashSpan("<code>$c</code>");
  998. }
  999. function encodeCode($_) {
  1000. #
  1001. # Encode/escape certain characters inside Markdown code runs.
  1002. # The point is that in code, these characters are literals,
  1003. # and lose their special Markdown meanings.
  1004. #
  1005. # Encode all ampersands; HTML entities are not
  1006. # entities within a Markdown code span.
  1007. $_ = str_replace('&', '&amp;', $_);
  1008. # Do the angle bracket song and dance:
  1009. $_ = str_replace(array('<', '>'),
  1010. array('&lt;', '&gt;'), $_);
  1011. # Now, escape characters that are magic in Markdown:
  1012. // $_ = str_replace(array_keys($this->escape_table),
  1013. // array_values($this->escape_table), $_);
  1014. return $_;
  1015. }
  1016. function doItalicsAndBold($text) {
  1017. # <strong> must go first:
  1018. $text = preg_replace_callback('{
  1019. ( # $1: Marker
  1020. (?<!\*\*) \* | # (not preceded by two chars of
  1021. (?<!__) _ # the same marker)
  1022. )
  1023. \1
  1024. (?=\S) # Not followed by whitespace
  1025. (?!\1\1) # or two others marker chars.
  1026. ( # $2: Content
  1027. (?>
  1028. [^*_]+? # Anthing not em markers.
  1029. |
  1030. # Balence any regular emphasis inside.
  1031. \1 (?=\S) .+? (?<=\S) \1
  1032. |
  1033. . # Allow unbalenced * and _.
  1034. )+?
  1035. )
  1036. (?<=\S) \1\1 # End mark not preceded by whitespace.
  1037. }sx',
  1038. array(&$this, '_doItalicAndBold_strong_callback'), $text);
  1039. # Then <em>:
  1040. $text = preg_replace_callback(
  1041. '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx',
  1042. array(&$this, '_doItalicAndBold_em_callback'), $text);
  1043. return $text;
  1044. }
  1045. function _doItalicAndBold_em_callback($matches) {
  1046. $text = $matches[2];
  1047. $text = $this->runSpanGamut($text);
  1048. return $this->hashSpan("<em>$text</em>");
  1049. }
  1050. function _doItalicAndBold_strong_callback($matches) {
  1051. $text = $matches[2];
  1052. $text = $this->runSpanGamut($text);
  1053. return $this->hashSpan("<strong>$text</strong>");
  1054. }
  1055. function doBlockQuotes($text) {
  1056. $text = preg_replace_callback('/
  1057. ( # Wrap whole match in $1
  1058. (
  1059. ^[ ]*>[ ]? # ">" at the start of a line
  1060. .+\n # rest of the first line
  1061. (.+\n)* # subsequent consecutive lines
  1062. \n* # blanks
  1063. )+
  1064. )
  1065. /xm',
  1066. array(&$this, '_doBlockQuotes_callback'), $text);
  1067. return $text;
  1068. }
  1069. function _doBlockQuotes_callback($matches) {
  1070. $bq = $matches[1];
  1071. # trim one level of quoting - trim whitespace-only lines
  1072. $bq = preg_replace(array('/^[ ]*>[ ]?/m', '/^[ ]+$/m'), '', $bq);
  1073. $bq = $this->runBlockGamut($bq); # recurse
  1074. $bq = preg_replace('/^/m', " ", $bq);
  1075. # These leading spaces cause problem with <pre> content,
  1076. # so we need to fix that:
  1077. $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
  1078. array(&$this, '_DoBlockQuotes_callback2'), $bq);
  1079. return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
  1080. }
  1081. function _doBlockQuotes_callback2($matches) {
  1082. $pre = $matches[1];
  1083. $pre = preg_replace('/^ /m', '', $pre);
  1084. return $pre;
  1085. }
  1086. function formParagraphs($text) {
  1087. #
  1088. # Params:
  1089. # $text - string to process with html <p> tags
  1090. #
  1091. # Strip leading and trailing lines:
  1092. $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
  1093. $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
  1094. #
  1095. # Wrap <p> tags.
  1096. #
  1097. foreach ($grafs as $key => $value) {
  1098. if (!isset( $this->html_blocks[$value] )) {
  1099. $value = $this->runSpanGamut($value);
  1100. $value = preg_replace('/^([ ]*)/', "<p>", $value);
  1101. $value .= "</p>";
  1102. $grafs[$key] = $this->unhash($value);
  1103. }
  1104. }
  1105. #
  1106. # Unhashify HTML blocks
  1107. #
  1108. foreach ($grafs as $key => $graf) {
  1109. # Modify elements of @grafs in-place...
  1110. if (isset($this->html_blocks[$graf])) {
  1111. $block = $this->html_blocks[$graf];
  1112. $graf = $block;
  1113. // if (preg_match('{
  1114. // \A
  1115. // ( # $1 = <div> tag
  1116. // <div \s+
  1117. // [^>]*
  1118. // \b
  1119. // markdown\s*=\s* ([\'"]) # $2 = attr quote char
  1120. // 1
  1121. // \2
  1122. // [^>]*
  1123. // >
  1124. // )
  1125. // ( # $3 = contents
  1126. // .*
  1127. // )
  1128. // (</div>) # $4 = closing tag
  1129. // \z
  1130. // }xs', $block, $matches))
  1131. // {
  1132. // list(, $div_open, , $div_content, $div_close) = $matches;
  1133. //
  1134. // # We can't call Markdown(), because that resets the hash;
  1135. // # that initialization code should be pulled into its own sub, though.
  1136. // $div_content = $this->hashHTMLBlocks($div_content);
  1137. //
  1138. // # Run document gamut methods on the content.
  1139. // foreach ($this->document_gamut as $method => $priority) {
  1140. // $div_content = $this->$method($div_content);
  1141. // }
  1142. //
  1143. // $div_open = preg_replace(
  1144. // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
  1145. //
  1146. // $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
  1147. // }
  1148. $grafs[$key] = $graf;
  1149. }
  1150. }
  1151. return implode("\n\n", $grafs);
  1152. }
  1153. function encodeAmpsAndAngles($text) {
  1154. # Smart processing for ampersands and angle brackets that need to be encoded.
  1155. if ($this->no_entities) {
  1156. $text = str_replace('&', '&amp;', $text);
  1157. $text = str_replace('<', '&lt;', $text);
  1158. return $text;
  1159. }
  1160. # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
  1161. # http://bumppo.net/projects/amputator/
  1162. $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
  1163. '&amp;', $text);;
  1164. # Encode naked <'s
  1165. $text = preg_replace('{<(?![a-z/?\$!%])}i', '&lt;', $text);
  1166. return $text;
  1167. }
  1168. function encodeBackslashEscapes($text) {
  1169. #
  1170. # Parameter: String.
  1171. # Returns: The string, with after processing the following backslash
  1172. # escape sequences.
  1173. #
  1174. # Must process escaped backslashes first (should be first in list).
  1175. foreach ($this->backslash_escape_table as $search => $replacement) {
  1176. $text = str_replace($search, $this->hashSpan($replacement), $text);
  1177. }
  1178. return $text;
  1179. }
  1180. function doAutoLinks($text) {
  1181. $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}',
  1182. array(&$this, '_doAutoLinks_url_callback'), $text);
  1183. # Email addresses: <address@domain.foo>
  1184. $text = preg_replace_callback('{
  1185. <
  1186. (?:mailto:)?
  1187. (
  1188. [-.\w\x80-\xFF]+
  1189. \@
  1190. [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
  1191. )
  1192. >
  1193. }xi',
  1194. array(&$this, '_doAutoLinks_email_callback'), $text);
  1195. return $text;
  1196. }
  1197. function _doAutoLinks_url_callback($matches) {
  1198. $url = $this->encodeAmpsAndAngles($matches[1]);
  1199. $link = "<a href=\"$url\">$url</a>";
  1200. return $this->hashSpan($link);
  1201. }
  1202. function _doAutoLinks_email_callback($matches) {
  1203. $address = $matches[1];
  1204. $link = $this->encodeEmailAddress($address);
  1205. return $this->hashSpan($link);
  1206. }
  1207. function encodeEmailAddress($addr) {
  1208. #
  1209. # Input: an email address, e.g. "foo@example.com"
  1210. #
  1211. # Output: the email address as a mailto link, with each character
  1212. # of the address encoded as either a decimal or hex entity, in
  1213. # the hopes of foiling most address harvesting spam bots. E.g.:
  1214. #
  1215. # <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
  1216. # &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
  1217. # &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
  1218. # &#101;&#46;&#x63;&#111;&#x6d;</a></p>
  1219. #
  1220. # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
  1221. # With some optimizations by Milian Wolff.
  1222. #
  1223. $addr = "mailto:" . $addr;
  1224. $chars = preg_split('/(?<!^)(?!$)/', $addr);
  1225. $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
  1226. foreach ($chars as $key => $char) {
  1227. $ord = ord($char);
  1228. # Ignore non-ascii chars.
  1229. if ($ord < 128) {
  1230. $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
  1231. # roughly 10% raw, 45% hex, 45% dec
  1232. # '@' *must* be encoded. I insist.
  1233. if ($r > 90 && $char != '@') /* do nothing */;
  1234. else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
  1235. else $chars[$key] = '&#'.$ord.';';
  1236. }
  1237. }
  1238. $addr = implode('', $chars);
  1239. $text = implode('', array_slice($chars, 7)); # text without `mailto:`
  1240. $addr = "<a href=\"$addr\">$text</a>";
  1241. return $addr;
  1242. }
  1243. function tokenizeHTML($str) {
  1244. #
  1245. # Parameter: String containing HTML + Markdown markup.
  1246. # Returns: An array of the tokens comprising the input
  1247. # string. Each token is either a tag or a run of text
  1248. # between tags. Each element of the array is a
  1249. # two-element array; the first is either 'tag' or 'text';
  1250. # the second is the actual value.
  1251. # Note: Markdown code spans are taken into account: no tag token is
  1252. # generated within a code span.
  1253. #
  1254. $tokens = array();
  1255. while ($str != "") {
  1256. #
  1257. # Each loop iteration seach for either the next tag or the next
  1258. # openning code span marker. If a code span marker is found, the
  1259. # code span is extracted in entierty and will result in an extra
  1260. # text token.
  1261. #
  1262. $parts = preg_split('{
  1263. (
  1264. (?<![`\\\\])
  1265. `+ # code span marker
  1266. |
  1267. <!-- .*? --> # comment
  1268. |
  1269. <\?.*?\?> | <%.*?%> # processing instruction
  1270. |
  1271. <[/!$]?[-a-zA-Z0-9:]+ # regular tags
  1272. (?:
  1273. \s
  1274. (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
  1275. )?
  1276. >
  1277. )
  1278. }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE);
  1279. # Create token from text preceding tag.
  1280. if ($parts[0] != "") {
  1281. $tokens[] = array('text', $parts[0]);
  1282. }
  1283. # Check if we reach the end.
  1284. if (count($parts) < 3) {
  1285. break;
  1286. }
  1287. # Create token from tag or code span.
  1288. if ($parts[1]{0} == "`") {
  1289. $tokens[] = array('text', $parts[1]);
  1290. $str = $parts[2];
  1291. # Skip the whole code span, pass as text token.
  1292. if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/sm',
  1293. $str, $matches))
  1294. {
  1295. $tokens[] = array('text', $matches[1]);
  1296. $str = $matches[2];
  1297. }
  1298. } else {
  1299. $tokens[] = array('tag', $parts[1]);
  1300. $str = $parts[2];
  1301. }
  1302. }
  1303. return $tokens;
  1304. }
  1305. function outdent($text) {
  1306. #
  1307. # Remove one level of line-leading tabs or spaces
  1308. #
  1309. return preg_replace("/^(\\t|[ ]{1,$this->tab_width})/m", "", $text);
  1310. }
  1311. # String length function for detab. `_initDetab` will create a function to
  1312. # hanlde UTF-8 if the default function does not exist.
  1313. var $utf8_strlen = 'mb_strlen';
  1314. function detab($text) {
  1315. #
  1316. # Replace tabs with the appropriate amount of space.
  1317. #
  1318. # For each line we separate the line in blocks delemited by
  1319. # tab characters. Then we reconstruct every line by adding the
  1320. # appropriate number of space between each blocks.
  1321. $strlen = $this->utf8_strlen; # strlen function for UTF-8.
  1322. $lines = explode("\n", $text);
  1323. $text = "";
  1324. foreach ($lines as $line) {
  1325. # Split in blocks.
  1326. $blocks = explode("\t", $line);
  1327. # Add each blocks to the line.
  1328. $line = $blocks[0];
  1329. unset($blocks[0]); # Do not add first block twice.
  1330. foreach ($blocks as $block) {
  1331. # Calculate amount of space, insert spaces, insert block.
  1332. $amount = $this->tab_width -
  1333. $strlen($line, 'UTF-8') % $this->tab_width;
  1334. $line .= str_repeat(" ", $amount) . $block;
  1335. }
  1336. $text .= "$line\n";
  1337. }
  1338. return $text;
  1339. }
  1340. function _initDetab() {
  1341. #
  1342. # Check for the availability of the function in the `utf8_strlen` property
  1343. # (initially `mb_strlen`). If the function is not available, create a
  1344. # function that will loosely count the number of UTF-8 characters with a
  1345. # regular expression.
  1346. #
  1347. if (function_exists($this->utf8_strlen)) return;
  1348. $this->utf8_strlen = create_function('$text', 'return preg_match_all(
  1349. "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
  1350. $text, $m);');
  1351. }
  1352. function unhash($text) {
  1353. #
  1354. # Swap back in all the tags hashed by _HashHTMLBlocks.
  1355. #
  1356. return str_replace(array_keys($this->html_hashes),
  1357. array_values($this->html_hashes), $text);
  1358. }
  1359. }
  1360. #
  1361. # Markdown Extra Parser Class
  1362. #
  1363. class MarkdownExtra_Parser extends Markdown_Parser {
# Prefix for footnote ids.
var $fn_id_prefix = "";

# Optional title attribute for footnote links and backlinks. The defaults
# come from the MARKDOWN_FN_*_TITLE constants defined at the top of this file.
var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;

# Optional class attribute for footnote links and backlinks. The defaults
# come from the MARKDOWN_FN_*_CLASS constants defined at the top of this file.
var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
  1372. function MarkdownExtra_Parser() {
  1373. #
  1374. # Constructor function. Initialize the parser object.
  1375. #
  1376. # Add extra escapable characters before parent constructor
  1377. # initialize the table.
  1378. $this->escape_chars .= ':|';
  1379. # Insert extra document, block, and span transformations.
  1380. # Parent constructor will do the sorting.
  1381. $this->document_gamut += array(
  1382. "stripFootnotes" => 15,
  1383. "stripAbbreviations" => 25,
  1384. "appendFootnotes" => 50,
  1385. );
  1386. $this->block_gamut += array(
  1387. "doTables" => 15,
  1388. "doDefLists" => 45,
  1389. );
  1390. $this->span_gamut += array(
  1391. "doFootnotes" => 5,
  1392. "doAbbreviations" => 70,
  1393. );
  1394. parent::Markdown_Parser();
  1395. }
# Extra hashes used during extra transformations. These five arrays are
# emptied at the start of every transform() call.
var $footnotes = array();
var $footnotes_ordered = array();
var $abbr_desciptions = array();
var $abbr_matches = array();
var $html_cleans = array();

# Status flag to avoid invalid nesting.
var $in_footnote = false;
  1404. function transform($text) {
  1405. #
  1406. # Added clear to the new $html_hashes, reordered `hashHTMLBlocks` before
  1407. # blank line stripping and added extra parameter to `runBlockGamut`.
  1408. #
  1409. # Clear the global hashes. If we don't clear these, you get conflicts
  1410. # from other articles when generating a page which contains more than
  1411. # one article (e.g. an index page that shows the N most recent
  1412. # articles):
  1413. $this->footnotes = array();
  1414. $this->footnotes_ordered = array();
  1415. $this->abbr_desciptions = array();
  1416. $this->abbr_matches = array();
  1417. $this->html_cleans = array();
  1418. return parent::transform($text);
  1419. }
### HTML Block Parser ###

# Each list below is a regex alternation of tag names.

# Tags that are always treated as block tags:
var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';

# Tags treated as block tags only if the opening tag is alone on its line:
var $context_block_tags = 'script|noscript|math|ins|del';

# Tags where markdown="1" defaults to span mode:
var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

# Tags which must not have their contents modified, no matter where
# they appear:
var $clean_tags = 'script|math';

# Tags that do not need to be closed.
var $auto_close_tags = 'hr|img';
  1432. function hashHTMLBlocks($text) {
  1433. #
  1434. # Hashify HTML Blocks and "clean tags".
  1435. #
  1436. # We only want to do this for block-level HTML tags, such as headers,
  1437. # lists, and tables. That's because we still want to wrap <p>s around
  1438. # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  1439. # phrase emphasis, and spans. The list of tags we're looking for is
  1440. # hard-coded.
  1441. #
  1442. # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
  1443. # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
  1444. # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
  1445. # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
  1446. # These two functions are calling each other. It's recursive!
  1447. #
  1448. #
  1449. # Call the HTML-in-Markdown hasher.
  1450. #
  1451. list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
  1452. return $text;
  1453. }
  1454. function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
  1455. $enclosing_tag = '', $span = false)
  1456. {
  1457. #
  1458. # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
  1459. #
  1460. # * $indent is the number of space to be ignored when checking for code
  1461. # blocks. This is important because if we don't take the indent into
  1462. # account, something like this (which looks right) won't work as expected:
  1463. #
  1464. # <div>
  1465. # <div markdown="1">
  1466. # Hello World. <-- Is this a Markdown code block or text?
  1467. # </div> <-- Is this a Markdown code block or a real tag?
  1468. # <div>
  1469. #
  1470. # If you don't like this, just don't indent the tag on which
  1471. # you apply the markdown="1" attribute.
  1472. #
  1473. # * If $enclosing_tag is not empty, stops at the first unmatched closing
  1474. # tag with that name. Nested tags supported.
  1475. #
  1476. # * If $span is true, text inside must treated as span. So any double
  1477. # newline will be replaced by a single newline so that it does not create
  1478. # paragraphs.
  1479. #
  1480. # Returns an array of that form: ( processed text , remaining text )
  1481. #
  1482. if ($text === '') return array('', '');
  1483. # Regex to check for the presense of newlines around a block tag.
  1484. $newline_match_before = '/(?:^\n?|\n\n)*$/';
  1485. $newline_match_after =
  1486. '{
  1487. ^ # Start of text foll

Large files files are truncated, but you can click here to view the full file