PageRenderTime 60ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/libs/devblocks/libs/markdown/markdown.php

https://github.com/sluther/portsensor
PHP | 2609 lines | 1501 code | 331 blank | 777 comment | 115 complexity | 2e760f45120e9d8bca8e1f405ed4c207 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. #
  3. # Markdown Extra - A text-to-HTML conversion tool for web writers
  4. #
  5. # WGM Changes:
  6. # [JSJ] Removed Wordpress/BBlog/Textile related code
  7. #
  8. # PHP Markdown & Extra
  9. # Copyright (c) 2004-2007 Michel Fortin
  10. # <http://www.michelf.com/projects/php-markdown/>
  11. #
  12. # Original Markdown
  13. # Copyright (c) 2004-2006 John Gruber
  14. # <http://daringfireball.net/projects/markdown/>
  15. #
  16. define( 'MARKDOWN_VERSION', "1.0.1f" ); # Wed 7 Feb 2007
  17. define( 'MARKDOWNEXTRA_VERSION', "1.1.2" ); # Wed 7 Feb 2007
  18. #
  19. # Global default settings:
  20. #
  21. # Change to ">" for HTML output
  22. define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
  23. # Define the width of a tab for code blocks.
  24. define( 'MARKDOWN_TAB_WIDTH', 4 );
  25. # Optional title attribute for footnote links and backlinks.
  26. define( 'MARKDOWN_FN_LINK_TITLE', "" );
  27. define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
  28. # Optional class attribute for footnote links and backlinks.
  29. define( 'MARKDOWN_FN_LINK_CLASS', "" );
  30. define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
  31. ### Standard Function Interface ###
  32. define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );
  33. function Markdown($text) {
  34. #
  35. # Initialize the parser and return the result of its transform method.
  36. #
  37. # Setup static parser variable.
  38. static $parser;
  39. if (!isset($parser)) {
  40. $parser_class = MARKDOWN_PARSER_CLASS;
  41. $parser = new $parser_class;
  42. }
  43. # Transform text using parser.
  44. return $parser->transform($text);
  45. }
  46. ### Smarty Modifier Interface ###
  47. function smarty_modifier_markdown($text) {
  48. return Markdown($text);
  49. }
  50. #
  51. # Markdown Parser Class
  52. #
  53. class Markdown_Parser {
# Regex to match balanced [brackets].
# Needed to insert a maximum bracket depth while converting to PHP.
# $nested_brackets_depth is the number of nesting levels supported;
# $nested_brackets receives the pattern fragment the constructor builds
# from it.
var $nested_brackets_depth = 6;
var $nested_brackets;
# Table of hash values for escaped characters:
# $escape_chars lists the characters. The constructor fills $escape_table
# (character => md5 hash) and $backslash_escape_table (backslash followed
# by the character => the same hash).
var $escape_chars = '\`*_{}[]()>#+-.!';
var $escape_table = array();
var $backslash_escape_table = array();
# Change to ">" for HTML output.
var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
# Tab width, taken from MARKDOWN_TAB_WIDTH.
var $tab_width = MARKDOWN_TAB_WIDTH;
  65. function Markdown_Parser() {
  66. #
  67. # Constructor function. Initialize appropriate member variables.
  68. #
  69. $this->_initDetab();
  70. $this->nested_brackets =
  71. str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  72. str_repeat('\])*', $this->nested_brackets_depth);
  73. # Create an identical table but for escaped characters.
  74. foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) {
  75. $hash = md5($char);
  76. $this->escape_table[$char] = $hash;
  77. $this->backslash_escape_table["\\$char"] = $hash;
  78. }
  79. # Sort document, block, and span gamut in ascendent priority order.
  80. asort($this->document_gamut);
  81. asort($this->block_gamut);
  82. asort($this->span_gamut);
  83. }
  84. # Internal hashes used during transformation.
  85. var $urls = array();
  86. var $titles = array();
  87. var $html_blocks = array();
  88. var $html_hashes = array(); # Contains both blocks and span hashes.
  89. function transform($text) {
  90. #
  91. # Main function. The order in which other subs are called here is
  92. # essential. Link and image substitutions need to happen before
  93. # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  94. # and <img> tags get encoded.
  95. #
  96. # Clear the global hashes. If we don't clear these, you get conflicts
  97. # from other articles when generating a page which contains more than
  98. # one article (e.g. an index page that shows the N most recent
  99. # articles):
  100. $this->urls = array();
  101. $this->titles = array();
  102. $this->html_blocks = array();
  103. $this->html_hashes = array();
  104. # Standardize line endings:
  105. # DOS to Unix and Mac to Unix
  106. $text = str_replace(array("\r\n", "\r"), "\n", $text);
  107. # Make sure $text ends with a couple of newlines:
  108. $text .= "\n\n";
  109. # Convert all tabs to spaces.
  110. $text = $this->detab($text);
  111. # Turn block-level HTML blocks into hash entries
  112. $text = $this->hashHTMLBlocks($text);
  113. # Strip any lines consisting only of spaces and tabs.
  114. # This makes subsequent regexen easier to write, because we can
  115. # match consecutive blank lines with /\n+/ instead of something
  116. # contorted like /[ \t]*\n+/ .
  117. $text = preg_replace('/^[ \t]+$/m', '', $text);
  118. # Run document gamut methods.
  119. foreach ($this->document_gamut as $method => $priority) {
  120. $text = $this->$method($text);
  121. }
  122. return $text . "\n";
  123. }
# Document-level passes run by transform(), as method name => priority.
# The constructor sorts this table with asort(), so a lower number runs
# earlier.
var $document_gamut = array(
	# Strip link definitions, store in hashes.
	"stripLinkDefinitions" => 20,
	"runBasicBlockGamut" => 30,
	"unescapeSpecialChars" => 90,
	);
  130. function stripLinkDefinitions($text) {
  131. #
  132. # Strips link definitions from text, stores the URLs and titles in
  133. # hash references.
  134. #
  135. $less_than_tab = $this->tab_width - 1;
  136. # Link defs are in the form: ^[id]: url "optional title"
  137. $text = preg_replace_callback('{
  138. ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
  139. [ \t]*
  140. \n? # maybe *one* newline
  141. [ \t]*
  142. <?(\S+?)>? # url = $2
  143. [ \t]*
  144. \n? # maybe one newline
  145. [ \t]*
  146. (?:
  147. (?<=\s) # lookbehind for whitespace
  148. ["(]
  149. (.*?) # title = $3
  150. [")]
  151. [ \t]*
  152. )? # title is optional
  153. (?:\n+|\Z)
  154. }xm',
  155. array(&$this, '_stripLinkDefinitions_callback'),
  156. $text);
  157. return $text;
  158. }
  159. function _stripLinkDefinitions_callback($matches) {
  160. $link_id = strtolower($matches[1]);
  161. $this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
  162. if (isset($matches[3]))
  163. $this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
  164. return ''; # String that will replace the block
  165. }
function hashHTMLBlocks($text) {
	#
	# Replace raw block-level HTML found in $text with hash keys and return
	# the resulting text. Every match is handed to
	# _hashHTMLBlocks_callback().
	#
	# Five passes run in this order:
	#   1. block tags ($block_tags_a) whose opening tag ends the line;
	#   2. block tags ($block_tags_b, the same list without ins/del);
	#   3. <hr> tags;
	#   4. standalone HTML comments;
	#   5. <? ... ?> and <% ... %> processing instructions.
	#
	$less_than_tab = $this->tab_width - 1;
	# Hashify HTML blocks:
	# We only want to do this for block-level HTML tags, such as headers,
	# lists, and tables. That's because we still want to wrap <p>s around
	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
	# phrase emphasis, and spans. The list of tags we're looking for is
	# hard-coded:
	$block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
		'script|noscript|form|fieldset|iframe|math|ins|del';
	$block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
		'script|noscript|form|fieldset|iframe|math';
	# Regular expression for the content of a block tag.
	# $nested_tags_level is how many times the nesting fragments of
	# $content below are repeated.
	$nested_tags_level = 4;
	# $attr: pattern fragment for the optional attributes of an opening tag.
	$attr = '
(?> # optional tag attributes
\s # starts with whitespace
(?>
[^>"/]+ # text outside quotes
|
/+(?!>) # slash not followed by ">"
|
"[^"]*" # text inside double quotes (tolerate ">")
|
\'[^\']*\' # text inside single quotes (tolerate ">")
)*
)?
';
	# $content: pattern fragment for what sits between the opening and
	# closing tag. \2 refers to the tag name captured by the patterns that
	# embed this fragment.
	$content =
		str_repeat('
(?>
[^<]+ # content without tag
|
<\2 # nested opening tag
'.$attr.' # attributes
(?:
/>
|
>', $nested_tags_level). # end of opening tag
		'.*?'. # last level nested tag content
		str_repeat('
</\2\s*> # closing nested tag
)
|
<(?!/\2\s*> # other tags with a different name
)
)*',
		$nested_tags_level);
	# First, look for nested blocks, e.g.:
	# <div>
	# <div>
	# tags for inner block must be indented.
	# </div>
	# </div>
	#
	# The outermost tags must start at the left margin for this to match, and
	# the inner nested divs must be indented.
	# We need to do this before the next, more liberal match, because the next
	# match will start at the first `<div>` and stop at the first `</div>`.
	$text = preg_replace_callback('{
( # save in $1
^ # start of line (with /m)
<('.$block_tags_a.')# start tag = $2
'.$attr.'>\n # attributes followed by > and \n
'.$content.' # content, support nesting
</\2> # the matching end tag
[ \t]* # trailing spaces/tabs
(?=\n+|\Z) # followed by a newline or end of document
)
}xm',
		array(&$this, '_hashHTMLBlocks_callback'),
		$text);
	#
	# Match from `\n<tag>` to `</tag>\n`, handling nested tags in between.
	#
	$text = preg_replace_callback('{
( # save in $1
^ # start of line (with /m)
<('.$block_tags_b.')# start tag = $2
'.$attr.'> # attributes followed by >
'.$content.' # content, support nesting
</\2> # the matching end tag
[ \t]* # trailing spaces/tabs
(?=\n+|\Z) # followed by a newline or end of document
)
}xm',
		array(&$this, '_hashHTMLBlocks_callback'),
		$text);
	# Special case just for <hr />. It was easier to make a special case than
	# to make the other regex more complicated.
	$text = preg_replace_callback('{
(?:
(?<=\n\n) # Starting after a blank line
| # or
\A\n? # the beginning of the doc
)
( # save in $1
[ ]{0,'.$less_than_tab.'}
<(hr) # start tag = $2
\b # word break
([^<>])*? #
/?> # the matching end tag
[ \t]*
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
}x',
		array(&$this, '_hashHTMLBlocks_callback'),
		$text);
	# Special case for standalone HTML comments:
	$text = preg_replace_callback('{
(?:
(?<=\n\n) # Starting after a blank line
| # or
\A\n? # the beginning of the doc
)
( # save in $1
[ ]{0,'.$less_than_tab.'}
(?s:
<!-- .*? -->
)
[ \t]*
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
}x',
		array(&$this, '_hashHTMLBlocks_callback'),
		$text);
	# PHP and ASP-style processor instructions (<? and <%)
	$text = preg_replace_callback('{
(?:
(?<=\n\n) # Starting after a blank line
| # or
\A\n? # the beginning of the doc
)
( # save in $1
[ ]{0,'.$less_than_tab.'}
(?s:
<([?%]) # $2
.*?
\2>
)
[ \t]*
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
}x',
		array(&$this, '_hashHTMLBlocks_callback'),
		$text);
	return $text;
}
  314. function _hashHTMLBlocks_callback($matches) {
  315. $text = $matches[1];
  316. $key = $this->hashBlock($text);
  317. return "\n\n$key\n\n";
  318. }
  319. function hashBlock($text) {
  320. #
  321. # Called whenever a tag must be hashed when a function insert a block-level
  322. # tag in $text, it pass through this function and is automaticaly escaped,
  323. # which remove the need to call _HashHTMLBlocks at every step.
  324. #
  325. # Swap back any tag hash found in $text so we do not have to `unhash`
  326. # multiple times at the end.
  327. $text = $this->unhash($text);
  328. # Then hash the block.
  329. $key = md5($text);
  330. $this->html_hashes[$key] = $text;
  331. $this->html_blocks[$key] = $text;
  332. return $key; # String that will replace the tag.
  333. }
  334. function hashSpan($text) {
  335. #
  336. # Called whenever a tag must be hashed when a function insert a span-level
  337. # element in $text, it pass through this function and is automaticaly
  338. # escaped, blocking invalid nested overlap.
  339. #
  340. # Swap back any tag hash found in $text so we do not have to `unhash`
  341. # multiple times at the end.
  342. $text = $this->unhash($text);
  343. # Then hash the span.
  344. $key = md5($text);
  345. $this->html_hashes[$key] = $text;
  346. return $key; # String that will replace the span tag.
  347. }
var $block_gamut = array(
	#
	# These are all the transformations that form block-level
	# tags like paragraphs, headers, and list items.
	#
	# Each entry is method name => priority. The constructor sorts the table
	# with asort() and runBasicBlockGamut() calls the methods in that order.
	#
	"doHeaders" => 10,
	"doHorizontalRules" => 20,
	"doLists" => 40,
	"doCodeBlocks" => 50,
	"doBlockQuotes" => 60,
	);
  359. function runBlockGamut($text) {
  360. #
  361. # Run block gamut tranformations.
  362. #
  363. # We need to escape raw HTML in Markdown source before doing anything
  364. # else. This need to be done for each block, and not only at the
  365. # begining in the Markdown function since hashed blocks can be part of
  366. # list items and could have been indented. Indented blocks would have
  367. # been seen as a code block in a previous pass of hashHTMLBlocks.
  368. $text = $this->hashHTMLBlocks($text);
  369. return $this->runBasicBlockGamut($text);
  370. }
  371. function runBasicBlockGamut($text) {
  372. #
  373. # Run block gamut tranformations, without hashing HTML blocks. This is
  374. # useful when HTML blocks are known to be already hashed, like in the first
  375. # whole-document pass.
  376. #
  377. foreach ($this->block_gamut as $method => $priority) {
  378. $text = $this->$method($text);
  379. }
  380. # Finally form paragraph and restore hashed blocks.
  381. $text = $this->formParagraphs($text);
  382. return $text;
  383. }
  384. function doHorizontalRules($text) {
  385. # Do Horizontal Rules:
  386. return preg_replace(
  387. array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
  388. '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
  389. '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
  390. "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
  391. $text);
  392. }
var $span_gamut = array(
	#
	# These are all the transformations that occur *within* block-level
	# tags like paragraphs, headers, and list items.
	#
	# Each entry is method name => priority. The constructor sorts the table
	# with asort() and runSpanGamut() calls the methods in that order.
	#
	"escapeSpecialCharsWithinTagAttributes" => -20,
	"doCodeSpans" => -10,
	"encodeBackslashEscapes" => -5,
	# Process anchor and image tags. Images must come first,
	# because ![foo][f] looks like an anchor.
	"doImages" => 10,
	"doAnchors" => 20,
	# Make links out of things like `<http://example.com/>`
	# Must come after doAnchors, because you can use < and >
	# delimiters in inline links like [this](<url>).
	"doAutoLinks" => 30,
	"encodeAmpsAndAngles" => 40,
	"doItalicsAndBold" => 50,
	"doHardBreaks" => 60,
	);
  413. function runSpanGamut($text) {
  414. #
  415. # Run span gamut tranformations.
  416. #
  417. foreach ($this->span_gamut as $method => $priority) {
  418. $text = $this->$method($text);
  419. }
  420. return $text;
  421. }
  422. function doHardBreaks($text) {
  423. # Do hard breaks:
  424. $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n");
  425. return preg_replace('/ {2,}\n/', $br_tag, $text);
  426. }
  427. function escapeSpecialCharsWithinTagAttributes($text) {
  428. #
  429. # Within tags -- meaning between < and > -- encode [\ ` * _] so they
  430. # don't conflict with their use in Markdown for code, italics and strong.
  431. # We're replacing each such character with its corresponding MD5 checksum
  432. # value; this is likely overkill, but it should prevent us from colliding
  433. # with the escape values by accident.
  434. #
  435. $tokens = $this->tokenizeHTML($text);
  436. $text = ''; # rebuild $text from the tokens
  437. foreach ($tokens as $cur_token) {
  438. if ($cur_token[0] == 'tag') {
  439. $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
  440. $cur_token[1] = str_replace(array('`'), $this->escape_table['`'], $cur_token[1]);
  441. $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
  442. $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
  443. }
  444. $text .= $cur_token[1];
  445. }
  446. return $text;
  447. }
  448. function doAnchors($text) {
  449. #
  450. # Turn Markdown link shortcuts into XHTML <a> tags.
  451. #
  452. #
  453. # First, handle reference-style links: [link text] [id]
  454. #
  455. $text = preg_replace_callback('{
  456. ( # wrap whole match in $1
  457. \[
  458. ('.$this->nested_brackets.') # link text = $2
  459. \]
  460. [ ]? # one optional space
  461. (?:\n[ ]*)? # one optional newline followed by spaces
  462. \[
  463. (.*?) # id = $3
  464. \]
  465. )
  466. }xs',
  467. array(&$this, '_doAnchors_reference_callback'), $text);
  468. #
  469. # Next, inline-style links: [link text](url "optional title")
  470. #
  471. $text = preg_replace_callback('{
  472. ( # wrap whole match in $1
  473. \[
  474. ('.$this->nested_brackets.') # link text = $2
  475. \]
  476. \( # literal paren
  477. [ \t]*
  478. <?(.*?)>? # href = $3
  479. [ \t]*
  480. ( # $4
  481. ([\'"]) # quote char = $5
  482. (.*?) # Title = $6
  483. \5 # matching quote
  484. [ \t]* # ignore any spaces/tabs between closing quote and )
  485. )? # title is optional
  486. \)
  487. )
  488. }xs',
  489. array(&$this, '_DoAnchors_inline_callback'), $text);
  490. #
  491. # Last, handle reference-style shortcuts: [link text]
  492. # These must come last in case you've also got [link test][1]
  493. # or [link test](/foo)
  494. #
  495. // $text = preg_replace_callback('{
  496. // ( # wrap whole match in $1
  497. // \[
  498. // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
  499. // \]
  500. // )
  501. // }xs',
  502. // array(&$this, '_doAnchors_reference_callback'), $text);
  503. return $text;
  504. }
  505. function _doAnchors_reference_callback($matches) {
  506. $whole_match = $matches[1];
  507. $link_text = $matches[2];
  508. $link_id =& $matches[3];
  509. if ($link_id == "") {
  510. # for shortcut links like [this][] or [this].
  511. $link_id = $link_text;
  512. }
  513. # lower-case and turn embedded newlines into spaces
  514. $link_id = strtolower($link_id);
  515. $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
  516. if (isset($this->urls[$link_id])) {
  517. $url = $this->urls[$link_id];
  518. $url = $this->encodeAmpsAndAngles($url);
  519. $result = "<a href=\"$url\"";
  520. if ( isset( $this->titles[$link_id] ) ) {
  521. $title = $this->titles[$link_id];
  522. $title = $this->encodeAmpsAndAngles($title);
  523. $result .= " title=\"$title\"";
  524. }
  525. $link_text = $this->runSpanGamut($link_text);
  526. $result .= ">$link_text</a>";
  527. $result = $this->hashSpan($result);
  528. }
  529. else {
  530. $result = $whole_match;
  531. }
  532. return $result;
  533. }
  534. function _doAnchors_inline_callback($matches) {
  535. $whole_match = $matches[1];
  536. $link_text = $this->runSpanGamut($matches[2]);
  537. $url = $matches[3];
  538. $title =& $matches[6];
  539. $url = $this->encodeAmpsAndAngles($url);
  540. $result = "<a href=\"$url\"";
  541. if (isset($title)) {
  542. $title = str_replace('"', '&quot;', $title);
  543. $title = $this->encodeAmpsAndAngles($title);
  544. $result .= " title=\"$title\"";
  545. }
  546. $link_text = $this->runSpanGamut($link_text);
  547. $result .= ">$link_text</a>";
  548. return $this->hashSpan($result);
  549. }
  550. function doImages($text) {
  551. #
  552. # Turn Markdown image shortcuts into <img> tags.
  553. #
  554. #
  555. # First, handle reference-style labeled images: ![alt text][id]
  556. #
  557. $text = preg_replace_callback('{
  558. ( # wrap whole match in $1
  559. !\[
  560. ('.$this->nested_brackets.') # alt text = $2
  561. \]
  562. [ ]? # one optional space
  563. (?:\n[ ]*)? # one optional newline followed by spaces
  564. \[
  565. (.*?) # id = $3
  566. \]
  567. )
  568. }xs',
  569. array(&$this, '_doImages_reference_callback'), $text);
  570. #
  571. # Next, handle inline images: ![alt text](url "optional title")
  572. # Don't forget: encode * and _
  573. #
  574. $text = preg_replace_callback('{
  575. ( # wrap whole match in $1
  576. !\[
  577. ('.$this->nested_brackets.') # alt text = $2
  578. \]
  579. \s? # One optional whitespace character
  580. \( # literal paren
  581. [ \t]*
  582. <?(\S+?)>? # src url = $3
  583. [ \t]*
  584. ( # $4
  585. ([\'"]) # quote char = $5
  586. (.*?) # title = $6
  587. \5 # matching quote
  588. [ \t]*
  589. )? # title is optional
  590. \)
  591. )
  592. }xs',
  593. array(&$this, '_doImages_inline_callback'), $text);
  594. return $text;
  595. }
  596. function _doImages_reference_callback($matches) {
  597. $whole_match = $matches[1];
  598. $alt_text = $matches[2];
  599. $link_id = strtolower($matches[3]);
  600. if ($link_id == "") {
  601. $link_id = strtolower($alt_text); # for shortcut links like ![this][].
  602. }
  603. $alt_text = str_replace('"', '&quot;', $alt_text);
  604. if (isset($this->urls[$link_id])) {
  605. $url = $this->urls[$link_id];
  606. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  607. if (isset($this->titles[$link_id])) {
  608. $title = $this->titles[$link_id];
  609. $result .= " title=\"$title\"";
  610. }
  611. $result .= $this->empty_element_suffix;
  612. $result = $this->hashSpan($result);
  613. }
  614. else {
  615. # If there's no such link ID, leave intact:
  616. $result = $whole_match;
  617. }
  618. return $result;
  619. }
  620. function _doImages_inline_callback($matches) {
  621. $whole_match = $matches[1];
  622. $alt_text = $matches[2];
  623. $url = $matches[3];
  624. $title =& $matches[6];
  625. $alt_text = str_replace('"', '&quot;', $alt_text);
  626. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  627. if (isset($title)) {
  628. $title = str_replace('"', '&quot;', $title);
  629. $result .= " title=\"$title\""; # $title already quoted
  630. }
  631. $result .= $this->empty_element_suffix;
  632. return $this->hashSpan($result);
  633. }
  634. function doHeaders($text) {
  635. # Setext-style headers:
  636. # Header 1
  637. # ========
  638. #
  639. # Header 2
  640. # --------
  641. #
  642. $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
  643. array(&$this, '_doHeaders_callback_setext_h1'), $text);
  644. $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
  645. array(&$this, '_doHeaders_callback_setext_h2'), $text);
  646. # atx-style headers:
  647. # # Header 1
  648. # ## Header 2
  649. # ## Header 2 with closing hashes ##
  650. # ...
  651. # ###### Header 6
  652. #
  653. $text = preg_replace_callback('{
  654. ^(\#{1,6}) # $1 = string of #\'s
  655. [ \t]*
  656. (.+?) # $2 = Header text
  657. [ \t]*
  658. \#* # optional closing #\'s (not counted)
  659. \n+
  660. }xm',
  661. array(&$this, '_doHeaders_callback_atx'), $text);
  662. return $text;
  663. }
  664. function _doHeaders_callback_setext_h1($matches) {
  665. $block = "<h1>".$this->runSpanGamut($matches[1])."</h1>";
  666. return "\n" . $this->hashBlock($block) . "\n\n";
  667. }
  668. function _doHeaders_callback_setext_h2($matches) {
  669. $block = "<h2>".$this->runSpanGamut($matches[1])."</h2>";
  670. return "\n" . $this->hashBlock($block) . "\n\n";
  671. }
  672. function _doHeaders_callback_atx($matches) {
  673. $level = strlen($matches[1]);
  674. $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
  675. return "\n" . $this->hashBlock($block) . "\n\n";
  676. }
  677. function doLists($text) {
  678. #
  679. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  680. #
  681. $less_than_tab = $this->tab_width - 1;
  682. # Re-usable patterns to match list item bullets and number markers:
  683. $marker_ul = '[*+-]';
  684. $marker_ol = '\d+[.]';
  685. $marker_any = "(?:$marker_ul|$marker_ol)";
  686. $markers = array($marker_ul, $marker_ol);
  687. foreach ($markers as $marker) {
  688. # Re-usable pattern to match any entirel ul or ol list:
  689. $whole_list = '
  690. ( # $1 = whole list
  691. ( # $2
  692. [ ]{0,'.$less_than_tab.'}
  693. ('.$marker.') # $3 = first list item marker
  694. [ \t]+
  695. )
  696. (?s:.+?)
  697. ( # $4
  698. \z
  699. |
  700. \n{2,}
  701. (?=\S)
  702. (?! # Negative lookahead for another list item marker
  703. [ \t]*
  704. '.$marker.'[ \t]+
  705. )
  706. )
  707. )
  708. '; // mx
  709. # We use a different prefix before nested lists than top-level lists.
  710. # See extended comment in _ProcessListItems().
  711. if ($this->list_level) {
  712. $text = preg_replace_callback('{
  713. ^
  714. '.$whole_list.'
  715. }mx',
  716. array(&$this, '_doLists_callback'), $text);
  717. }
  718. else {
  719. $text = preg_replace_callback('{
  720. (?:(?<=\n)\n|\A\n?) # Must eat the newline
  721. '.$whole_list.'
  722. }mx',
  723. array(&$this, '_doLists_callback'), $text);
  724. }
  725. }
  726. return $text;
  727. }
  728. function _doLists_callback($matches) {
  729. # Re-usable patterns to match list item bullets and number markers:
  730. $marker_ul = '[*+-]';
  731. $marker_ol = '\d+[.]';
  732. $marker_any = "(?:$marker_ul|$marker_ol)";
  733. $list = $matches[1];
  734. $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
  735. $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );
  736. $list .= "\n";
  737. $result = $this->processListItems($list, $marker_any);
  738. $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
  739. return "\n". $result ."\n\n";
  740. }
  741. var $list_level = 0;
  742. function processListItems($list_str, $marker_any) {
  743. #
  744. # Process the contents of a single ordered or unordered list, splitting it
  745. # into individual list items.
  746. #
  747. # The $this->list_level global keeps track of when we're inside a list.
  748. # Each time we enter a list, we increment it; when we leave a list,
  749. # we decrement. If it's zero, we're not in a list anymore.
  750. #
  751. # We do this because when we're not inside a list, we want to treat
  752. # something like this:
  753. #
  754. # I recommend upgrading to version
  755. # 8. Oops, now this line is treated
  756. # as a sub-list.
  757. #
  758. # As a single paragraph, despite the fact that the second line starts
  759. # with a digit-period-space sequence.
  760. #
  761. # Whereas when we're inside a list (or sub-list), that line will be
  762. # treated as the start of a sub-list. What a kludge, huh? This is
  763. # an aspect of Markdown's syntax that's hard to parse perfectly
  764. # without resorting to mind-reading. Perhaps the solution is to
  765. # change the syntax rules such that sub-lists must start with a
  766. # starting cardinal number; e.g. "1." or "a.".
  767. $this->list_level++;
  768. # trim trailing blank lines:
  769. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  770. $list_str = preg_replace_callback('{
  771. (\n)? # leading line = $1
  772. (^[ \t]*) # leading whitespace = $2
  773. ('.$marker_any.') [ \t]+ # list marker = $3
  774. ((?s:.+?)) # list item text = $4
  775. (?:(\n+(?=\n))|\n) # tailing blank line = $5
  776. (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
  777. }xm',
  778. array(&$this, '_processListItems_callback'), $list_str);
  779. $this->list_level--;
  780. return $list_str;
  781. }
  782. function _processListItems_callback($matches) {
  783. $item = $matches[4];
  784. $leading_line =& $matches[1];
  785. $leading_space =& $matches[2];
  786. $tailing_blank_line =& $matches[5];
  787. if ($leading_line || $tailing_blank_line ||
  788. preg_match('/\n{2,}/', $item))
  789. {
  790. $item = $this->runBlockGamut($this->outdent($item)."\n");
  791. }
  792. else {
  793. # Recursion for sub-lists:
  794. $item = $this->doLists($this->outdent($item));
  795. $item = preg_replace('/\n+$/', '', $item);
  796. $item = $this->runSpanGamut($item);
  797. }
  798. return "<li>" . $item . "</li>\n";
  799. }
  800. function doCodeBlocks($text) {
  801. #
  802. # Process Markdown `<pre><code>` blocks.
  803. #
  804. $text = preg_replace_callback('{
  805. (?:\n\n|\A)
  806. ( # $1 = the code block -- one or more lines, starting with a space/tab
  807. (?:
  808. (?:[ ]{'.$this->tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces
  809. .*\n+
  810. )+
  811. )
  812. ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
  813. }xm',
  814. array(&$this, '_doCodeBlocks_callback'), $text);
  815. return $text;
  816. }
  817. function _doCodeBlocks_callback($matches) {
  818. $codeblock = $matches[1];
  819. $codeblock = $this->encodeCode($this->outdent($codeblock));
  820. // $codeblock = $this->detab($codeblock);
  821. # trim leading newlines and trailing whitespace
  822. $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock);
  823. $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n";
  824. return $result;
  825. }
  826. function doCodeSpans($text) {
  827. #
  828. # * Backtick quotes are used for <code></code> spans.
  829. #
  830. # * You can use multiple backticks as the delimiters if you want to
  831. # include literal backticks in the code span. So, this input:
  832. #
  833. # Just type ``foo `bar` baz`` at the prompt.
  834. #
  835. # Will translate to:
  836. #
  837. # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
  838. #
  839. # There's no arbitrary limit to the number of backticks you
  840. # can use as delimters. If you need three consecutive backticks
  841. # in your code, use four for delimiters, etc.
  842. #
  843. # * You can use spaces to get literal backticks at the edges:
  844. #
  845. # ... type `` `bar` `` ...
  846. #
  847. # Turns to:
  848. #
  849. # ... type <code>`bar`</code> ...
  850. #
  851. $text = preg_replace_callback('@
  852. (?<!\\\) # Character before opening ` can\'t be a backslash
  853. (`+) # $1 = Opening run of `
  854. (.+?) # $2 = The code block
  855. (?<!`)
  856. \1 # Matching closer
  857. (?!`)
  858. @xs',
  859. array(&$this, '_doCodeSpans_callback'), $text);
  860. return $text;
  861. }
  862. function _doCodeSpans_callback($matches) {
  863. $c = $matches[2];
  864. $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
  865. $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
  866. $c = $this->encodeCode($c);
  867. return $this->hashSpan("<code>$c</code>");
  868. }
  869. function encodeCode($_) {
  870. #
  871. # Encode/escape certain characters inside Markdown code runs.
  872. # The point is that in code, these characters are literals,
  873. # and lose their special Markdown meanings.
  874. #
  875. # Encode all ampersands; HTML entities are not
  876. # entities within a Markdown code span.
  877. $_ = str_replace('&', '&amp;', $_);
  878. # Do the angle bracket song and dance:
  879. $_ = str_replace(array('<', '>'),
  880. array('&lt;', '&gt;'), $_);
  881. # Now, escape characters that are magic in Markdown:
  882. // $_ = str_replace(array_keys($this->escape_table),
  883. // array_values($this->escape_table), $_);
  884. return $_;
  885. }
  function doItalicsAndBold($text) {
      #
      # Replace `**text**` / `__text__` and `*text*` / `_text_` spans in
      # $text, using the strong and em callbacks respectively, and return
      # the resulting string.
      #
      $strong_pattern = '{
          ( # $1: Marker
          (?<!\*\*) \* | # (not preceded by two chars of
          (?<!__) _ # the same marker)
          )
          \1
          (?=\S) # Not followed by whitespace
          (?!\1\1) # or two others marker chars.
          ( # $2: Content
          (?:
          [^*_]+? # Anthing not em markers.
          |
          # Balence any regular emphasis inside.
          \1 (?=\S) .+? (?<=\S) \1
          |
          (?! \1 ) . # Allow unbalenced * and _.
          )+?
          )
          (?<=\S) \1\1 # End mark not preceded by whitespace.
          }sx';
      $em_pattern =
          '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx';

      # Double markers must be consumed first, otherwise the single-marker
      # pattern would match inside them.
      $with_strong = preg_replace_callback(
          $strong_pattern,
          array(&$this, '_doItalicAndBold_strong_callback'),
          $text
      );
      return preg_replace_callback(
          $em_pattern,
          array(&$this, '_doItalicAndBold_em_callback'),
          $with_strong
      );
  }
  function _doItalicAndBold_em_callback($matches) {
      # $matches[2] is the text between the single emphasis markers; run
      # the span gamut on it before wrapping it in <em>.
      $inner = $this->runSpanGamut($matches[2]);
      return $this->hashSpan('<em>' . $inner . '</em>');
  }
  function _doItalicAndBold_strong_callback($matches) {
      # $matches[2] is the text between the doubled emphasis markers; run
      # the span gamut on it before wrapping it in <strong>.
      $inner = $this->runSpanGamut($matches[2]);
      return $this->hashSpan('<strong>' . $inner . '</strong>');
  }
  function doBlockQuotes($text) {
      #
      # Find runs of ">"-prefixed lines (plus the lines and blank lines that
      # follow them) and pass each run to _doBlockQuotes_callback.
      #
      $blockquote_pattern = '/
          ( # Wrap whole match in $1
          (
          ^[ \t]*>[ \t]? # ">" at the start of a line
          .+\n # rest of the first line
          (.+\n)* # subsequent consecutive lines
          \n* # blanks
          )+
          )
          /xm';
      $handler = array(&$this, '_doBlockQuotes_callback');
      return preg_replace_callback($blockquote_pattern, $handler, $text);
  }
  function _doBlockQuotes_callback($matches) {
      #
      # Callback for doBlockQuotes: $matches[1] is one complete run of
      # quoted lines. Returns the hashed <blockquote> block surrounded by
      # newlines.
      #
      $bq = $matches[1];
      # trim one level of quoting - trim whitespace-only lines
      $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
      $bq = $this->runBlockGamut($bq); # recurse
      # Indent every line of the processed content.
      $bq = preg_replace('/^/m', " ", $bq);
      # These leading spaces cause problem with <pre> content,
      # so we need to fix that. The callback name is spelled exactly as the
      # method is declared instead of relying on case-insensitive lookup.
      $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
          array(&$this, '_doBlockQuotes_callback2'), $bq);
      return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
  }
  function _doBlockQuotes_callback2($matches) {
      # $matches[1] is a <pre>...</pre> section found inside a blockquote;
      # remove the leading space from each of its lines.
      return preg_replace('/^ /m', '', $matches[1]);
  }
  function formParagraphs($text) {
      #
      # Params:
      #   $text - string to process with html <p> tags
      #
      # The text is split on runs of two or more newlines. A chunk that is
      # a key of $this->html_blocks is replaced by the stored block; every
      # other chunk is run through the span gamut and wrapped in <p>...</p>.
      # The chunks are joined back with a blank line between them.
      #
      # Strip leading and trailing lines, then split into chunks.
      $trimmed = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
      $grafs = preg_split('/\n{2,}/', $trimmed, -1, PREG_SPLIT_NO_EMPTY);

      #
      # First pass: wrap <p> tags around everything that is not a hashed
      # HTML block.
      #
      foreach (array_keys($grafs) as $key) {
          $chunk = $grafs[$key];
          if (isset($this->html_blocks[$chunk])) {
              continue;
          }
          $html = $this->runSpanGamut($chunk);
          $html = preg_replace('/^([ \t]*)/', "<p>", $html) . "</p>";
          $grafs[$key] = $this->unhash($html);
      }

      #
      # Second pass: unhashify HTML blocks. (Special handling of
      # <div markdown="1"> blocks is not performed in this method.)
      #
      foreach ($grafs as $key => $graf) {
          if (isset($this->html_blocks[$graf])) {
              $grafs[$key] = $this->html_blocks[$graf];
          }
      }

      return implode("\n\n", $grafs);
  }
  function encodeAmpsAndAngles($text) {
      #
      # Smart processing for ampersands and angle brackets that need to be
      # encoded. Ampersand-encoding based entirely on Nat Irons's Amputator
      # MT plugin: http://bumppo.net/projects/amputator/
      #
      # preg_replace() applies the patterns in array order: first the
      # ampersands that do not start an entity, then the naked <'s.
      $patterns = array(
          '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
          '{<(?![a-z/?\$!%])}i',
      );
      $replacements = array(
          '&amp;',
          '&lt;',
      );
      return preg_replace($patterns, $replacements, $text);
  }
  function encodeBackslashEscapes($text) {
      #
      # Parameter: String.
      # Returns: The string, after replacing every key of
      # $this->backslash_escape_table with its value.
      #
      # str_replace() works through the table in order, so the order of the
      # table entries matters (escaped backslashes must be processed first).
      $sequences = array_keys($this->backslash_escape_table);
      $substitutes = array_values($this->backslash_escape_table);
      return str_replace($sequences, $substitutes, $text);
  }
  function doAutoLinks($text) {
      #
      # Turn <http://...>-style URLs and <address@domain.foo>-style email
      # addresses into links by handing each match to the URL and email
      # callbacks.
      #
      $url_pattern = '{<((https?|ftp|dict):[^\'">\s]+)>}';
      # Email addresses: <address@domain.foo>
      $email_pattern = '{
          <
          (?:mailto:)?
          (
          [-.\w\x80-\xFF]+
          \@
          [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
          )
          >
          }xi';

      $linked = preg_replace_callback(
          $url_pattern,
          array(&$this, '_doAutoLinks_url_callback'),
          $text
      );
      return preg_replace_callback(
          $email_pattern,
          array(&$this, '_doAutoLinks_email_callback'),
          $linked
      );
  }
  function _doAutoLinks_url_callback($matches) {
      # $matches[1] is the URL without its angle brackets. The encoded URL
      # is used both as the href and as the link text.
      $href = $this->encodeAmpsAndAngles($matches[1]);
      return $this->hashSpan("<a href=\"$href\">$href</a>");
  }
  function _doAutoLinks_email_callback($matches) {
      # $matches[1] is the address without angle brackets or "mailto:".
      # Restore hidden special characters, then build the obfuscated link.
      $plain = $this->unescapeSpecialChars($matches[1]);
      return $this->hashSpan($this->encodeEmailAddress($plain));
  }
  function encodeEmailAddress($addr) {
      #
      # Input: an email address, e.g. "foo@example.com"
      #
      # Output: the email address as a mailto link, with each character
      # of the address encoded as either a decimal or hex entity, in
      # the hopes of foiling most address harvesting spam bots.
      #
      # Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
      # With some optimizations by Milian Wolff.
      #
      $full = "mailto:" . $addr;
      $pieces = preg_split('/(?<!^)(?!$)/', $full);
      # Deterministic seed: the same address always encodes the same way.
      $seed = (int)abs(crc32($full) / strlen($full));

      foreach ($pieces as $pos => $piece) {
          $code = ord($piece);
          # Leave non-ascii chars untouched.
          if ($code >= 128) {
              continue;
          }
          $roll = ($seed * (1 + $pos)) % 100; # Pseudo-random function.
          # roughly 10% raw, 45% hex, 45% dec
          # '@' *must* be encoded, so it is never left raw.
          $leave_raw = ($roll > 90 && $piece != '@');
          if (!$leave_raw) {
              $pieces[$pos] = ($roll < 45)
                  ? '&#x'.dechex($code).';'
                  : '&#'.$code.';';
          }
      }

      $href = implode('', $pieces);
      # Link text: everything after the 7 pieces of `mailto:`.
      $label = implode('', array_slice($pieces, 7));
      return "<a href=\"$href\">$label</a>";
  }
  function unescapeSpecialChars($text) {
      #
      # Swap back in all the special characters we've hidden: every value
      # of $this->escape_table found in $text is replaced by its key.
      #
      $hidden = array_values($this->escape_table);
      $originals = array_keys($this->escape_table);
      return str_replace($hidden, $originals, $text);
  }
  function tokenizeHTML($str) {
      #
      # Parameter: String containing HTML + Markdown markup.
      # Returns: An array of the tokens comprising the input
      # string. Each token is either a tag or a run of text
      # between tags. Each element of the array is a
      # two-element array; the first is either 'tag' or 'text';
      # the second is the actual value.
      # Note: Markdown code spans are taken into account: no tag token is
      # generated within a code span.
      #
      $tokens = array();
      while ($str != "") {
          #
          # Each loop iteration searches for either the next tag or the
          # next opening code span marker. If a code span marker is found,
          # the code span is extracted in its entirety and will result in
          # an extra text token.
          #
          $parts = preg_split('{
              (
              (?<![`\\\\])
              `+ # code span marker
              |
              <!-- .*? --> # comment
              |
              <\?.*?\?> | <%.*?%> # processing instruction
              |
              <[/!$]?[-a-zA-Z0-9:]+ # regular tags
              (?:
              \s
              (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
              )?
              >
              )
              }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE);
          # preg_split() returns false when the regex engine fails (e.g.
          # backtrack limit). Keep the unprocessed remainder as text
          # instead of silently dropping it.
          if ($parts === false) {
              $tokens[] = array('text', $str);
              break;
          }
          # Create token from text preceding tag.
          if ($parts[0] != "") {
              $tokens[] = array('text', $parts[0]);
          }
          # Check if we reach the end.
          if (count($parts) < 3) {
              break;
          }
          # Create token from tag or code span.
          # (Square-bracket offset: the curly-brace form `$s{0}` is a parse
          # error on PHP 8.)
          if ($parts[1][0] == "`") {
              $tokens[] = array('text', $parts[1]);
              $str = $parts[2];
              # Skip the whole code span, pass as text token.
              if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/sm',
                  $str, $matches))
              {
                  $tokens[] = array('text', $matches[1]);
                  $str = $matches[2];
              }
          } else {
              $tokens[] = array('tag', $parts[1]);
              $str = $parts[2];
          }
      }
      return $tokens;
  }
  function outdent($text) {
      #
      # Remove one level of line-leading indentation from every line:
      # either one tab or between 1 and $this->tab_width spaces.
      #
      $pattern = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
      return preg_replace($pattern, "", $text);
  }
  1182. # String length function for detab. `_initDetab` will create a function to
  1183. # handle UTF-8 if the default function does not exist.
  1184. var $utf8_strlen = 'mb_strlen';
  function detab($text) {
      #
      # Replace tabs with the appropriate amount of space.
      #
      # Every line is cut at its tab characters; the pieces are then glued
      # back together with enough spaces to reach the next multiple of
      # $this->tab_width. Each output line, including the last, is
      # terminated by a newline.
      #
      $strlen = $this->utf8_strlen; # best strlen function for UTF-8.
      $width = $this->tab_width;
      $result = "";
      foreach (explode("\n", $text) as $line) {
          $segments = explode("\t", $line);
          # The first segment needs no padding in front of it.
          $rebuilt = array_shift($segments);
          foreach ($segments as $segment) {
              # Spaces needed to reach the next tab stop.
              $pad = $width - ($strlen($rebuilt, 'UTF-8') % $width);
              $rebuilt .= str_repeat(" ", $pad) . $segment;
          }
          $result .= $rebuilt . "\n";
      }
      return $result;
  }
  function _initDetab() {
      #
      # Make sure the function named by the `utf8_strlen` property exists.
      # If it does not, the property is pointed at `Markdown_UTF8_strlen`,
      # which is declared here (unless already declared) and loosely counts
      # the number of UTF-8 characters with a regular expression.
      #
      if (!function_exists($this->utf8_strlen)) {
          $this->utf8_strlen = 'Markdown_UTF8_strlen';
          if (!function_exists($this->utf8_strlen)) {
              function Markdown_UTF8_strlen($text) {
                  return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
                      $text, $m);
              }
          }
      }
  }
  function unhash($text) {
      #
      # Swap back in all the hashed tags: every key of $this->html_hashes
      # found in $text is replaced by the value stored under it.
      #
      $hashes = array_keys($this->html_hashes);
      $markup = array_values($this->html_hashes);
      return str_replace($hashes, $markup, $text);
  }
  1233. }
  1234. #
  1235. # Markdown Extra Parser Class
  1236. #
  1237. class MarkdownExtra_Parser extends Markdown_Parser {
  1238. # Prefix for footnote ids.
  1239. var $fn_id_prefix = "";
  1240. # Optional title attribute for footnote links and backlinks.
  1241. var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
  1242. var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
  1243. # Optional class attribute for footnote links and backlinks.
  1244. var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
  1245. var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
  function __construct() {
      #
      # PHP 8 no longer treats a method named after its class as the
      # constructor, so `new MarkdownExtra_Parser` would leave the object
      # uninitialized. Delegate to the original initializer. This method is
      # declared first so older PHP versions do not see a constructor being
      # redefined.
      #
      $this->MarkdownExtra_Parser();
  }
  function MarkdownExtra_Parser() {
      #
      # Constructor function. Initialize the parser object.
      #
      # Add extra escapable characters before parent constructor
      # initialize the table.
      $this->escape_chars .= ':|';
      # Insert extra document, block, and span transformations
      # (method name => priority). Parent constructor will do the sorting.
      $this->document_gamut += array(
          "stripFootnotes" => 15,
          "stripAbbreviations" => 25,
          "appendFootnotes" => 50,
      );
      $this->block_gamut += array(
          "doTables" => 15,
          "doDefLists" => 45,
      );
      $this->span_gamut += array(
          "doFootnotes" => 4,
          "doAbbreviations" => 5,
      );
      # Called explicitly by name, so it runs whether or not PHP regards
      # the parent's method as a constructor.
      parent::Markdown_Parser();
  }
  1270. # Extra hashes used during extra transformations.
  1271. var $footnotes = array();
  1272. var $footnotes_ordered = array();
  1273. var $abbr_desciptions = array();
  1274. var $abbr_matches = array();
  1275. var $html_cleans = array();
  function transform($text) {
      #
      # Reset the per-document state kept by this class, then let the
      # parent class perform the transformation.
      #
      # If these hashes were not cleared, a page containing more than one
      # article (e.g. an index page that shows the N most recent articles)
      # would get conflicts between articles.
      #
      $this->html_cleans = array();
      $this->abbr_matches = array();
      $this->abbr_desciptions = array();
      $this->footnotes_ordered = array();
      $this->footnotes = array();

      return parent::transform($text);
  }
  1292. ### HTML Block Parser ###
  1293. # Tags that are always treated as block tags:
  1294. var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
  1295. # Tags treated as block tags only if the opening tag is alone on its line:
  1296. var $context_block_tags = 'script|noscript|math|ins|del';
  1297. # Tags where markdown="1" default to span mode:
  1298. var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
  1299. # Tags which must not have their contents modified, no matter where
  1300. # they appear:
  1301. var $clean_tags = 'script|math';
  1302. # Tags that do not need to be closed.
  1303. var $auto_close_tags = 'hr|img';
  function hashHTMLBlocks($text) {
      #
      # Hashify HTML Blocks and "clean tags".
      #
      # We only want to do this for block-level HTML tags, such as headers,
      # lists, and tables. That's because we still want to wrap <p>s around
      # "paragraphs" that are wrapped in non-block-level tags, such as
      # anchors, phrase emphasis, and spans.
      #
      # The work is done by _hashHTMLBlocks_inMarkdown, which calls
      # _hashHTMLBlocks_inHTML when it encounters block tags. The result is
      # a (processed text, remaining text) pair; only the processed text is
      # returned here.
      #
      $result = $this->_hashHTMLBlocks_inMarkdown($text);
      return $result[0];
  }
  function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                      $enclosing_tag = '', $span = false)
  {
      #
      # Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
      #
      # * $indent is the number of spaces to be ignored when checking for
      #   code blocks. This is important because if we don't take the indent
      #   into account, something like this (which looks right) won't work
      #   as expected:
      #
      #   <div>
      #       <div markdown="1">
      #       Hello World.  <-- Is this a Markdown code block or text?
      #       </div>        <-- Is this a Markdown code block or a real tag?
      #   <div>
      #
      #   If you don't like this, just don't indent the tag on which
      #   you apply the markdown="1" attribute.
      #
      # * If $enclosing_tag is not empty, stops at the first unmatched
      #   closing tag with that name. Nested tags supported.
      #
      # * If $span is true, text inside must be treated as span. An
      #   empty-string span hash is inserted after each newline so that no
      #   block element is triggered.
      #
      # Returns an array of that form: ( processed text , remaining text )
      #
      if ($text === '') return array('', '');

      # Regex to check for the presence of newlines around a block tag.
      $newline_match_before = '/(?:^\n?|\n\n)*$/';
      $newline_match_after =
          '{
          ^ # Start of text following the tag.
          (?:[ ]*<!--.*?-->)? # Optional comment.
          [ ]*\n # Must be followed by newline.
          }xs';

      # Regex to match any tag.
      $block_tag_match =
          '{
          ( # $2: Capture hole tag.
          </? # Any opening or closing tag.
          (?: # Tag name.
          '.$this->block_tags.' |
          '.$this->context_block_tags.' |
          '.$this->clean_tags.' |
          (?!\s)'.$enclosing_tag.'
          )
          \s* # Whitespace.
          (?:
          ".*?" | # Double quotes (can contain `>`)
          \'.*?\' | # Single quotes (can contain `>`)
          .+? # Anything but quotes and `>`.
          )*?
          > # End of tag.
          |
          <!-- .*? --> # HTML Comment
          |
          <\?.*?\?> | <%.*?%> # Processing instruction
          |
          <!\[CDATA\[.*?\]\]> # CData Block
          )
          }xs';

      $depth = 0; # Current depth inside the tag tree.
      $parsed = ""; # Parsed text that will be returned.

      #
      # Loop through every tag until we find the closing tag of the parent
      # or loop until reaching the end of text if no parent tag specified.
      #
      do {
          #
          # Split the text using the first $tag_match pattern found.
          # Text before pattern will be first in the array, text after
          # pattern will be at the end, and between will be any catches made
          # by the pattern.
          #
          $parts = preg_split($block_tag_match, $text, 2,
              PREG_SPLIT_DELIM_CAPTURE);

          # If in Markdown span mode, add a empty-string span-level hash
          # after each newline to prevent triggering any block element.
          if ($span) {
              $newline = $this->hashSpan("") . "\n";
              $parts[0] = str_replace("\n", $newline, $parts[0]);
          }

          $parsed .= $parts[0]; # Text before current tag.

          # If end of $text has been reached. Stop loop.
          if (count($parts) < 3) {
              $text = "";
              break;
          }

          $tag = $parts[1]; # Tag to handle.
          $text = $parts[2]; # Remaining text after current tag.

          #
          # Check for: Tag inside code block or span
          #
          if (# Find current paragraph
              preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
              (
              # Then match in it either a code block...
              preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
                  '(?!\n)$/', $matches[1], $x) ||
              # ...or unbalanced code span markers. (the regex matches balanced)
              !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
                  $matches[1])
              ))
          {
              # Tag is in code block or span and may not be a tag at all. So
              # we simply skip the first char (should be a `<`).
              # (Square-bracket offsets are used throughout: the curly-brace
              # form `$tag{0}` is a parse error on PHP 8.)
              $parsed .= $tag[0];
              $text = substr($tag, 1) . $text; # Put back $tag minus first char.
          }
          #
          # Check for: Opening Block level tag or
          # Opening Content Block tag (like ins and del)
          # used as a block tag (tag is alone on its line).
          #
          else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) ||
              ( preg_match("{^<(?:$this->context_block_tags)\b}", $tag) &&
                preg_match($newline_match_before, $parsed) &&
                preg_match($newline_match_after, $text) )
              )
          {
              # Need to parse tag and following text using the HTML parser.
              list($block_text, $text) =
                  $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

              # Make sure it stays outside of any paragraph by adding newlines.
              $parsed .= "\n\n$block_text\n\n";
          }
          #
          # Check for: Clean tag (like script, math)
          # HTML Comments, processing instructions.
          #
          else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) ||
              $tag[1] == '!' || $tag[1] == '?')
          {
              # Need to parse tag and following text using the HTML parser.
              # (don't check for markdown attribute)
              list($block_text, $text) =
                  $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

              $parsed .= $block_text;
          }
          #
          # Check for: Tag with same name as enclosing tag.
          #
          else if ($enclosing_tag !== '' &&
              # Same name as enclosing tag.
              preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
          {
              #
              # Increase/decrease nested tag count. A tag whose last
              # character before `>` is `/` leaves the depth unchanged.
              #
              if ($tag[1] == '/') $depth--;
              else if ($tag[strlen($tag)-2] != '/') $depth++;

              if ($depth < 0) {
                  #
                  # Going out of parent element. Clean up and break so we
                  # return to the calling function.
                  #
                  $text = $tag . $text;
                  break;
              }

              $parsed .= $tag;
          }
          else {
              $parsed .= $tag;
          }
      } while ($depth >= 0);

      return array($parsed, $text);
  }
  1495. function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
  1496. #
  1497. # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
  1498. #
  1499. # * Calls $hash_method to convert any blocks.
  1500. # * Stops when the first opening tag closes.
  1501. # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
  1502. # (it is not inside clean tags)
  1503. #
  1504. # Returns an array of that form: ( processed text , remaining text )
  1505. #
  1506. if ($text === '') return array('', '');
  1507. # Regex to match `markdown` attribute inside of a tag.
  1508. $markdown_attr_match = '
  1509. {
  1510. \s* # Eat whitespace before the `markdown` attribute
  1511. markdown
  1512. \s*=\s*
  1513. (["\']) # $1: quote delimiter
  1514. (.*?) # $2: attribute value
  1515. \1 # matching delimiter
  1516. }xs';
  1517. # Regex to match any tag.
  1518. $tag_match = '{
  1519. ( # $2: Capture hole tag.
  1520. </? # Any opening or closing tag.
  1521. [\w:$]+ # Tag name.
  1522. \s* # Whitespace.
  1523. (?:
  1524. ".*?" | # Double quotes (can contain `>`)
  1525. \'.*?\' | # Single quotes (can contain `>`)
  1526. .+? # Anything but quotes and `>`.
  1527. )*?
  1528. > # End of tag.
  1529. |
  1530. <!-- .*? --> # HTML Comment
  1531. |
  1532. <\?.*?\?> | <%.*?%> # Processing instruction

Large files files are truncated, but you can click here to view the full file