PageRenderTime 53ms CodeModel.GetById 11ms RepoModel.GetById 1ms app.codeStats 0ms

/system/Services/Markdown.php

https://github.com/sony88/answion
PHP | 1348 lines | 972 code | 138 blank | 238 comment | 45 complexity | 4ce15305e83164c3d205b803ce146a03 MD5 | raw file
  1. <?php
  2. #
  3. # Markdown - A text-to-HTML conversion tool for web writers
  4. #
  5. # PHP Markdown
  6. # Copyright (c) 2004-2012 Michel Fortin
  7. # <http://michelf.com/projects/php-markdown/>
  8. #
  9. # Original Markdown
  10. # Copyright (c) 2004-2006 John Gruber
  11. # <http://daringfireball.net/projects/markdown/>
  12. #
  # Suffix that closes empty elements. Define the constant as ">" before
  # loading this file to get HTML instead of XHTML output. An explicit
  # defined() guard replaces the former @-suppressed define() so that a
  # pre-existing value is kept without hiding any diagnostics.
  if (! defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX'))
  {
      define('MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
  }
  # Width of a tab stop, used when expanding tabs for code blocks.
  if (! defined('MARKDOWN_TAB_WIDTH'))
  {
      define('MARKDOWN_TAB_WIDTH', 4);
  }
  17. class Services_Markdown
  18. {
  # Regex to match balanced [brackets].
  # Needed to insert a maximum bracket depth while converting to PHP.
  # The *_re members are assembled in the constructor from the depths.
  var $nested_brackets_depth = 6;
  var $nested_brackets_re;
  var $nested_url_parenthesis_depth = 4;
  var $nested_url_parenthesis_re;
  # Characters that may be backslash-escaped; the constructor turns this
  # list into the character class stored in $escape_chars_re.
  var $escape_chars = '\`*_{}[]()>#+-.!';
  var $escape_chars_re;
  # Change to ">" for HTML output.
  var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
  var $tab_width = MARKDOWN_TAB_WIDTH;
  # Change to `true` to disallow markup or entities.
  var $no_markup = false;
  var $no_entities = false;
  # Predefined urls and titles for reference links and images.
  # setup() copies them into $urls / $titles before each transformation.
  var $predef_urls = array();
  var $predef_titles = array();
  function __construct()
  {
      #
      # Constructor. Prepares the detab helper and the emphasis regexes,
      # builds the nested-bracket / nested-parenthesis / escape patterns
      # and orders the gamut tables by priority.
      #
      $this->_initDetab();
      $this->prepareItalicsAndBold();

      # Balanced [brackets], nested up to the configured depth.
      $bracket_depth = $this->nested_brackets_depth;
      $this->nested_brackets_re = str_repeat('(?>[^\[\]]+|\[', $bracket_depth) . str_repeat('\])*', $bracket_depth);

      # Balanced (parentheses) inside URLs, nested up to the configured depth.
      $paren_depth = $this->nested_url_parenthesis_depth;
      $this->nested_url_parenthesis_re = str_repeat('(?>[^()\s]+|\(', $paren_depth) . str_repeat('(?>\)))*', $paren_depth);

      # Character class matching any escapable character.
      $this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

      # Sort document, block, and span gamut in ascending priority order.
      asort($this->document_gamut);
      asort($this->block_gamut);
      asort($this->span_gamut);
  }
  # Internal hashes used during transformation.
  # $urls / $titles are keyed by lower-cased link id; $html_hashes maps
  # the tokens produced by hashPart() back to the text they replaced.
  var $urls = array();
  var $titles = array();
  var $html_hashes = array();
  # Status flag to avoid invalid nesting.
  var $in_anchor = false;
  function setup()
  {
      #
      # Called before the transformation process starts to set up parser
      # state: the reference tables are seeded from the predefined URLs
      # and titles, the HTML hash table is emptied and the anchor-nesting
      # flag is cleared.
      #
      $this->urls = $this->predef_urls;
      $this->titles = $this->predef_titles;
      $this->html_hashes = array();
      # Reset the member, not a throw-away local variable, so a value left
      # over from a previous run cannot leak into this one.
      $this->in_anchor = false;
  }
  function teardown()
  {
      #
      # Called once a transformation is finished: empties the reference
      # tables and the HTML hash table so they stop holding memory.
      #
      $this->urls = $this->titles = $this->html_hashes = array();
  }
  function transform($text)
  {
      #
      # Main entry point. Normalises the input text, runs every document
      # gamut method over it and returns the converted result.
      #
      $this->setup();

      # Remove a leading UTF-8 BOM and any \x1A marker characters, then
      # standardize line endings (DOS and Mac to Unix).
      $source = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
      $source = preg_replace('{\r\n?}', "\n", $source);

      # Make sure the text ends with a newline, then convert tabs to spaces.
      $source = $this->detab($source . "\n");

      # Turn block-level HTML blocks into hash entries.
      $source = $this->hashHTMLBlocks($source);

      # Empty out lines consisting only of spaces, so later patterns can
      # match consecutive blank lines with a plain /\n+/.
      $source = preg_replace('/^[ ]+$/m', '', $source);

      # Run document gamut methods in their (already sorted) order.
      foreach (array_keys($this->document_gamut) as $step)
      {
          $source = $this->$step($source);
      }

      # Hand the result to the external FORMAT helper, then restore the
      # characters that md5Hash() replaced with placeholders.
      $source = FORMAT::parse_links($source);
      $source = $this->md5Hash_decode($source);

      $this->teardown();
      return $source;
  }
  # Document-level passes: method name => priority. transform() calls each
  # method in turn; the constructor sorts the table by priority.
  var $document_gamut = array(
      # Strip link definitions, store in hashes.
      "stripLinkDefinitions" => 20,
      "runBasicBlockGamut" => 30
  );
  function stripLinkDefinitions($text)
  {
      #
      # Strips link definitions from text, stores the URLs and titles in
      # hash references.
      #
      # Every definition matched below is handed to
      # _stripLinkDefinitions_callback(), which records it and removes it
      # from the text. Returns the text without the definitions.
      #
      $less_than_tab = $this->tab_width - 1;
      # Link defs are in the form: ^[id]: url "optional title"
      $text = preg_replace_callback('{
^[ ]{0,' . $less_than_tab . '}\[(.+)\][ ]?: # id = $1
[ ]*
\n? # maybe *one* newline
[ ]*
(?:
<(.+?)> # url = $2
|
(\S+?) # url = $3
)
[ ]*
\n? # maybe one newline
[ ]*
(?:
(?<=\s) # lookbehind for whitespace
["(]
(.*?) # title = $4
[")]
[ ]*
)? # title is optional
(?:\n+|\Z)
}xm', array(
          &$this,
          '_stripLinkDefinitions_callback'
      ), $text);
      return $text;
  }
  function _stripLinkDefinitions_callback($matches)
  {
      #
      # Records one link definition under its lower-cased id and returns
      # the empty string that replaces the definition in the text.
      #
      $id = strtolower($matches[1]);
      # The URL is either the <bracketed> form ($2) or the bare form ($3).
      if ($matches[2] == '')
      {
          $this->urls[$id] = $matches[3];
      }
      else
      {
          $this->urls[$id] = $matches[2];
      }
      # The title group is optional; store null when it is missing.
      $this->titles[$id] = isset($matches[4]) ? $matches[4] : null;
      return '';
  }
  function hashHTMLBlocks($text)
  {
      #
      # Replaces block-level raw HTML found in $text with hash tokens (see
      # _hashHTMLBlocks_callback) so that later passes leave it untouched.
      # Returns $text unchanged when markup is disabled.
      #
      if ($this->no_markup)
          return $text;
      $less_than_tab = $this->tab_width - 1;
      # Hashify HTML blocks:
      # We only want to do this for block-level HTML tags, such as headers,
      # lists, and tables. That's because we still want to wrap <p>s around
      # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
      # phrase emphasis, and spans. The list of tags we're looking for is
      # hard-coded:
      #
      # * List "a" is made of tags which can be both inline or block-level.
      # These will be treated block-level when the start tag is alone on
      # its line, otherwise they're not matched here and will be taken as
      # inline later.
      # * List "b" is made of tags which are always block-level;
      #
      $block_tags_a_re = 'ins|del';
      $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|' . 'script|noscript|form|fieldset|iframe|math';
      # Regular expression for the content of a block tag.
      $nested_tags_level = 4;
      $attr = '
          (?> # optional tag attributes
              \s # starts with whitespace
              (?>
                  [^>"/]+ # text outside quotes
                  |
                  /+(?!>) # slash not followed by ">"
                  |
                  "[^"]*" # text inside double quotes (tolerate ">")
                  |
                  \'[^\']*\' # text inside single quotes (tolerate ">")
              )*
          )?
      ';
      $content = str_repeat('
          (?>
              [^<]+ # content without tag
              |
              <\2 # nested opening tag
              ' . $attr . ' # attributes
              (?>
                  />
                  |
                  >', $nested_tags_level) . # end of opening tag
          '.*?' . # last level nested tag content
          str_repeat('
                  </\2\s*> # closing nested tag
              )
              |
              <(?!/\2\s*> # other tags with a different name
              )
          )*', $nested_tags_level);
      # Same content pattern, but referring to the tag name captured in $3.
      $content2 = str_replace('\2', '\3', $content);
      # First, look for nested blocks, e.g.:
      # <div>
      # <div>
      # tags for inner block must be indented.
      # </div>
      # </div>
      #
      # The outermost tags must start at the left margin for this to match, and
      # the inner nested divs must be indented.
      # We need to do this before the next, more liberal match, because the next
      # match will start at the first `<div>` and stop at the first `</div>`.
      #
      # Capture groups: $1 whole block, $2 tag of list "b", $3 tag of list
      # "a", $4 "hr", $5 processing-instruction delimiter. The closing
      # delimiter of a processing instruction must therefore refer back to
      # $5; a reference to $2 is unset in that branch and can never match.
      $text = preg_replace_callback('{(?>
          (?>
              (?<=\n\n) # Starting after a blank line
              | # or
              \A\n? # the beginning of the doc
          )
          ( # save in $1
              # Match from `\n<tag>` to `</tag>\n`, handling nested tags
              # in between.
              [ ]{0,' . $less_than_tab . '}
              <(' . $block_tags_b_re . ')# start tag = $2
              ' . $attr . '> # attributes followed by > and \n
              ' . $content . ' # content, support nesting
              </\2> # the matching end tag
              [ ]* # trailing spaces/tabs
              (?=\n+|\Z) # followed by a newline or end of document
              | # Special version for tags of group a.
              [ ]{0,' . $less_than_tab . '}
              <(' . $block_tags_a_re . ')# start tag = $3
              ' . $attr . '>[ ]*\n # attributes followed by >
              ' . $content2 . ' # content, support nesting
              </\3> # the matching end tag
              [ ]* # trailing spaces/tabs
              (?=\n+|\Z) # followed by a newline or end of document
              | # Special case just for <hr />. It was easier to make a special
              # case than to make the other regex more complicated.
              [ ]{0,' . $less_than_tab . '}
              <(hr) # start tag = $4
              ' . $attr . ' # attributes
              /?> # the matching end tag
              [ ]*
              (?=\n{2,}|\Z) # followed by a blank line or end of document
              | # Special case for standalone HTML comments:
              [ ]{0,' . $less_than_tab . '}
              (?s:
                  <!-- .*? -->
              )
              [ ]*
              (?=\n{2,}|\Z) # followed by a blank line or end of document
              | # PHP and ASP-style processor instructions (<? and <%)
              [ ]{0,' . $less_than_tab . '}
              (?s:
                  <([?%]) # $5
                  .*?
                  \5>
              )
              [ ]*
              (?=\n{2,}|\Z) # followed by a blank line or end of document
          )
      )}Sxmi', array(
          $this,
          '_hashHTMLBlocks_callback'
      ), $text);
      return $text;
  }
  function _hashHTMLBlocks_callback($matches)
  {
      # Swap the matched HTML block ($1) for its block-level hash token,
      # surrounded by blank lines.
      return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
  }
  function hashPart($text, $boundary = 'X')
  {
      #
      # Stores $text in the hash table and returns the unique token that
      # stands in for it in the text stream until it is unhashed.
      #
      # $boundary is the character placed on both sides of the token. By
      # convention "B" marks block elements that must not be wrapped in
      # paragraph tags, ":" marks word separators and "X" is the general
      # case.
      #
      # Expand tokens already present in $text first, so a single unhash
      # pass at the end is enough.
      $expanded = $this->unhash($text);

      # Tokens are numbered by a counter kept across calls.
      static $counter = 0;
      $counter++;
      $token = $boundary . "\x1A" . $counter . $boundary;

      $this->html_hashes[$token] = $expanded;
      return $token;
  }
  function hashBlock($text)
  {
      # Convenience wrapper: hash $text using the block-level boundary "B".
      $block_token = $this->hashPart($text, 'B');
      return $block_token;
  }
  var $block_gamut = array(
      #
      # These are all the transformations that form block-level
      # tags like paragraphs, headers, and list items.
      # Format: method name => priority; runBasicBlockGamut() calls them
      # in the order left by the constructor's sort.
      #
      "doCodeBlocks" => 5,
      "doHeaders" => 10,
      "doLists" => 40,
      "doBlockQuotes" => 60
  );
  function runBlockGamut($text)
  {
      #
      # Run the block gamut on $text after hashing its raw HTML blocks.
      #
      # The hashing has to be repeated for each block rather than done only
      # once for the whole document: HTML inside list items may have been
      # indented, and an earlier hashHTMLBlocks pass would have taken it
      # for a code block.
      return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
  }
  function runBasicBlockGamut($text)
  {
      #
      # Run the block gamut without hashing HTML blocks first. Used when
      # the HTML blocks are known to be hashed already, as in the first
      # whole-document pass.
      #
      foreach (array_keys($this->block_gamut) as $step)
      {
          $text = $this->$step($text);
      }
      # Finally form paragraphs and restore hashed blocks.
      return $this->formParagraphs($text);
  }
  var $span_gamut = array(
      #
      # These are all the transformations that occur *within* block-level
      # tags like paragraphs, headers, and list items.
      # Format: method name => priority; runSpanGamut() calls them in the
      # order left by the constructor's sort.
      #
      # Process character escapes, code spans, and inline HTML
      # in one shot.
      "parseSpan" => - 30,
      # Process image tags.
      "doImages" => 10,
      # Encode ampersands and angle brackets that are still unescaped.
      # NOTE(review): no anchor or autolink pass is registered in this
      # table, although older comments here referred to them.
      "encodeAmpsAndAngles" => 40,
      "doItalicsAndBold" => 50,
      "doHardBreaks" => 60
  );
  function runSpanGamut($text)
  {
      #
      # Apply every span gamut method to $text, in table order, and return
      # the result.
      #
      $result = $text;
      foreach (array_keys($this->span_gamut) as $step)
      {
          $result = $this->$step($result);
      }
      return $result;
  }
  function doHardBreaks($text)
  {
      # Two or more spaces before a newline become a hard line break.
      $handler = array($this, '_doHardBreaks_callback');
      return preg_replace_callback('/ {2,}\n/', $handler, $text);
  }
  function _doHardBreaks_callback($matches)
  {
      # Emit a hashed <br> tag (closed with the configured suffix) followed
      # by a newline.
      $break_tag = '<br' . $this->empty_element_suffix . "\n";
      return $this->hashPart($break_tag);
  }
  function doImages($text)
  {
      #
      # Turn Markdown image shortcuts into <img> tags.
      #
      # Two passes are made over $text: reference-style images first, then
      # inline images. Each match is rewritten by its own callback.
      #
      #
      # First, handle reference-style labeled images: ![alt text][id]
      #
      $text = preg_replace_callback('{
( # wrap whole match in $1
!\[
(' . $this->nested_brackets_re . ') # alt text = $2
\]
[ ]? # one optional space
(?:\n[ ]*)? # one optional newline followed by spaces
\[
(.*?) # id = $3
\]
)
}xs', array(
          &$this,
          '_doImages_reference_callback'
      ), $text);
      #
      # Next, handle inline images: ![alt text](url "optional title")
      # Don't forget: encode * and _
      #
      $text = preg_replace_callback('{
( # wrap whole match in $1
!\[
(' . $this->nested_brackets_re . ') # alt text = $2
\]
\s? # One optional whitespace character
\( # literal paren
[ \n]*
(?:
<(\S*)> # src url = $3
|
(' . $this->nested_url_parenthesis_re . ') # src url = $4
)
[ \n]*
( # $5
([\'"]) # quote char = $6
(.*?) # title = $7
\6 # matching quote
[ \n]*
)? # title is optional
\)
)
}xs', array(
          &$this,
          '_doImages_inline_callback'
      ), $text);
      return $text;
  }
  function _doImages_reference_callback($matches)
  {
      #
      # Build the <img> tag for a reference-style image, or return the
      # matched text untouched when the link id is unknown.
      #
      $alt = $matches[2];
      $id = strtolower($matches[3]);
      if ($id === '')
      {
          # Shortcut form ![this][]: the alt text doubles as the id.
          $id = strtolower($alt);
      }

      # If there's no such link ID, leave intact.
      if (! isset($this->urls[$id]))
      {
          return $matches[1];
      }

      $alt = $this->encodeAttribute($alt);
      $src = $this->encodeAttribute($this->urls[$id]);
      $tag = "<img src=\"$src\" alt=\"$alt\"";
      if (isset($this->titles[$id]))
      {
          $tag .= ' title="' . $this->encodeAttribute($this->titles[$id]) . '"';
      }
      return $this->hashPart($tag . $this->empty_element_suffix);
  }
  function _doImages_inline_callback($matches)
  {
      #
      # Build the hashed <img> tag for an inline image.
      #
      # The URL is either the <bracketed> form ($3) or the bare form ($4).
      $src = $matches[3] == '' ? $matches[4] : $matches[3];

      $alt = $this->encodeAttribute($matches[2]);
      $src = $this->encodeAttribute($src);
      $tag = "<img src=\"$src\" alt=\"$alt\"";

      # The title ($7) is optional.
      if (isset($matches[7]))
      {
          $tag .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
      }
      return $this->hashPart($tag . $this->empty_element_suffix);
  }
  function doHeaders($text)
  {
      #
      # Convert atx-style headers written with two or three leading "#"
      # characters; each match is rendered by _doHeaders_callback_atx().
      #
      $atx_re = '{
^(\#{2,3}) # $1 = string of #\'s
[ ]*
([^\n]+?) # $2 = Header text
[ ]*
\#* # optional closing #\'s (not counted)
[\n]
}xm';
      return preg_replace_callback($atx_re, array($this, '_doHeaders_callback_atx'), $text);
  }
  function _doHeaders_callback_atx($matches)
  {
      # The header level is the number of leading "#" characters ($1);
      # the header text ($2) goes through the span gamut.
      $depth = strlen($matches[1]);
      $inner = $this->runSpanGamut($matches[2]);
      return $this->hashBlock("<h$depth>" . $inner . "</h$depth>");
  }
  function doLists($text)
  {
      #
      # Form HTML ordered (numbered) and unordered (bulleted) lists.
      #
      # One pass is made per list kind; every whole list that matches is
      # rewritten by _doLists_callback().
      #
      $less_than_tab = $this->tab_width - 1;
      # Re-usable patterns to match list item bullets and number markers:
      # (only "-" is accepted as a bullet here)
      $marker_ul_re = '[-]';
      $marker_ol_re = '\d+[\.]';
      # NOTE(review): $marker_any_re is not used anywhere below.
      $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
      # Each marker pattern is paired with the pattern of the other kind.
      $markers_relist = array(
          $marker_ul_re => $marker_ol_re,
          $marker_ol_re => $marker_ul_re
      );
      foreach ($markers_relist as $marker_re => $other_marker_re)
      {
          # Re-usable pattern to match any entire ul or ol list:
          $whole_list_re = '
( # $1 = whole list
( # $2
([ ]{0,' . $less_than_tab . '}) # $3 = number of spaces
(' . $marker_re . ') # $4 = first list item marker
[ ]+
)
(?s:.+?)
( # $5
\z
|
\n{2,}
(?=\S)
(?! # Negative lookahead for another list item marker
[ ]*
' . $marker_re . '[ ]+
)
|
(?= # Lookahead for another kind of list
\n
\3 # Must have the same indentation
' . $other_marker_re . '[ ]+
)
)
)
'; // mx
          # We use a different prefix before nested lists than top-level lists.
          # See extended comment in processListItems().
          if ($this->list_level)
          {
              $text = preg_replace_callback('{
^
' . $whole_list_re . '
}mx', array(
                  &$this,
                  '_doLists_callback'
              ), $text);
          }
          else
          {
              $text = preg_replace_callback('{
(?:(?<=\n)\n|\A\n?) # Must eat the newline
' . $whole_list_re . '
}mx', array(
                  &$this,
                  '_doLists_callback'
              ), $text);
          }
      }
      return $text;
  }
  function _doLists_callback($matches)
  {
      #
      # Render one whole list ($1) as a hashed <ul> or <ol> block. The kind
      # is decided from the first item marker ($4).
      #
      $bullet_re = '[*+-]';
      $number_re = '\d+[\.]';
      if (preg_match("/$bullet_re/", $matches[4]))
      {
          $tag = "ul";
          $item_marker_re = $bullet_re;
      }
      else
      {
          $tag = "ol";
          $item_marker_re = $number_re;
      }
      $items = $this->processListItems($matches[1] . "\n", $item_marker_re);
      return $this->hashBlock("<$tag>" . $items . "</$tag>");
  }
  # Current list nesting depth: processListItems() increments it on entry
  # and decrements it on exit; doLists() checks whether it is non-zero.
  var $list_level = 0;
  function processListItems($list_str, $marker_any_re)
  {
      #
      # Process the contents of a single ordered or unordered list, splitting it
      # into individual list items.
      #
      # $list_str is the text of the whole list and $marker_any_re the regex
      # fragment matching its item markers. Each item is rendered by
      # _processListItems_callback(); the converted text is returned.
      #
      # The $this->list_level global keeps track of when we're inside a list.
      # Each time we enter a list, we increment it; when we leave a list,
      # we decrement. If it's zero, we're not in a list anymore.
      #
      # We do this because when we're not inside a list, we want to treat
      # something like this:
      #
      # I recommend upgrading to version
      # 8. Oops, now this line is treated
      # as a sub-list.
      #
      # As a single paragraph, despite the fact that the second line starts
      # with a digit-period-space sequence.
      #
      # Whereas when we're inside a list (or sub-list), that line will be
      # treated as the start of a sub-list. What a kludge, huh? This is
      # an aspect of Markdown's syntax that's hard to parse perfectly
      # without resorting to mind-reading. Perhaps the solution is to
      # change the syntax rules such that sub-lists must start with a
      # starting cardinal number; e.g. "1." or "a.".
      $this->list_level ++;
      # trim trailing blank lines:
      $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
      $list_str = preg_replace_callback('{
(\n)? # leading line = $1
(^[ ]*) # leading whitespace = $2
(' . $marker_any_re . ' # list marker and space = $3
(?:[ ]+|(?=\n)) # space only required if item is not empty
)
((?s:.*?)) # list item text = $4
(?:(\n+(?=\n))|\n) # tailing blank line = $5
(?= \n* (\z | \2 (' . $marker_any_re . ') (?:[ ]+|(?=\n))))
}xm', array(
          &$this,
          '_processListItems_callback'
      ), $list_str);
      $this->list_level --;
      return $list_str;
  }
  function _processListItems_callback($matches)
  {
      #
      # Render a single list item as <li>...</li>.
      #
      # An item surrounded by blank lines ($1 / $5), or containing one, is
      # treated as block content; any other item only gets sub-list and
      # span processing.
      #
      $body = $matches[4];
      $is_block_item = ! empty($matches[1]) || ! empty($matches[5]) || preg_match('/\n{2,}/', $body);

      if (! $is_block_item)
      {
          # Recursion for sub-lists:
          $body = $this->doLists($this->outdent($body));
          $body = preg_replace('/\n+$/', '', $body);
          return "<li>" . $this->runSpanGamut($body) . "</li>";
      }

      # Replace marker with the appropriate whitespace indentation
      $padding = str_repeat(' ', strlen($matches[3]));
      $body = $matches[2] . $padding . $body;
      $body = $this->runBlockGamut($this->outdent($body) . "\n");
      return "<li>" . $body . "</li>";
  }
  function doCodeBlocks($text)
  {
      #
      # Process {{{ ... }}} code spans into `<code>` elements.
      #
      # NOTE(review): preg_match() reports only the first hit, so this
      # guard checks that an opener and a closer are either both present or
      # both absent - it does not compare how many of each there are.
      $has_opener = (bool) preg_match('/\{\{\{/i', $text);
      $has_closer = (bool) preg_match('/\}\}\}/i', $text);
      if ($has_opener != $has_closer)
      {
          return $text;
      }
      return preg_replace_callback('/\{\{\{[ \n]*(.*?)\}\}\}/is', array($this, 'code_block_callback'), $text);
  }
  function code_block_callback($matches)
  {
      #
      # Render the body of one code span ($1): tabs and spaces become
      # non-breaking spaces, Markdown-significant characters are replaced
      # by md5Hash(), and the result is wrapped in <code>.
      #
      $body = str_replace("\t", "&nbsp;&nbsp;&nbsp;&nbsp;", $matches[1]);
      $body = str_replace(" ", "&nbsp;", $body);
      return '<code>' . $this->md5Hash($body) . '</code>';
  }
  function get_hash_table()
  {
      #
      # Returns a two-element array: the list of protected characters and,
      # at the same positions, the md5 digest standing in for each of them.
      #
      $plain = array('\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '.', '!', ':', '<', '>');
      $digests = array_map('md5', $plain);
      return array($plain, $digests);
  }
  function md5Hash($text)
  {
      # Replace every protected character in $text by its md5 digest.
      list($plain, $digests) = $this->get_hash_table();
      return str_replace($plain, $digests, $text);
  }
  function md5Hash_decode($text)
  {
      # Reverse of md5Hash(): turn the md5 digests back into characters.
      list($plain, $digests) = $this->get_hash_table();
      return str_replace($digests, $plain, $text);
  }
  function makeCodeSpan($code)
  {
      #
      # Build the hashed <code> markup for $code: the text is trimmed and
      # its special characters are escaped, quotes excepted.
      #
      $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
      return $this->hashPart('<code>' . $escaped . '</code>');
  }
  # Emphasis token patterns. In each table the '' key holds the pattern for
  # an opening marker; the other keys are the marker currently open and
  # hold the pattern for its closing marker. prepareItalicsAndBold()
  # combines the three tables into $em_strong_prepared_relist.
  var $em_relist = array(
      '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
      '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
      '_' => '(?<=\S|^)(?<!_)_(?!_)'
  );
  var $strong_relist = array(
      '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
      '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
      '__' => '(?<=\S|^)(?<!_)__(?!_)'
  );
  var $em_strong_relist = array(
      '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
      '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
      '___' => '(?<=\S|^)(?<!_)___(?!_)'
  );
  # Filled by prepareItalicsAndBold(); keyed by the "$em$strong" state.
  var $em_strong_prepared_relist;
  function prepareItalicsAndBold()
  {
      #
      # Pre-build, for every combination of open em / strong markers, the
      # regular expression that finds the next emphasis token.
      #
      foreach ($this->em_relist as $em_key => $em_pattern)
      {
          foreach ($this->strong_relist as $strong_key => $strong_pattern)
          {
              $state = "$em_key$strong_key";

              # Allowed tokens: em and strong, preceded by the three-char
              # pattern when one exists for this state.
              $alternatives = array($em_pattern, $strong_pattern);
              if (isset($this->em_strong_relist[$state]))
              {
                  array_unshift($alternatives, $this->em_strong_relist[$state]);
              }

              # Master expression for this state.
              $this->em_strong_prepared_relist[$state] = '{(' . implode('|', $alternatives) . ')}';
          }
      }
  }
  function doItalicsAndBold($text)
  {
      #
      # Convert emphasis markers in $text into hashed <em> / <strong>
      # spans and return the resulting text.
      #
      # The text is consumed one emphasis token at a time. $token_stack
      # holds the markers currently open and $text_stack the text collected
      # since each of them; index 0 is always the innermost level. $em and
      # $strong hold the open marker of each kind ('' when none).
      #
      # Double and single underscore tokens are deliberately left as plain
      # text (see the "remove __ support" / "remove _ support" branches).
      #
      $token_stack = array(
          ''
      );
      $text_stack = array(
          ''
      );
      $em = '';
      $strong = '';
      $tree_char_em = false;
      while (1)
      {
          #
          # Get prepared regular expression for searching emphasis tokens
          # in current context.
          #
          $token_re = $this->em_strong_prepared_relist["$em$strong"];
          #
          # Each loop iteration searches for the next emphasis token and
          # splits the text into: text before, the token, text after.
          #
          $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
          $text_stack[0] .= $parts[0];
          # By-reference access: the entries are missing when no token
          # was found.
          $token = & $parts[1];
          $text = & $parts[2];
          if (empty($token))
          {
              # Reached end of text span: empty stack without emitting
              # any more emphasis.
              while ($token_stack[0])
              {
                  $text_stack[1] .= array_shift($token_stack);
                  $text_stack[0] .= array_shift($text_stack);
              }
              break;
          }
          $token_len = strlen($token);
          if ($tree_char_em)
          {
              # Reached closing marker while inside a three-char emphasis.
              if ($token_len == 3)
              {
                  # Three-char closing marker, close em and strong.
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong><em>$span</em></strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $em = '';
                  $strong = '';
              }
              else
              {
                  # Other closing marker: close one em or strong and
                  # change current token state to match the other.
                  # ($token[0]: square-bracket string offset; the curly
                  # brace form is no longer valid PHP.)
                  $token_stack[0] = str_repeat($token[0], 3 - $token_len);
                  $tag = $token_len == 2 ? "strong" : "em";
                  $span = $text_stack[0];
                  $span = $this->runSpanGamut($span);
                  $span = "<$tag>$span</$tag>";
                  $text_stack[0] = $this->hashPart($span);
                  $$tag = ''; # $$tag stands for $em or $strong
              }
              $tree_char_em = false;
          }
          else if ($token_len == 3)
          {
              if ($em)
              {
                  # Reached closing marker for both em and strong.
                  # Closing strong marker:
                  for ($i = 0; $i < 2; ++ $i)
                  {
                      $shifted_token = array_shift($token_stack);
                      $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<$tag>$span</$tag>";
                      $text_stack[0] .= $this->hashPart($span);
                      $$tag = ''; # $$tag stands for $em or $strong
                  }
              }
              else
              {
                  # Reached opening three-char emphasis marker. Push on token
                  # stack; will be handled by the special condition above.
                  $em = $token[0];
                  $strong = "$em$em";
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $tree_char_em = true;
              }
          }
          else if ($token_len == 2)
          {
              if ($token == '__') // remove __ support
              {
                  $text_stack[0] .= $token;
              }
              else if ($strong)
              {
                  # Unwind any dangling emphasis marker:
                  if (strlen($token_stack[0]) == 1)
                  {
                      $text_stack[1] .= array_shift($token_stack);
                      $text_stack[0] .= array_shift($text_stack);
                  }
                  # Closing strong marker:
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong>$span</strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $strong = '';
              }
              else
              {
                  # Opening strong marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $strong = $token;
              }
          }
          else
          {
              # Here $token_len == 1
              if ($em)
              {
                  if (strlen($token_stack[0]) == 1 && ($token != '_'))
                  { // remove _ support
                      # Closing emphasis marker:
                      array_shift($token_stack);
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<em>$span</em>";
                      $text_stack[0] .= $this->hashPart($span);
                      $em = '';
                  }
                  else
                  {
                      $text_stack[0] .= $token;
                  }
              }
              else
              {
                  # Opening emphasis marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $em = $token;
              }
          }
      }
      return $text_stack[0];
  }
  function doBlockQuotes($text)
  {
      #
      # Find runs of ">"-quoted lines and let _doBlockQuotes_callback()
      # turn each run into a blockquote.
      #
      $quote_re = '/
( # Wrap whole match in $1
(?>
^[ ]*>[ ]? # ">" at the start of a line
.+\n # rest of the first line
(.+\n)* # subsequent consecutive lines
\n* # blanks
)+
)
/xm';
      return preg_replace_callback($quote_re, array($this, '_doBlockQuotes_callback'), $text);
  }
  function _doBlockQuotes_callback($matches)
  {
      #
      # Render one quoted run ($1) as a hashed <blockquote> block.
      #
      # Trim one level of quoting and empty whitespace-only lines, then
      # recurse into the block gamut.
      $quoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);
      $inner = $this->runBlockGamut($quoted);

      # Indent every line of the result...
      $inner = preg_replace('/^/m', " ", $inner);
      # ...but undo that inside <pre> content, where leading spaces
      # cause problems.
      $inner = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', array($this, '_doBlockQuotes_callback2'), $inner);

      return $this->hashBlock('<blockquote>' . $this->md5Hash($inner) . "</blockquote>\n");
  }
  function _doBlockQuotes_callback2($matches)
  {
      # Strip the leading space from each line of the matched <pre> block.
      return preg_replace('/^ /m', '', $matches[1]);
  }
  function formParagraphs($text)
  {
      #
      # Params:
      # $text - string to split into paragraphs
      #
      # Chunks separated by blank lines are processed one by one: a chunk
      # that is exactly one block token is replaced by the stored block,
      # any other chunk goes through the span gamut and is unhashed. No
      # <p> tags are added.
      #
      # Strip leading and trailing lines:
      $trimmed = preg_replace('/\A\n+|\n+\z/', '', $text);
      $chunks = preg_split('/\n{2,}/', $trimmed, - 1, PREG_SPLIT_NO_EMPTY);

      $rendered = array();
      foreach ($chunks as $chunk)
      {
          if (preg_match('/^B\x1A[0-9]+B$/', $chunk))
          {
              # Is a block: restore it from the hash table.
              $rendered[] = $this->html_hashes[$chunk];
              continue;
          }
          # Is a paragraph.
          $rendered[] = $this->unhash($this->runSpanGamut($chunk));
      }
      return implode("\n\n", $rendered);
  }
  function encodeAttribute($text)
  {
    #
    # Encode $text for use inside a double-quoted HTML attribute:
    # ampersands and "<" are handled by encodeAmpsAndAngles, then every
    # double quote becomes &quot;. Single quotes are left untouched, so the
    # result is *not* suitable for attributes enclosed in single quotes.
    #
    $encoded = $this->encodeAmpsAndAngles($text);
    return str_replace('"', '&quot;', $encoded);
  }
  function encodeAmpsAndAngles($text)
  {
    #
    # Encode the ampersands and "<" characters of $text as HTML entities.
    # In no-entities mode every "&" is encoded; otherwise an "&" that
    # already starts something shaped like a character entity is left
    # alone.
    #
    # Ampersand-encoding based entirely on Nat Irons's Amputator
    # MT plugin: <http://bumppo.net/projects/amputator/>
    $amp_encoded = $this->no_entities
      ? str_replace('&', '&amp;', $text)
      : preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&amp;', $text);

    # Encode remaining <'s
    return str_replace('<', '&lt;', $amp_encoded);
  }
  function parseSpan($str)
  {
    #
    # Walk through $str token by token. A token is a backslash escape, a
    # run of backticks or (unless no_markup is set) an inline HTML
    # construct. Text between tokens is copied through unchanged; each
    # token is replaced by the value handleSpanToken returns for it.
    #
    # Alternatives for embedded markup, left out when markup is disallowed.
    $markup_re = $this->no_markup ? '' : '
        |
          <!-- .*? -->                  # comment
        |
          <\?.*?\?> | <%.*?%>           # processing instruction
        |
          <[/!$]?[-a-zA-Z0-9:_]+        # regular tags
          (?>
            \s
            (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
          )?
          >
      ';
    $token_re = '{
        (
          \\\\' . $this->escape_chars_re . '
        |
          (?<![`\\\\])
          `+                            # code span marker
      ' . $markup_re . '
        )
      }xs';

    $result = '';
    for (;;)
    {
      # Split once at the next token: [0] is the text before it, [1] the
      # token itself, [2] everything after it.
      $pieces = preg_split($token_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

      # Text preceding the token is passed through as-is.
      $result .= $pieces[0];

      # No token left: the end of the string has been reached.
      if (!isset($pieces[1]))
      {
        return $result;
      }

      # The remainder is handed over by reference, so the handler may
      # alter it before scanning continues from there.
      $result .= $this->handleSpanToken($pieces[1], $pieces[2]);
      $str = $pieces[2];
    }
  }
  function handleSpanToken($token, &$str)
  {
    #
    # Handle $token provided by parseSpan by determining its nature and
    # returning the corresponding value that should replace it.
    #
    # Params:
    #    $token - non-empty token string matched by parseSpan
    #    $str   - text following the token, passed by reference; it is not
    #             modified here but stays in the signature for callers
    #
    # Returns the token unchanged for backslash escapes and backtick runs,
    # and the result of hashPart($token) for anything else.
    #
    # The first character is read with [] because the {} string-offset
    # syntax is a parse error as of PHP 8.0.
    switch ($token[0])
    {
      case "\\":
        # Escaped characters are returned verbatim. The entity-hashing
        # statement that used to follow this return could never run and
        # has been removed.
        return $token;
      case "`":
        # Returned as text since no ending marker is looked for.
        return $token;
      default:
        return $this->hashPart($token);
    }
  }
  function outdent($text)
  {
    #
    # Remove one level of indentation from every line of $text: either a
    # single leading tab or between one and tab_width leading spaces.
    #
    $indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
    return preg_replace($indent_re, '', $text);
  }
  # Name of the string length function used by detab, `mb_strlen` by
  # default. `_initDetab` replaces it with a function of its own to handle
  # UTF-8 if the default function does not exist.
  var $utf8_strlen = 'mb_strlen';
  function detab($text)
  {
    #
    # Replace tabs with the appropriate amount of space.
    #
    # Only lines that actually contain a tab are matched; each of them is
    # handed to _detab_callback, which rebuilds the line with spaces in
    # place of the tabs.
    #
    $tabbed_line_re = '/^.*\t.*$/m';
    return preg_replace_callback($tabbed_line_re, array($this, '_detab_callback'), $text);
  }
  function _detab_callback($matches)
  {
    #
    # preg_replace_callback handler for detab. Rebuilds the matched line
    # ($matches[0]) with each tab replaced by as many spaces as are needed
    # to reach the next multiple of tab_width.
    #
    $length_of = $this->utf8_strlen; # strlen function for UTF-8.
    $width = $this->tab_width;

    # Split the line at tabs; the first piece opens the rebuilt line.
    $segments = explode("\t", $matches[0]);
    $expanded = array_shift($segments);

    foreach ($segments as $segment)
    {
      # Pad up to the next tab stop, then append the following piece.
      $padding = $width - ($length_of($expanded, 'UTF-8') % $width);
      $expanded .= str_repeat(' ', $padding) . $segment;
    }

    return $expanded;
  }
  function _initDetab()
  {
    #
    # Check for the availability of the function in the `utf8_strlen`
    # property (initially `mb_strlen`). If the function is not available,
    # install a fallback that loosely counts the number of UTF-8 characters
    # with a regular expression.
    #
    $current = $this->utf8_strlen;

    # Nothing to do if the fallback is already installed (a second call
    # must not hand a Closure to function_exists) or if the named function
    # exists.
    if ($current instanceof Closure || function_exists($current))
    {
      return;
    }

    # An anonymous function is used because create_function() was removed
    # in PHP 8.0. Extra arguments passed by the caller (the 'UTF-8'
    # encoding name) are ignored. preg_match_all returns the number of
    # matches: one per single byte in 00-BF, or per lead byte C0-FF
    # together with its continuation bytes.
    $this->utf8_strlen = function ($text)
    {
      return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
    };
  }
  function unhash($text)
  {
    #
    # Swap back in all the tags hashed by _HashHTMLBlocks.
    #
    # A hash key is any character, then \x1A, then digits, then the same
    # character again. Each key found in $text is replaced by the value
    # _unhash_callback returns for it.
    #
    $hash_key_re = '/(.)\x1A[0-9]+\1/';
    return preg_replace_callback($hash_key_re, array($this, '_unhash_callback'), $text);
  }
  function _unhash_callback($matches)
  {
    #
    # preg_replace_callback handler for unhash: returns the html_hashes
    # entry stored under the matched hash key ($matches[0]).
    #
    $hash_key = $matches[0];
    return $this->html_hashes[$hash_key];
  }
  1179. }