PageRenderTime 70ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 0ms

/wp-content/plugins/s2member/includes/externals/markdown/nc-markdown.inc.php

https://gitlab.com/Gashler/dp
PHP | 1475 lines | 964 code | 161 blank | 350 comment | 68 complexity | 80f4b77d2cdf952277b5efd306d110a0 MD5 | raw file
  1. <?php
  2. /**
  3. * PHP Markdown class.
  4. *
  5. * Copyright {@link http://www.michelf.com/projects/php-markdown/ Michel Fortin}.
  6. * Original Markdown. Copyright {@link http://daringfireball.net/projects/markdown/ John Gruber}.
  7. *
  8. * Modified by {@link http://www.websharks-inc.com/ WebSharks, Inc.}.
  9. * Excludes WordPress® and all other interfaces.
  10. * Uses a custom class name and interface.
  11. *
  12. * This file is included with all WordPress® themes/plugins by WebSharks, Inc.
  13. *
  14. * @package WebSharks\Xtnls\Markdown
  15. * @since x.xx
  16. */
  17. /**
  18. * PHP Markdown interface.
  19. *
  20. * @package WebSharks\Xtnls\Markdown
  21. * @since x.xx
  22. *
  23. * @param str $text Text to be parsed by the Markdown class.
  24. * @return str HTML output; after having been parsed by the Markdown class.
  25. */
  26. function NC_Markdown($text) {
  27. static $parser;
  28. if (!isset($parser)) {
  29. $parser_class = NC_Markdown_Parser;
  30. $parser = new $parser_class;
  31. }
  32. return $parser->transform($text);
  33. }
  34. /**
  35. * PHP Markdown class.
  36. * @package Xtnls\Markdown
  37. * @since x.xx
  38. */
  39. class NC_Markdown_Parser {
  40. # Regex to match balanced [brackets].
# Needed to insert a maximum bracket depth while converting to PHP.
  42. var $nested_brackets_depth = 6;
  43. var $nested_brackets_re;
  44. var $nested_url_parenthesis_depth = 4;
  45. var $nested_url_parenthesis_re;
  46. # Table of hash values for escaped characters:
  47. var $escape_chars = '\`*_{}[]()>#+-.!';
  48. var $escape_chars_re;
  49. # Change to ">" for HTML output.
  50. var $empty_element_suffix = " />";
  51. var $tab_width = 4;
  52. # Change to `true` to disallow markup or entities.
  53. var $no_markup = false;
  54. var $no_entities = false;
  55. # Predefined urls and titles for reference links and images.
  56. var $predef_urls = array();
  57. var $predef_titles = array();
  58. function NC_Markdown_Parser() {
  59. #
  60. # Constructor function. Initialize appropriate member variables.
  61. #
  62. $this->_initDetab();
  63. $this->prepareItalicsAndBold();
  64. $this->nested_brackets_re =
  65. str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  66. str_repeat('\])*', $this->nested_brackets_depth);
  67. $this->nested_url_parenthesis_re =
  68. str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
  69. str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
  70. $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
  71. # Sort document, block, and span gamut in ascendent priority order.
  72. asort($this->document_gamut);
  73. asort($this->block_gamut);
  74. asort($this->span_gamut);
  75. }
  76. # Internal hashes used during transformation.
  77. var $urls = array();
  78. var $titles = array();
  79. var $html_hashes = array();
  80. # Status flag to avoid invalid nesting.
  81. var $in_anchor = false;
  82. function setup() {
  83. #
  84. # Called before the transformation process starts to setup parser
  85. # states.
  86. #
  87. # Clear global hashes.
  88. $this->urls = $this->predef_urls;
  89. $this->titles = $this->predef_titles;
  90. $this->html_hashes = array();
  91. $in_anchor = false;
  92. }
  93. function teardown() {
  94. #
  95. # Called after the transformation process to clear any variable
  96. # which may be taking up memory unnecessarly.
  97. #
  98. $this->urls = array();
  99. $this->titles = array();
  100. $this->html_hashes = array();
  101. }
  102. function transform($text) {
  103. #
  104. # Main function. Performs some preprocessing on the input text
  105. # and pass it through the document gamut.
  106. #
  107. $this->setup();
  108. # Remove UTF-8 BOM and marker character in input, if present.
  109. $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
  110. # Standardize line endings:
  111. # DOS to Unix and Mac to Unix
  112. $text = preg_replace('{\r\n?}', "\n", $text);
  113. # Make sure $text ends with a couple of newlines:
  114. $text .= "\n\n";
  115. # Convert all tabs to spaces.
  116. $text = $this->detab($text);
  117. # Turn block-level HTML blocks into hash entries
  118. $text = $this->hashHTMLBlocks($text);
  119. # Strip any lines consisting only of spaces and tabs.
  120. # This makes subsequent regexen easier to write, because we can
  121. # match consecutive blank lines with /\n+/ instead of something
  122. # contorted like /[ ]*\n+/ .
  123. $text = preg_replace('/^[ ]+$/m', '', $text);
  124. # Run document gamut methods.
  125. foreach ($this->document_gamut as $method => $priority) {
  126. $text = $this->$method($text);
  127. }
  128. $this->teardown();
  129. return $text . "\n";
  130. }
  131. var $document_gamut = array(
  132. # Strip link definitions, store in hashes.
  133. "stripLinkDefinitions" => 20,
  134. "runBasicBlockGamut" => 30,
  135. );
  136. function stripLinkDefinitions($text) {
  137. #
  138. # Strips link definitions from text, stores the URLs and titles in
  139. # hash references.
  140. #
  141. $less_than_tab = $this->tab_width - 1;
  142. # Link defs are in the form: ^[id]: url "optional title"
  143. $text = preg_replace_callback('{
  144. ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
  145. [ ]*
  146. \n? # maybe *one* newline
  147. [ ]*
  148. <?(\S+?)>? # url = $2
  149. [ ]*
  150. \n? # maybe one newline
  151. [ ]*
  152. (?:
  153. (?<=\s) # lookbehind for whitespace
  154. ["(]
  155. (.*?) # title = $3
  156. [")]
  157. [ ]*
  158. )? # title is optional
  159. (?:\n+|\Z)
  160. }xm',
  161. array(&$this, '_stripLinkDefinitions_callback'),
  162. $text);
  163. return $text;
  164. }
  165. function _stripLinkDefinitions_callback($matches) {
  166. $link_id = strtolower($matches[1]);
  167. $this->urls[$link_id] = $matches[2];
  168. $this->titles[$link_id] =& $matches[3];
  169. return ''; # String that will replace the block
  170. }
  171. function hashHTMLBlocks($text) {
  172. if ($this->no_markup) return $text;
  173. $less_than_tab = $this->tab_width - 1;
  174. # Hashify HTML blocks:
  175. # We only want to do this for block-level HTML tags, such as headers,
  176. # lists, and tables. That's because we still want to wrap <p>s around
  177. # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  178. # phrase emphasis, and spans. The list of tags we're looking for is
  179. # hard-coded:
  180. #
  181. # * List "a" is made of tags which can be both inline or block-level.
  182. # These will be treated block-level when the start tag is alone on
  183. # its line, otherwise they're not matched here and will be taken as
  184. # inline later.
  185. # * List "b" is made of tags which are always block-level;
  186. #
  187. $block_tags_a_re = 'ins|del';
  188. $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
  189. 'script|noscript|form|fieldset|iframe|math';
  190. # Regular expression for the content of a block tag.
  191. $nested_tags_level = 4;
  192. $attr = '
  193. (?> # optional tag attributes
  194. \s # starts with whitespace
  195. (?>
  196. [^>"/]+ # text outside quotes
  197. |
  198. /+(?!>) # slash not followed by ">"
  199. |
  200. "[^"]*" # text inside double quotes (tolerate ">")
  201. |
  202. \'[^\']*\' # text inside single quotes (tolerate ">")
  203. )*
  204. )?
  205. ';
  206. $content =
  207. str_repeat('
  208. (?>
  209. [^<]+ # content without tag
  210. |
  211. <\2 # nested opening tag
  212. '.$attr.' # attributes
  213. (?>
  214. />
  215. |
  216. >', $nested_tags_level). # end of opening tag
  217. '.*?'. # last level nested tag content
  218. str_repeat('
  219. </\2\s*> # closing nested tag
  220. )
  221. |
  222. <(?!/\2\s*> # other tags with a different name
  223. )
  224. )*',
  225. $nested_tags_level);
  226. $content2 = str_replace('\2', '\3', $content);
  227. # First, look for nested blocks, e.g.:
  228. # <div>
  229. # <div>
  230. # tags for inner block must be indented.
  231. # </div>
  232. # </div>
  233. #
  234. # The outermost tags must start at the left margin for this to match, and
  235. # the inner nested divs must be indented.
  236. # We need to do this before the next, more liberal match, because the next
  237. # match will start at the first `<div>` and stop at the first `</div>`.
  238. $text = preg_replace_callback('{(?>
  239. (?>
  240. (?<=\n\n) # Starting after a blank line
  241. | # or
  242. \A\n? # the beginning of the doc
  243. )
  244. ( # save in $1
  245. # Match from `\n<tag>` to `</tag>\n`, handling nested tags
  246. # in between.
  247. [ ]{0,'.$less_than_tab.'}
  248. <('.$block_tags_b_re.')# start tag = $2
  249. '.$attr.'> # attributes followed by > and \n
  250. '.$content.' # content, support nesting
  251. </\2> # the matching end tag
  252. [ ]* # trailing spaces/tabs
  253. (?=\n+|\Z) # followed by a newline or end of document
  254. | # Special version for tags of group a.
  255. [ ]{0,'.$less_than_tab.'}
  256. <('.$block_tags_a_re.')# start tag = $3
  257. '.$attr.'>[ ]*\n # attributes followed by >
  258. '.$content2.' # content, support nesting
  259. </\3> # the matching end tag
  260. [ ]* # trailing spaces/tabs
  261. (?=\n+|\Z) # followed by a newline or end of document
  262. | # Special case just for <hr />. It was easier to make a special
  263. # case than to make the other regex more complicated.
  264. [ ]{0,'.$less_than_tab.'}
  265. <(hr) # start tag = $2
  266. '.$attr.' # attributes
  267. /?> # the matching end tag
  268. [ ]*
  269. (?=\n{2,}|\Z) # followed by a blank line or end of document
  270. | # Special case for standalone HTML comments:
  271. [ ]{0,'.$less_than_tab.'}
  272. (?s:
  273. <!-- .*? -->
  274. )
  275. [ ]*
  276. (?=\n{2,}|\Z) # followed by a blank line or end of document
  277. | # PHP and ASP-style processor instructions (<? and <%)
  278. [ ]{0,'.$less_than_tab.'}
  279. (?s:
  280. <([?%]) # $2
  281. .*?
  282. \2>
  283. )
  284. [ ]*
  285. (?=\n{2,}|\Z) # followed by a blank line or end of document
  286. )
  287. )}Sxmi',
  288. array(&$this, '_hashHTMLBlocks_callback'),
  289. $text);
  290. return $text;
  291. }
  292. function _hashHTMLBlocks_callback($matches) {
  293. $text = $matches[1];
  294. $key = $this->hashBlock($text);
  295. return "\n\n$key\n\n";
  296. }
  297. function hashPart($text, $boundary = 'X') {
  298. #
  299. # Called whenever a tag must be hashed when a function insert an atomic
  300. # element in the text stream. Passing $text to through this function gives
  301. # a unique text-token which will be reverted back when calling unhash.
  302. #
  303. # The $boundary argument specify what character should be used to surround
  304. # the token. By convension, "B" is used for block elements that needs not
  305. # to be wrapped into paragraph tags at the end, ":" is used for elements
  306. # that are word separators and "X" is used in the general case.
  307. #
  308. # Swap back any tag hash found in $text so we do not have to `unhash`
  309. # multiple times at the end.
  310. $text = $this->unhash($text);
  311. # Then hash the block.
  312. static $i = 0;
  313. $key = "$boundary\x1A" . ++$i . $boundary;
  314. $this->html_hashes[$key] = $text;
  315. return $key; # String that will replace the tag.
  316. }
  317. function hashBlock($text) {
  318. #
  319. # Shortcut function for hashPart with block-level boundaries.
  320. #
  321. return $this->hashPart($text, 'B');
  322. }
  323. var $block_gamut = array(
  324. #
  325. # These are all the transformations that form block-level
  326. # tags like paragraphs, headers, and list items.
  327. #
  328. "doHeaders" => 10,
  329. "doHorizontalRules" => 20,
  330. "doLists" => 40,
  331. "doCodeBlocks" => 50,
  332. "doBlockQuotes" => 60,
  333. );
  334. function runBlockGamut($text) {
  335. #
  336. # Run block gamut tranformations.
  337. #
  338. # We need to escape raw HTML in Markdown source before doing anything
  339. # else. This need to be done for each block, and not only at the
  340. # begining in the Markdown function since hashed blocks can be part of
  341. # list items and could have been indented. Indented blocks would have
  342. # been seen as a code block in a previous pass of hashHTMLBlocks.
  343. $text = $this->hashHTMLBlocks($text);
  344. return $this->runBasicBlockGamut($text);
  345. }
  346. function runBasicBlockGamut($text) {
  347. #
  348. # Run block gamut tranformations, without hashing HTML blocks. This is
  349. # useful when HTML blocks are known to be already hashed, like in the first
  350. # whole-document pass.
  351. #
  352. foreach ($this->block_gamut as $method => $priority) {
  353. $text = $this->$method($text);
  354. }
  355. # Finally form paragraph and restore hashed blocks.
  356. $text = $this->formParagraphs($text);
  357. return $text;
  358. }
  359. function doHorizontalRules($text) {
  360. # Do Horizontal Rules:
  361. return preg_replace(
  362. '{
  363. ^[ ]{0,3} # Leading space
  364. ([-*_]) # $1: First marker
  365. (?> # Repeated marker group
  366. [ ]{0,2} # Zero, one, or two spaces.
  367. \1 # Marker character
  368. ){2,} # Group repeated at least twice
  369. [ ]* # Tailing spaces
  370. $ # End of line.
  371. }mx',
  372. "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
  373. $text);
  374. }
  375. var $span_gamut = array(
  376. #
  377. # These are all the transformations that occur *within* block-level
  378. # tags like paragraphs, headers, and list items.
  379. #
  380. # Process character escapes, code spans, and inline HTML
  381. # in one shot.
  382. "parseSpan" => -30,
  383. # Process anchor and image tags. Images must come first,
  384. # because ![foo][f] looks like an anchor.
  385. "doImages" => 10,
  386. "doAnchors" => 20,
  387. # Make links out of things like `<http://example.com/>`
  388. # Must come after doAnchors, because you can use < and >
  389. # delimiters in inline links like [this](<url>).
  390. "doAutoLinks" => 30,
  391. "encodeAmpsAndAngles" => 40,
  392. "doItalicsAndBold" => 50,
  393. "doHardBreaks" => 60,
  394. );
  395. function runSpanGamut($text) {
  396. #
  397. # Run span gamut tranformations.
  398. #
  399. foreach ($this->span_gamut as $method => $priority) {
  400. $text = $this->$method($text);
  401. }
  402. return $text;
  403. }
  404. function doHardBreaks($text) {
  405. # Do hard breaks:
  406. return preg_replace_callback('/ {2,}\n/',
  407. array(&$this, '_doHardBreaks_callback'), $text);
  408. }
  409. function _doHardBreaks_callback($matches) {
  410. return $this->hashPart("<br$this->empty_element_suffix\n");
  411. }
  412. function doAnchors($text) {
  413. #
  414. # Turn Markdown link shortcuts into XHTML <a> tags.
  415. #
  416. if ($this->in_anchor) return $text;
  417. $this->in_anchor = true;
  418. #
  419. # First, handle reference-style links: [link text] [id]
  420. #
  421. $text = preg_replace_callback('{
  422. ( # wrap whole match in $1
  423. \[
  424. ('.$this->nested_brackets_re.') # link text = $2
  425. \]
  426. [ ]? # one optional space
  427. (?:\n[ ]*)? # one optional newline followed by spaces
  428. \[
  429. (.*?) # id = $3
  430. \]
  431. )
  432. }xs',
  433. array(&$this, '_doAnchors_reference_callback'), $text);
  434. #
  435. # Next, inline-style links: [link text](url "optional title")
  436. #
  437. $text = preg_replace_callback('{
  438. ( # wrap whole match in $1
  439. \[
  440. ('.$this->nested_brackets_re.') # link text = $2
  441. \]
  442. \( # literal paren
  443. [ ]*
  444. (?:
  445. <(\S*)> # href = $3
  446. |
  447. ('.$this->nested_url_parenthesis_re.') # href = $4
  448. )
  449. [ ]*
  450. ( # $5
  451. ([\'"]) # quote char = $6
  452. (.*?) # Title = $7
  453. \6 # matching quote
  454. [ ]* # ignore any spaces/tabs between closing quote and )
  455. )? # title is optional
  456. \)
  457. )
  458. }xs',
  459. array(&$this, '_DoAnchors_inline_callback'), $text);
  460. #
  461. # Last, handle reference-style shortcuts: [link text]
  462. # These must come last in case you've also got [link test][1]
  463. # or [link test](/foo)
  464. #
  465. // $text = preg_replace_callback('{
  466. // ( # wrap whole match in $1
  467. // \[
  468. // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
  469. // \]
  470. // )
  471. // }xs',
  472. // array(&$this, '_doAnchors_reference_callback'), $text);
  473. $this->in_anchor = false;
  474. return $text;
  475. }
  476. function _doAnchors_reference_callback($matches) {
  477. $whole_match = $matches[1];
  478. $link_text = $matches[2];
  479. $link_id =& $matches[3];
  480. if ($link_id == "") {
  481. # for shortcut links like [this][] or [this].
  482. $link_id = $link_text;
  483. }
  484. # lower-case and turn embedded newlines into spaces
  485. $link_id = strtolower($link_id);
  486. $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
  487. if (isset($this->urls[$link_id])) {
  488. $url = $this->urls[$link_id];
  489. $url = $this->encodeAttribute($url);
  490. $result = "<a href=\"$url\"";
  491. if ( isset( $this->titles[$link_id] ) ) {
  492. $title = $this->titles[$link_id];
  493. $title = $this->encodeAttribute($title);
  494. $result .= " title=\"$title\"";
  495. }
  496. $link_text = $this->runSpanGamut($link_text);
  497. $result .= ">$link_text</a>";
  498. $result = $this->hashPart($result);
  499. }
  500. else {
  501. $result = $whole_match;
  502. }
  503. return $result;
  504. }
  505. function _doAnchors_inline_callback($matches) {
  506. $whole_match = $matches[1];
  507. $link_text = $this->runSpanGamut($matches[2]);
  508. $url = $matches[3] == '' ? $matches[4] : $matches[3];
  509. $title =& $matches[7];
  510. $url = $this->encodeAttribute($url);
  511. $result = "<a href=\"$url\"";
  512. if (isset($title)) {
  513. $title = $this->encodeAttribute($title);
  514. $result .= " title=\"$title\"";
  515. }
  516. $link_text = $this->runSpanGamut($link_text);
  517. $result .= ">$link_text</a>";
  518. return $this->hashPart($result);
  519. }
  520. function doImages($text) {
  521. #
  522. # Turn Markdown image shortcuts into <img> tags.
  523. #
  524. #
  525. # First, handle reference-style labeled images: ![alt text][id]
  526. #
  527. $text = preg_replace_callback('{
  528. ( # wrap whole match in $1
  529. !\[
  530. ('.$this->nested_brackets_re.') # alt text = $2
  531. \]
  532. [ ]? # one optional space
  533. (?:\n[ ]*)? # one optional newline followed by spaces
  534. \[
  535. (.*?) # id = $3
  536. \]
  537. )
  538. }xs',
  539. array(&$this, '_doImages_reference_callback'), $text);
  540. #
  541. # Next, handle inline images: ![alt text](url "optional title")
  542. # Don't forget: encode * and _
  543. #
  544. $text = preg_replace_callback('{
  545. ( # wrap whole match in $1
  546. !\[
  547. ('.$this->nested_brackets_re.') # alt text = $2
  548. \]
  549. \s? # One optional whitespace character
  550. \( # literal paren
  551. [ ]*
  552. (?:
  553. <(\S*)> # src url = $3
  554. |
  555. ('.$this->nested_url_parenthesis_re.') # src url = $4
  556. )
  557. [ ]*
  558. ( # $5
  559. ([\'"]) # quote char = $6
  560. (.*?) # title = $7
  561. \6 # matching quote
  562. [ ]*
  563. )? # title is optional
  564. \)
  565. )
  566. }xs',
  567. array(&$this, '_doImages_inline_callback'), $text);
  568. return $text;
  569. }
  570. function _doImages_reference_callback($matches) {
  571. $whole_match = $matches[1];
  572. $alt_text = $matches[2];
  573. $link_id = strtolower($matches[3]);
  574. if ($link_id == "") {
  575. $link_id = strtolower($alt_text); # for shortcut links like ![this][].
  576. }
  577. $alt_text = $this->encodeAttribute($alt_text);
  578. if (isset($this->urls[$link_id])) {
  579. $url = $this->encodeAttribute($this->urls[$link_id]);
  580. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  581. if (isset($this->titles[$link_id])) {
  582. $title = $this->titles[$link_id];
  583. $title = $this->encodeAttribute($title);
  584. $result .= " title=\"$title\"";
  585. }
  586. $result .= $this->empty_element_suffix;
  587. $result = $this->hashPart($result);
  588. }
  589. else {
  590. # If there's no such link ID, leave intact:
  591. $result = $whole_match;
  592. }
  593. return $result;
  594. }
  595. function _doImages_inline_callback($matches) {
  596. $whole_match = $matches[1];
  597. $alt_text = $matches[2];
  598. $url = $matches[3] == '' ? $matches[4] : $matches[3];
  599. $title =& $matches[7];
  600. $alt_text = $this->encodeAttribute($alt_text);
  601. $url = $this->encodeAttribute($url);
  602. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  603. if (isset($title)) {
  604. $title = $this->encodeAttribute($title);
  605. $result .= " title=\"$title\""; # $title already quoted
  606. }
  607. $result .= $this->empty_element_suffix;
  608. return $this->hashPart($result);
  609. }
  610. function doHeaders($text) {
  611. # Setext-style headers:
  612. # Header 1
  613. # ========
  614. #
  615. # Header 2
  616. # --------
  617. #
  618. $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
  619. array(&$this, '_doHeaders_callback_setext'), $text);
  620. # atx-style headers:
  621. # # Header 1
  622. # ## Header 2
  623. # ## Header 2 with closing hashes ##
  624. # ...
  625. # ###### Header 6
  626. #
  627. $text = preg_replace_callback('{
  628. ^(\#{1,6}) # $1 = string of #\'s
  629. [ ]*
  630. (.+?) # $2 = Header text
  631. [ ]*
  632. \#* # optional closing #\'s (not counted)
  633. \n+
  634. }xm',
  635. array(&$this, '_doHeaders_callback_atx'), $text);
  636. return $text;
  637. }
  638. function _doHeaders_callback_setext($matches) {
  639. # Terrible hack to check we haven't found an empty list item.
  640. if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
  641. return $matches[0];
  642. $level = $matches[2]{0} == '=' ? 1 : 2;
  643. $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
  644. return "\n" . $this->hashBlock($block) . "\n\n";
  645. }
  646. function _doHeaders_callback_atx($matches) {
  647. $level = strlen($matches[1]);
  648. $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
  649. return "\n" . $this->hashBlock($block) . "\n\n";
  650. }
  651. function doLists($text) {
  652. #
  653. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  654. #
  655. $less_than_tab = $this->tab_width - 1;
  656. # Re-usable patterns to match list item bullets and number markers:
  657. $marker_ul_re = '[*+-]';
  658. $marker_ol_re = '\d+[.]';
  659. $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
  660. $markers_relist = array($marker_ul_re, $marker_ol_re);
  661. foreach ($markers_relist as $marker_re) {
  662. # Re-usable pattern to match any entirel ul or ol list:
  663. $whole_list_re = '
  664. ( # $1 = whole list
  665. ( # $2
  666. [ ]{0,'.$less_than_tab.'}
  667. ('.$marker_re.') # $3 = first list item marker
  668. [ ]+
  669. )
  670. (?s:.+?)
  671. ( # $4
  672. \z
  673. |
  674. \n{2,}
  675. (?=\S)
  676. (?! # Negative lookahead for another list item marker
  677. [ ]*
  678. '.$marker_re.'[ ]+
  679. )
  680. )
  681. )
  682. '; // mx
  683. # We use a different prefix before nested lists than top-level lists.
  684. # See extended comment in _ProcessListItems().
  685. if ($this->list_level) {
  686. $text = preg_replace_callback('{
  687. ^
  688. '.$whole_list_re.'
  689. }mx',
  690. array(&$this, '_doLists_callback'), $text);
  691. }
  692. else {
  693. $text = preg_replace_callback('{
  694. (?:(?<=\n)\n|\A\n?) # Must eat the newline
  695. '.$whole_list_re.'
  696. }mx',
  697. array(&$this, '_doLists_callback'), $text);
  698. }
  699. }
  700. return $text;
  701. }
  702. function _doLists_callback($matches) {
  703. # Re-usable patterns to match list item bullets and number markers:
  704. $marker_ul_re = '[*+-]';
  705. $marker_ol_re = '\d+[.]';
  706. $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
  707. $list = $matches[1];
  708. $list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";
  709. $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
  710. $list .= "\n";
  711. $result = $this->processListItems($list, $marker_any_re);
  712. $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
  713. return "\n". $result ."\n\n";
  714. }
  715. var $list_level = 0;
  716. function processListItems($list_str, $marker_any_re) {
  717. #
  718. # Process the contents of a single ordered or unordered list, splitting it
  719. # into individual list items.
  720. #
  721. # The $this->list_level global keeps track of when we're inside a list.
  722. # Each time we enter a list, we increment it; when we leave a list,
  723. # we decrement. If it's zero, we're not in a list anymore.
  724. #
  725. # We do this because when we're not inside a list, we want to treat
  726. # something like this:
  727. #
  728. # I recommend upgrading to version
  729. # 8. Oops, now this line is treated
  730. # as a sub-list.
  731. #
  732. # As a single paragraph, despite the fact that the second line starts
  733. # with a digit-period-space sequence.
  734. #
  735. # Whereas when we're inside a list (or sub-list), that line will be
  736. # treated as the start of a sub-list. What a kludge, huh? This is
  737. # an aspect of Markdown's syntax that's hard to parse perfectly
  738. # without resorting to mind-reading. Perhaps the solution is to
  739. # change the syntax rules such that sub-lists must start with a
  740. # starting cardinal number; e.g. "1." or "a.".
  741. $this->list_level++;
  742. # trim trailing blank lines:
  743. $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
  744. $list_str = preg_replace_callback('{
  745. (\n)? # leading line = $1
  746. (^[ ]*) # leading whitespace = $2
  747. ('.$marker_any_re.' # list marker and space = $3
  748. (?:[ ]+|(?=\n)) # space only required if item is not empty
  749. )
  750. ((?s:.*?)) # list item text = $4
  751. (?:(\n+(?=\n))|\n) # tailing blank line = $5
  752. (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
  753. }xm',
  754. array(&$this, '_processListItems_callback'), $list_str);
  755. $this->list_level--;
  756. return $list_str;
  757. }
  758. function _processListItems_callback($matches) {
  759. $item = $matches[4];
  760. $leading_line =& $matches[1];
  761. $leading_space =& $matches[2];
  762. $marker_space = $matches[3];
  763. $tailing_blank_line =& $matches[5];
  764. if ($leading_line || $tailing_blank_line ||
  765. preg_match('/\n{2,}/', $item))
  766. {
  767. # Replace marker with the appropriate whitespace indentation
  768. $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
  769. $item = $this->runBlockGamut($this->outdent($item)."\n");
  770. }
  771. else {
  772. # Recursion for sub-lists:
  773. $item = $this->doLists($this->outdent($item));
  774. $item = preg_replace('/\n+$/', '', $item);
  775. $item = $this->runSpanGamut($item);
  776. }
  777. return "<li>" . $item . "</li>\n";
  778. }
  779. function doCodeBlocks($text) {
  780. #
  781. # Process Markdown `<pre><code>` blocks.
  782. #
  783. $text = preg_replace_callback('{
  784. (?:\n\n|\A\n?)
  785. ( # $1 = the code block -- one or more lines, starting with a space/tab
  786. (?>
  787. [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
  788. .*\n+
  789. )+
  790. )
  791. ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
  792. }xm',
  793. array(&$this, '_doCodeBlocks_callback'), $text);
  794. return $text;
  795. }
  796. function _doCodeBlocks_callback($matches) {
  797. $codeblock = $matches[1];
  798. $codeblock = $this->outdent($codeblock);
  799. $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
  800. # trim leading newlines and trailing newlines
  801. $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
  802. $codeblock = "<pre><code>$codeblock\n</code></pre>";
  803. return "\n\n".$this->hashBlock($codeblock)."\n\n";
  804. }
  805. function makeCodeSpan($code) {
  806. #
  807. # Create a code span markup for $code. Called from handleSpanToken.
  808. #
  809. $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
  810. return $this->hashPart("<code>$code</code>");
  811. }
  812. var $em_relist = array(
  813. '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
  814. '*' => '(?<=\S)(?<!\*)\*(?!\*)',
  815. '_' => '(?<=\S)(?<!_)_(?!_)',
  816. );
  817. var $strong_relist = array(
  818. '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
  819. '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
  820. '__' => '(?<=\S)(?<!_)__(?!_)',
  821. );
  822. var $em_strong_relist = array(
  823. '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
  824. '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
  825. '___' => '(?<=\S)(?<!_)___(?!_)',
  826. );
  827. var $em_strong_prepared_relist;
  828. function prepareItalicsAndBold() {
  829. #
  830. # Prepare regular expressions for seraching emphasis tokens in any
  831. # context.
  832. #
  833. foreach ($this->em_relist as $em => $em_re) {
  834. foreach ($this->strong_relist as $strong => $strong_re) {
  835. # Construct list of allowed token expressions.
  836. $token_relist = array();
  837. if (isset($this->em_strong_relist["$em$strong"])) {
  838. $token_relist[] = $this->em_strong_relist["$em$strong"];
  839. }
  840. $token_relist[] = $em_re;
  841. $token_relist[] = $strong_re;
  842. # Construct master expression from list.
  843. $token_re = '{('. implode('|', $token_relist) .')}';
  844. $this->em_strong_prepared_relist["$em$strong"] = $token_re;
  845. }
  846. }
  847. }
  function doItalicsAndBold($text) {
      #
      # Turn emphasis markers found in $text into <em> and <strong> spans
      # and return the resulting text.
      #
      # Params:
      #    $text - span-level text to scan for emphasis tokens
      #
      # Two parallel stacks are kept, with index 0 being the innermost entry:
      #    $token_stack - opening markers still waiting to be closed
      #    $text_stack  - text collected since the matching opening marker
      # $em and $strong hold the currently open marker of each kind ('' when
      # none is open); together they select the prepared regular expression.
      # Every completed span goes through runSpanGamut() and hashPart().
      #
      $token_stack = array('');
      $text_stack = array('');
      $em = '';
      $strong = '';
      $tree_char_em = false;

      while (1) {
          #
          # Get prepared regular expression for searching emphasis tokens
          # in current context.
          #
          $token_re = $this->em_strong_prepared_relist["$em$strong"];

          #
          # Each loop iteration searches for the next emphasis token.
          # The token is then handled inline by the branches below.
          #
          $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
          $text_stack[0] .= $parts[0];
          # Reference assignment yields null (without a notice) when
          # preg_split() found no further token.
          $token =& $parts[1];
          $text =& $parts[2];

          if (empty($token)) {
              # Reached end of text span: empty stack without emitting
              # any more emphasis. Unclosed markers are put back as text.
              while ($token_stack[0]) {
                  $text_stack[1] .= array_shift($token_stack);
                  $text_stack[0] .= array_shift($text_stack);
              }
              break;
          }

          $token_len = strlen($token);
          if ($tree_char_em) {
              # Reached closing marker while inside a three-char emphasis.
              if ($token_len == 3) {
                  # Three-char closing marker, close em and strong.
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong><em>$span</em></strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $em = '';
                  $strong = '';
              } else {
                  # Other closing marker: close one em or strong and
                  # change current token state to match the other.
                  # (Bracket offset: the `{0}` form is a parse error on PHP 8.)
                  $token_stack[0] = str_repeat($token[0], 3-$token_len);
                  $tag = $token_len == 2 ? "strong" : "em";
                  $span = $text_stack[0];
                  $span = $this->runSpanGamut($span);
                  $span = "<$tag>$span</$tag>";
                  $text_stack[0] = $this->hashPart($span);
                  $$tag = ''; # $$tag stands for $em or $strong
              }
              $tree_char_em = false;
          } else if ($token_len == 3) {
              if ($em) {
                  # Reached closing marker for both em and strong.
                  # Close the two innermost spans, whichever order they
                  # were opened in.
                  for ($i = 0; $i < 2; ++$i) {
                      $shifted_token = array_shift($token_stack);
                      $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<$tag>$span</$tag>";
                      $text_stack[0] .= $this->hashPart($span);
                      $$tag = ''; # $$tag stands for $em or $strong
                  }
              } else {
                  # Reached opening three-char emphasis marker. Push on token
                  # stack; will be handled by the special condition above.
                  $em = $token[0];
                  $strong = "$em$em";
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $tree_char_em = true;
              }
          } else if ($token_len == 2) {
              if ($strong) {
                  # Unwind any dangling emphasis marker:
                  if (strlen($token_stack[0]) == 1) {
                      $text_stack[1] .= array_shift($token_stack);
                      $text_stack[0] .= array_shift($text_stack);
                  }
                  # Closing strong marker:
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong>$span</strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $strong = '';
              } else {
                  # Opening strong marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $strong = $token;
              }
          } else {
              # Here $token_len == 1
              if ($em) {
                  if (strlen($token_stack[0]) == 1) {
                      # Closing emphasis marker:
                      array_shift($token_stack);
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<em>$span</em>";
                      $text_stack[0] .= $this->hashPart($span);
                      $em = '';
                  } else {
                      # Innermost open marker is not a one-char one:
                      # keep this token as literal text.
                      $text_stack[0] .= $token;
                  }
              } else {
                  # Opening emphasis marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $em = $token;
              }
          }
      }
      return $text_stack[0];
  }
  function doBlockQuotes($text) {
      #
      # Find every run of ">"-quoted lines in $text and replace it with the
      # result of _doBlockQuotes_callback().
      #
      # Params:
      #    $text - block-level text to scan
      #
      $blockquote_re = '/
          (                       # Wrap whole match in $1
            (?>
              ^[ ]*>[ ]?          # ">" at the start of a line
                .+\n              # rest of the first line
              (.+\n)*             # subsequent consecutive lines
              \n*                 # blanks
            )+
          )
          /xm';
      return preg_replace_callback(
          $blockquote_re, array($this, '_doBlockQuotes_callback'), $text);
  }
  function _doBlockQuotes_callback($matches) {
      #
      # Build the hashed <blockquote> element for one matched quote run.
      #
      # Params:
      #    $matches - preg match array; $matches[1] is the whole quoted run
      #
      # Trim one level of quoting and blank out whitespace-only lines.
      $quoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

      # Recurse: the quote body is block-level Markdown itself.
      $quoted = $this->runBlockGamut($quoted);

      # Prefix every line of the rendered body with the indent string.
      $quoted = preg_replace('/^/m', " ", $quoted);

      # That prefix causes problems inside <pre> content, so it is
      # stripped again there.
      $quoted = preg_replace_callback(
          '{(\s*<pre>.+?</pre>)}sx',
          array($this, '_DoBlockQuotes_callback2'),
          $quoted);

      $hashed = $this->hashBlock("<blockquote>\n$quoted\n</blockquote>");
      return "\n". $hashed ."\n\n";
  }
  function _doBlockQuotes_callback2($matches) {
      #
      # Strip the single leading space from every line of a matched
      # <pre> section ($matches[1]) and return the result.
      #
      return preg_replace('/^ /m', '', $matches[1]);
  }
  function formParagraphs($text) {
      #
      # Split $text on blank lines and turn every chunk into output:
      # ordinary chunks are wrapped in <p> tags, hashed block placeholders
      # are swapped for their stored HTML.
      #
      # Params:
      #    $text - string to process with html <p> tags
      #
      # Strip leading and trailing lines:
      $text = preg_replace('/\A\n+|\n+\z/', '', $text);

      $chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

      foreach ($chunks as $idx => $chunk) {
          if (preg_match('/^B\x1A[0-9]+B$/', $chunk)) {
              # Is a block: emit the stored HTML verbatim, no further
              # processing is applied to its contents.
              $chunks[$idx] = $this->html_hashes[$chunk];
              continue;
          }

          # Is a paragraph: run span-level processing, replace any leading
          # spaces with the opening tag, close it, and unhash.
          $para = $this->runSpanGamut($chunk);
          $para = preg_replace('/^([ ]*)/', "<p>", $para) . "</p>";
          $chunks[$idx] = $this->unhash($para);
      }

      return implode("\n\n", $chunks);
  }
  function encodeAttribute($text) {
      #
      # Encode $text for use inside a double-quoted HTML attribute:
      # ampersands and "<" are handled by encodeAmpsAndAngles(), then
      # double quotes become &quot;. Single quotes are left untouched, so
      # this is *not* suitable for attributes enclosed in single quotes.
      #
      return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
  }
  function encodeAmpsAndAngles($text) {
      #
      # Encode the ampersands and "<" characters of $text.
      #
      # When the no-entities mode is set every "&" is encoded. Otherwise
      # an "&" that already starts something shaped like a character
      # entity is left alone (ampersand-encoding based entirely on
      # Nat Irons's Amputator MT plugin:
      # <http://bumppo.net/projects/amputator/>).
      #
      $amp_encoded = $this->no_entities
          ? str_replace('&', '&amp;', $text)
          : preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&amp;', $text);

      # Encode remaining <'s
      return str_replace('<', '&lt;', $amp_encoded);
  }
  function doAutoLinks($text) {
      #
      # Convert <scheme:...> URLs and <address@domain.foo> e-mail
      # addresses found in $text, delegating each match to the matching
      # callback. URLs are processed first, then e-mail addresses.
      #
      $url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';

      # Email addresses: <address@domain.foo>
      $email_re = '{
          <
          (?:mailto:)?
          (
              [-.\w\x80-\xFF]+
              \@
              [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
          )
          >
          }xi';

      $text = preg_replace_callback(
          $url_re, array($this, '_doAutoLinks_url_callback'), $text);
      $text = preg_replace_callback(
          $email_re, array($this, '_doAutoLinks_email_callback'), $text);

      return $text;
  }
  function _doAutoLinks_url_callback($matches) {
      #
      # Build the hashed anchor for one matched URL ($matches[1]); the
      # attribute-encoded URL is used both as href and as link text.
      #
      $href = $this->encodeAttribute($matches[1]);
      return $this->hashPart('<a href="' . $href . '">' . $href . '</a>');
  }
  function _doAutoLinks_email_callback($matches) {
      #
      # Build the hashed, obfuscated mailto link for one matched
      # e-mail address ($matches[1]).
      #
      return $this->hashPart($this->encodeEmailAddress($matches[1]));
  }
  function encodeEmailAddress($addr) {
      #
      # Input: an email address, e.g. "foo@example.com"
      #
      # Output: the email address as a mailto link, with each character
      # of the address encoded as either a decimal or hex entity, in
      # the hopes of foiling most address harvesting spam bots. E.g.:
      #
      #   <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
      #      &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
      #      &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
      #      &#101;&#46;&#x63;&#111;&#x6d;</a></p>
      #
      # Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
      # With some optimizations by Milian Wolff.
      #
      $addr = "mailto:" . $addr;

      # One array entry per byte of the prefixed address.
      $pieces = preg_split('/(?<!^)(?!$)/', $addr);

      # Deterministic seed: the same address always encodes the same way.
      $seed = (int)abs(crc32($addr) / strlen($addr));

      foreach ($pieces as $pos => $piece) {
          $code = ord($piece);

          # Ignore non-ascii chars.
          if ($code >= 128) {
              continue;
          }

          $roll = ($seed * (1 + $pos)) % 100; # Pseudo-random function.

          # roughly 10% raw, 45% hex, 45% dec
          # '@' *must* be encoded. I insist.
          if ($roll > 90 && $piece != '@') {
              continue; # leave this character raw
          }
          $pieces[$pos] = ($roll < 45)
              ? '&#x'.dechex($code).';'
              : '&#'.$code.';';
      }

      $href = implode('', $pieces);
      $label = implode('', array_slice($pieces, 7)); # text without `mailto:`

      return "<a href=\"$href\">$label</a>";
  }
  function parseSpan($str) {
      #
      # Take the string $str and parse it into tokens, hashing embedded HTML,
      # escaped characters and handling code spans. Returns the text with
      # every token replaced by what handleSpanToken() produced for it.
      #
      $markup_re = $this->no_markup ? '' : '
              |
                  <!--    .*?     -->     # comment
              |
                  <\?.*?\?> | <%.*?%>     # processing instruction
              |
                  <[/!$]?[-a-zA-Z0-9:]+   # regular tags
                  (?>
                      \s
                      (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                  )?
                  >
      ';

      $span_re = '{
              (
                  \\\\'.$this->escape_chars_re.'
              |
                  (?<![`\\\\])
                  `+                      # code span marker
          '.$markup_re.'
              )
              }xs';

      $output = '';
      for (;;) {
          #
          # Each loop iteration searches for either the next tag, the next
          # opening code span marker, or the next escaped character.
          # Each token is then passed to handleSpanToken.
          #
          $pieces = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

          # Text preceding the token goes to the output untouched.
          $output .= $pieces[0];

          # No token captured: the end of the string has been reached.
          if (!isset($pieces[1])) {
              break;
          }

          # handleSpanToken() may consume part of the remaining text,
          # which it receives by reference.
          $output .= $this->handleSpanToken($pieces[1], $pieces[2]);
          $str = $pieces[2];
      }

      return $output;
  }
  function handleSpanToken($token, &$str) {
      #
      # Handle $token provided by parseSpan by determining its nature and
      # returning the corresponding value that should replace it.
      #
      # Params:
      #    $token - the matched token (escape sequence, backtick run or tag)
      #    $str   - text following the token, passed by reference; when a
      #             code span is found, the consumed part is removed from it
      #
      # Note: string offsets use brackets; the curly-brace form `$token{0}`
      # is deprecated in PHP 7.4 and a parse error in PHP 8.0.
      #
      switch ($token[0]) {
          case "\\":
              # Backslash escape: hash the escaped character as a
              # decimal entity.
              return $this->hashPart("&#". ord($token[1]). ";");
          case "`":
              # Search for end marker in remaining text: the same run of
              # backticks, not followed by another backtick.
              if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                  $str, $matches))
              {
                  $str = $matches[2];
                  $codespan = $this->makeCodeSpan($matches[1]);
                  return $this->hashPart($codespan);
              }
              return $token; // return as text since no ending marker found.
          default:
              # Anything else is hashed verbatim.
              return $this->hashPart($token);
      }
  }
  function outdent($text) {
      #
      # Remove one level of line-leading indentation from $text: either
      # one tab or between one and tab_width spaces per line.
      #
      $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
      return preg_replace($indent_re, '', $text);
  }
  1228. # String length function for detab. `_initDetab` will create a function to
  1229. # handle UTF-8 if the default function does not exist.
  1230. var $utf8_strlen = 'mb_strlen';
  function detab($text) {
      #
      # Replace tabs with the appropriate amount of space.
      #
      # Only lines that actually contain a tab are matched; each of them
      # is rebuilt by _detab_callback(), which splits the line into
      # blocks delimited by tab characters and re-joins them with the
      # appropriate number of spaces between each block.
      #
      return preg_replace_callback(
          '/^.*\t.*$/m', array($this, '_detab_callback'), $text);
  }
  function _detab_callback($matches) {
      #
      # Expand the tabs of one matched line ($matches[0]) into spaces so
      # that the text after each tab starts on a multiple of tab_width.
      #
      $measure = $this->utf8_strlen; # strlen function for UTF-8.

      # Split in blocks; the first one starts the rebuilt line.
      $segments = explode("\t", $matches[0]);
      $rebuilt = array_shift($segments);

      foreach ($segments as $segment) {
          # Spaces needed to reach the next tab stop from the current
          # length of the rebuilt line.
          $pad = $this->tab_width -
              $measure($rebuilt, 'UTF-8') % $this->tab_width;
          $rebuilt .= str_repeat(" ", $pad) . $segment;
      }

      return $rebuilt;
  }
  function _initDetab() {
      #
      # Check for the availability of the function in the `utf8_strlen` property
      # (initially `mb_strlen`). If the function is not available, install a
      # fallback that will loosely count the number of UTF-8 characters with a
      # regular expression.
      #
      # The fallback is a closure rather than create_function(), which is
      # deprecated in PHP 7.2 and removed in PHP 8.0. It is invoked as
      # $strlen($line, 'UTF-8'); the extra encoding argument is ignored.
      #
      $current = $this->utf8_strlen;

      # Already a closure (fallback installed by an earlier call) or the
      # name of an existing function: nothing to do.
      if ($current instanceof Closure || function_exists($current)) return;

      $this->utf8_strlen = function ($text) {
          # Counts single bytes below 0xC0, or a lead byte (0xC0-0xFF)
          # together with the continuation bytes that follow it.
          return preg_match_all(
              '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
              $text, $m);
      };
  }
  function unhash($text) {
      #
      # Swap back in everything that was replaced by a hash placeholder.
      # A placeholder is a marker character, \x1A, a run of digits and the
      # same marker character again; each one is resolved by
      # _unhash_callback().
      #
      $placeholder_re = '/(.)\x1A[0-9]+\1/';
      return preg_replace_callback(
          $placeholder_re, array($this, '_unhash_callback'), $text);
  }
  function _unhash_callback($matches) {
      #
      # Look up the stored content for one matched placeholder; the whole
      # match ($matches[0]) is the key into html_hashes.
      #
      $placeholder = $matches[0];
      return $this->html_hashes[$placeholder];
  }
  1280. }
  1281. ?>