PageRenderTime 42ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/blog/wp-content/plugins/jetpack/_inc/lib/markdown/gfm.php

https://gitlab.com/relacilia/cakra
PHP | 373 lines | 168 code | 46 blank | 159 comment | 10 complexity | 30c1616cd85fd39402191526492d3d65 MD5 | raw file
  1. <?php
  2. /**
  3. * GitHub-Flavoured Markdown. Inspired by Evan's plugin, but modified.
  4. *
  5. * @author Evan Solomon
  6. * @author Matt Wiebe <wiebe@automattic.com>
  7. * @link https://github.com/evansolomon/wp-github-flavored-markdown-comments
  8. *
  9. * Add a few extras from GitHub's Markdown implementation. Must be used in a WordPress environment.
  10. */
  11. class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser {
  12. /**
  13. * Hooray somewhat arbitrary numbers that are fearful of 1.0.x.
  14. */
  15. const WPCOM_GHF_MARDOWN_VERSION = '0.9.0';
  16. /**
  17. * Use a [code] shortcode when encountering a fenced code block
  18. * @var boolean
  19. */
  20. public $use_code_shortcode = true;
  21. /**
  22. * Preserve shortcodes, untouched by Markdown.
  23. * This requires use within a WordPress installation.
  24. * @var boolean
  25. */
  26. public $preserve_shortcodes = true;
  27. /**
  28. * Preserve the legacy $latex your-latex-code-here$ style
  29. * LaTeX markup
  30. */
  31. public $preserve_latex = true;
  32. /**
  33. * Preserve single-line <code> blocks.
  34. * @var boolean
  35. */
  36. public $preserve_inline_code_blocks = true;
  37. /**
  38. * Strip paragraphs from the output. This is the right default for WordPress,
  39. * which generally wants to create its own paragraphs with `wpautop`
  40. * @var boolean
  41. */
  42. public $strip_paras = true;
  43. // Will run through sprintf - you can supply your own syntax if you want
  44. public $shortcode_start = '[code lang=%s]';
  45. public $shortcode_end = '[/code]';
  46. // Stores shortcodes we remove and then replace
  47. protected $preserve_text_hash = array();
  /**
   * Configure the optional behaviours from the surrounding environment.
   *
   * Each toggle is switched on only when the class or function it is paired
   * with below is currently defined; the parent constructor runs afterwards.
   */
  public function __construct() {
      // The [code] shortcode output is only used when SyntaxHighlighter is loaded.
      $this->use_code_shortcode = class_exists( 'SyntaxHighlighter' );

      // The remaining toggles each depend on a single function being defined.
      $function_gated_flags = array(
          'preserve_shortcodes' => 'get_shortcode_regex',
          'preserve_latex'      => 'latex_markup',
          'strip_paras'         => 'wpautop',
      );
      foreach ( $function_gated_flags as $flag => $required_function ) {
          $this->$flag = function_exists( $required_function );
      }

      parent::__construct();
  }
  /**
   * Convert Markdown to HTML, shielding WordPress-specific markup on the way.
   *
   * Steps, in order: hide inline <code> contents, shortcodes and legacy LaTeX
   * behind placeholders (each only when its flag is set); backslash-escape
   * line-leading runs of 1-6 hashes that are not followed by a space, so that
   *     #Will Not Produce a Heading 1
   *     # This Will Produce a Heading 1
   * then run the parent transform, un-escape the hashes, optionally strip bare
   * <p> wrappers, and finally swap the placeholders back for the original text.
   *
   * @param string $text Markdown text
   * @return string HTML-transformed text
   */
  public function transform( $text ) {
      // Preserve anything inside a single-line <code> element
      if ( $this->preserve_inline_code_blocks ) {
          $text = $this->single_line_code_preserve( $text );
      }
      // Remove all shortcodes so their interiors are left intact
      if ( $this->preserve_shortcodes ) {
          $text = $this->shortcode_preserve( $text );
      }
      // Remove legacy LaTeX so it's left intact
      if ( $this->preserve_latex ) {
          $text = $this->latex_preserve( $text );
      }

      // Escape line-beginning # chars that do not have a space after them.
      $escaped = preg_replace_callback( '|^#{1,6}( )?|um', array( $this, '_doEscapeForHashWithoutSpacing' ), $text );
      // With the "u" modifier PCRE returns null when the subject is not valid
      // UTF-8. Carry on with the unescaped text in that case instead of
      // replacing the whole document with null.
      if ( null !== $escaped ) {
          $text = $escaped;
      }

      // Run through core Markdown
      $text = parent::transform( $text );

      // Occasionally Markdown Extra chokes on a para structure, producing odd paragraphs.
      $text = str_replace( "<p>&lt;</p>\n\n<p>p>", '<p>', $text );

      // Put start-of-line # chars back in place
      $text = $this->restore_leading_hash( $text );

      // Strip paras if set
      if ( $this->strip_paras ) {
          $text = $this->unp( $text );
      }

      // Restore preserved things like shortcodes/LaTeX
      return $this->do_restore( $text );
  }
  /**
   * Shield the contents of <code> elements whose contents sit on a single line,
   * so that <code>__this__</code> does not turn into <code><strong>this</strong></code>.
   *
   * @param string $text Text that may need preserving
   * @return string Text with the contents of matching <code> elements swapped for placeholders
   */
  public function single_line_code_preserve( $text ) {
      // No "s" modifier: "." stops at a newline, so multi-line contents never match.
      $inline_code_pattern      = '|<code\b[^>]*>(.*?)</code>|';
      $replace_with_placeholder = array( $this, 'do_single_line_code_preserve' );

      return preg_replace_callback( $inline_code_pattern, $replace_with_placeholder, $text );
  }
  /**
   * Regex callback for inline code preservation.
   *
   * The rebuilt element is a bare <code>: attributes of the matched opening
   * tag are not carried over.
   *
   * @param array $matches Regex matches; index 1 holds the element's contents
   * @return string <code> element wrapping a placeholder for later restoration
   */
  public function do_single_line_code_preserve( $matches ) {
      $placeholder = $this->hash_block( $matches[1] );

      return "<code>{$placeholder}</code>";
  }
  /**
   * HTML-encode the interior of fenced code blocks. Per the original note this
   * is useful before the content reaches KSES stripping.
   *
   * @param string $text Markdown/HTML content
   * @return string Markdown/HTML content with escaped code blocks
   */
  public function codeblock_preserve( $text ) {
      // Group 1: three backtick/tilde characters at the start of a line.
      // Group 2: optional remainder of the opening line.
      // Group 3: the body, which may not contain a backtick or tilde.
      // Group 4: the closing fence, identical to group 1.
      $fence_pattern = "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m";

      return preg_replace_callback( $fence_pattern, array( $this, 'do_codeblock_preserve' ), $text );
  }
  /**
   * Regex callback for code block preservation.
   *
   * @param array $matches Regex matches: 1 = opening fence, 2 = rest of the
   *                       opening line, 3 = body, 4 = closing fence
   * @return string Codeblock with escaped interior
   */
  public function do_codeblock_preserve( $matches ) {
      $fence_open  = $matches[1] . $matches[2] . "\n";
      $fence_close = $matches[4];

      // Strip slashes, encode HTML special characters, then double every backslash.
      $body = str_replace( '\\', '\\\\', esc_html( stripslashes( $matches[3] ) ) );

      return $fence_open . $body . $fence_close;
  }
  /**
   * Undo the escaping applied to fenced code block interiors.
   *
   * @param string $text Markdown/HTML content with escaped code blocks
   * @return string Markdown/HTML content
   */
  public function codeblock_restore( $text ) {
      // Same fence pattern as codeblock_preserve(): opening fence, optional rest
      // of the opening line, a body free of backticks/tildes, matching closing fence.
      $fence_pattern = "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m";

      return preg_replace_callback( $fence_pattern, array( $this, 'do_codeblock_restore' ), $text );
  }
  /**
   * Regex callback for code block restoration (unescaping).
   *
   * @param array $matches Regex matches: 1 = opening fence, 2 = rest of the
   *                       opening line, 3 = escaped body, 4 = closing fence
   * @return string Codeblock with unescaped interior
   */
  public function do_codeblock_restore( $matches ) {
      list( , $fence, $opening_line_rest, $escaped_body, $closing_fence ) = $matches;

      // ENT_QUOTES so that encoded single and double quotes are decoded as well.
      $body = html_entity_decode( $escaped_body, ENT_QUOTES );

      return $fence . $opening_line_rest . "\n" . $body . $closing_fence;
  }
  /**
   * Swap legacy LaTeX markup such as $latex some-latex-text $ for placeholders.
   *
   * @param string $text Text in which to preserve LaTeX
   * @return string Text with LaTeX replaced by a hash that will be restored later
   */
  protected function latex_preserve( $text ) {
      // Pattern taken from latex_remove(). Extended ("x") mode: whitespace and
      // "#" comments inside the pattern are ignored by PCRE.
      $latex_pattern = '%
\$latex(?:=\s*|\s+)
((?:
[^$]+ # Not a dollar
|
(?<=(?<!\\\\)\\\\)\$ # Dollar preceded by exactly one slash
)+)
(?<!\\\\)\$ # Dollar preceded by zero slashes
%ix';

      return preg_replace_callback( $latex_pattern, array( $this, '_doRemoveText' ), $text );
  }
  /**
   * Swap WP shortcodes for placeholders so Markdown cannot format them in any way.
   *
   * @param string $text Text in which to preserve shortcodes
   * @return string Text with shortcodes replaced by a hash that will be restored later
   */
  protected function shortcode_preserve( $text ) {
      $shortcode_pattern = $this->get_shortcode_regex();

      return preg_replace_callback( $shortcode_pattern, array( $this, '_doRemoveText' ), $text );
  }
  /**
   * Restores any text preserved by $this->hash_block() and empties the store.
   *
   * Placeholders can nest: text stored by a later preserve pass (e.g. a whole
   * shortcode) may itself contain a placeholder created by an earlier pass
   * (e.g. inline <code> contents). Entries are therefore restored newest-first,
   * so an outer entry is expanded before the inner entries it contains are
   * looked for. Restoring in insertion order would leave those inner
   * placeholders in the output.
   *
   * @param string $text Text that may have hashed preservation placeholders
   * @return string Text with hashed preservation placeholders replaced by original text
   */
  protected function do_restore( $text ) {
      // preserve_keys = true: the keys are the hashes the placeholders are built from.
      $newest_first = array_reverse( $this->preserve_text_hash, true );

      foreach ( $newest_first as $hash => $value ) {
          $text = str_replace( $this->hash_maker( $hash ), $value, $text );
      }

      // Reset the store so the next transform starts clean.
      $this->preserve_text_hash = array();

      return $text;
  }
  /**
   * Regex callback for text preservation: stores the entire match.
   *
   * @param array $m Regex $matches array; index 0 is the full matched text
   * @return string A placeholder that will later be replaced by the original text
   */
  protected function _doRemoveText( $m ) {
      $whole_match = $m[0];

      return $this->hash_block( $whole_match );
  }
  /**
   * Store a text block for later restoration.
   *
   * The block is filed under the md5 of its own contents, so storing identical
   * text twice reuses the same entry and yields the same placeholder.
   *
   * @param string $text Text to preserve for later
   * @return string Placeholder that will be swapped out later for the original text
   */
  protected function hash_block( $text ) {
      $key = md5( $text );

      $this->preserve_text_hash[ $key ] = $text;

      return $this->hash_maker( $key );
  }
  /**
   * Build the placeholder token for a hash by wrapping it in a fixed marker
   * on both sides. Less glamorous than the Keymaker.
   *
   * @param string $hash An md5 hash
   * @return string A placeholder hash
   */
  protected function hash_maker( $hash ) {
      $marker = 'MARKDOWN_HASH';

      return $marker . $hash . $marker;
  }
  /**
   * Remove bare <p> elements. <p>s with attributes will be preserved.
   *
   * Only paragraphs whose closing tag is followed by a newline or the end of a
   * line/the text are unwrapped; that trailing newline is kept.
   *
   * @param string $text HTML content
   * @return string <p>-less content; the input unchanged if the pattern cannot be applied
   */
  public function unp( $text ) {
      $unwrapped = preg_replace( "#<p>(.*?)</p>(\n|$)#ums", '$1$2', $text );

      // With the "u" modifier preg_replace() returns null when $text is not
      // valid UTF-8. Hand back the input rather than null so that callers do
      // not lose the whole document.
      return null === $unwrapped ? $text : $unwrapped;
  }
  /**
   * Build a delimited regex from the pattern returned by the global
   * get_shortcode_regex() function.
   *
   * @uses get_shortcode_regex()
   * @return string A regex for grabbing shortcodes.
   */
  protected function get_shortcode_regex() {
      // The trailing negative lookahead refuses a match that is immediately
      // followed by "(", so markdown link anchors like [text](url) are not
      // mistaken for shortcodes.
      $not_a_markdown_link = '(?!\()';

      return '/' . get_shortcode_regex() . $not_a_markdown_link . '/s';
  }
  /**
   * Since we escape unspaced #Headings, put things back later.
   *
   * At the start of each line, optionally after an opening <p>, an escaped hash
   * (either the entity &#35; or a backslash followed by #) becomes a plain #.
   *
   * @param string $text text with a leading escaped hash
   * @return string text with leading hashes unescaped; the input unchanged if
   *                the pattern cannot be applied
   */
  protected function restore_leading_hash( $text ) {
      $restored = preg_replace( "/^(<p>)?(&#35;|\\\\#)/um", "$1#", $text );

      // With the "u" modifier preg_replace() returns null when $text is not
      // valid UTF-8. Keep the text as it was instead of returning null.
      return null === $restored ? $text : $restored;
  }
  /**
   * Overload to support ```-fenced code blocks for pre-Markdown Extra 1.2.8
   * https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks
   *
   * @param string $text Markdown text
   * @return string Text with each fenced code block replaced by the result of
   *                _doFencedCodeBlocks_callback()
   */
  public function doFencedCodeBlocks( $text ) {
      // If we're at least at 1.2.8, native fenced code blocks are in.
      // Below is just copied from it in case we somehow got loaded on
      // top of someone else's Markdown Extra
      if ( version_compare( MARKDOWNEXTRA_VERSION, '1.2.8', '>=' ) ) {
          return parent::doFencedCodeBlocks( $text );
      }

      // Adding the fenced code block syntax to regular Markdown:
      //
      // ~~~
      // Code block
      // ~~~
      //
      // Extended ("x") mode: whitespace and "#" comments in the pattern are ignored.
      return preg_replace_callback('{
(?:\n|\A)
# 1: Opening marker
(
(?:~{3,}|`{3,}) # 3 or more tildes/backticks.
)
[ ]*
(?:
\.?([-_:a-zA-Z0-9]+) # 2: standalone class name
|
'.$this->id_class_attr_catch_re.' # 3: Extra attributes
)?
[ ]* \n # Whitespace and newline following marker.
# 4: Content
(
(?>
(?!\1 [ ]* \n) # Not a closing marker.
.*\n+
)+
)
# Closing marker.
\1 [ ]* (?= \n )
}xm',
          array($this, '_doFencedCodeBlocks_callback'), $text);
  }
  /**
   * Callback for pre-processing start of line hashes to slyly escape headings
   * that don't have a space after the hashes.
   *
   * @param array $m preg_match matches: 0 = the run of hashes plus the optional
   *                 space, 1 = that space (absent when there was none)
   * @return string the match unchanged when a space followed the hashes,
   *                otherwise the match prefixed with a backslash
   */
  public function _doEscapeForHashWithoutSpacing( $m ) {
      $followed_by_space = isset( $m[1] );

      return $followed_by_space ? $m[0] : '\\' . $m[0];
  }
  /**
   * Overload to support Viper's [code] shortcode. Because awesome.
   *
   * When $use_code_shortcode is off this defers to the parent callback.
   * Otherwise the block body is HTML-escaped and wrapped in the configured
   * shortcode_start / shortcode_end strings, with the class name passed to
   * shortcode_start through sprintf().
   *
   * @param array $matches Regex matches from doFencedCodeBlocks(): 2 = class
   *                       name (may be empty or missing), 4 = block content
   * @return string Hashed block, padded with blank lines on both sides
   */
  public function _doFencedCodeBlocks_callback( $matches ) {
      // In case we have some escaped leading hashes right at the start of the block
      $matches[4] = $this->restore_leading_hash( $matches[4] );

      // Just MarkdownExtra_Parser if we're not going ultra-deluxe
      if ( ! $this->use_code_shortcode ) {
          return parent::_doFencedCodeBlocks_callback( $matches );
      }

      // Default to a "text" class if one wasn't passed. Helps with encoding issues later.
      $classname = empty( $matches[2] ) ? 'text' : $matches[2];

      $codeblock = preg_replace_callback( '/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] );

      // Square-bracket offset: the curly-brace form ($classname{0}) is
      // deprecated in PHP 7.4 and a parse error in PHP 8. $classname is never
      // empty here, so offset 0 always exists.
      if ( '.' === $classname[0] ) {
          $classname = substr( $classname, 1 );
      }

      $codeblock = esc_html( $codeblock );
      $codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end;

      return "\n\n" . $this->hashBlock( $codeblock ). "\n\n";
  }
  324. }