PageRenderTime 69ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/website/lib/markdown.php

https://github.com/simon2k6/scratch-o-sphere
PHP | 565 lines | 413 code | 23 blank | 129 comment | 30 complexity | 3fde47513f38a59aac8bf163249497c1 MD5 | raw file
Possible License(s): GPL-3.0
  1. <?php
  2. /*
  3. Copyright (c) 2009-2013 F3::Factory/Bong Cosca, All rights reserved.
  4. This file is part of the Fat-Free Framework (http://fatfree.sf.net).
  5. THE SOFTWARE AND DOCUMENTATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF
  6. ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
  7. IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
  8. PURPOSE.
  9. Please see the license.txt file for more information.
  10. */
  11. //! Markdown-to-HTML converter
  12. class Markdown extends Prefab {
  13. protected
  14. //! Parsing rules
  15. $blocks,
  16. //! Special characters
  17. $special;
  /**
   * Render a blockquote block: the leading ">" marker is removed from
   * every line and the remaining text is parsed again as markdown
   * @return string
   * @param $str string
   **/
  protected function _blockquote($str) {
    // Strip the quote marker (and one optional blank on either side)
    $inner=preg_replace('/(?<=^|\n)\h?>\h?(.*?(?:\n+|$))/','\1',$str);
    if (!strlen($inner))
      return '';
    return '<blockquote>'.$this->build($inner).'</blockquote>'."\n\n";
  }
  /**
   * Render an indented code block: the text is escaped, one level of
   * indentation (four spaces or a tab) is removed from each line, and
   * the result is wrapped in pre/code tags
   * @return string
   * @param $str string
   **/
  protected function _pre($str) {
    $escaped=$this->esc($str);
    // Remove the four-space/tab prefix that marks the block
    $code=preg_replace(
      '/(?<=^|\n)(?: {4}|\t)(.+?(?:\n+|$))/','\1',$escaped);
    if (!strlen($code))
      return '';
    $code=$this->esc($this->snip($code));
    return '<pre><code>'.$code.'</code></pre>'."\n\n";
  }
  /**
   * Process fenced code block. When the framework variable HIGHLIGHT
   * is set, the language hint selects a highlighter (php, apache,
   * html, ini); any other hint, or highlighting being off, yields the
   * escaped text inside code tags
   * @return string
   * @param $hint string
   * @param $str string
   **/
  protected function _fence($hint,$str) {
    $str=$this->snip($str);
    $fw=Base::instance();
    if ($fw->get('HIGHLIGHT')) {
      switch (strtolower($hint)) {
        case 'php':
          $str=$fw->highlight($str);
          break;
        case 'apache':
          $str='<code>'.$this->fenceApache($str).'</code>';
          break;
        case 'html':
          $str='<code>'.$this->fenceHtml($str).'</code>';
          break;
        case 'ini':
          $str='<code>'.$this->fenceIni($str).'</code>';
          break;
        default:
          $str='<code>'.$this->esc($str).'</code>';
          break;
      }
    }
    else
      $str='<code>'.$this->esc($str).'</code>';
    return '<pre>'.$str.'</pre>'."\n\n";
  }
  /**
   * Highlight Apache configuration: section tags and directives are
   * wrapped in span elements
   * @return string
   * @param $str string
   **/
  protected function fenceApache($str) {
    preg_match_all('/(?<=^|\n)(\h*)'.
      '(?:(<\/?)(\w+)((?:\h+[^>]+)*)(>)|'.
      '(?:(\w+)(\h.+?)))(\h*(?:\n+|$))/',
      $str,$matches,PREG_SET_ORDER);
    $out='';
    foreach ($matches as $match) {
      // Leading indentation
      $out.=$match[1];
      // strlen() rather than truthiness so that a name of "0" still
      // counts as a section
      if (strlen($match[3])) {
        // Section tag, e.g. <Directory /path>
        $out.='<span class="section">'.
          $this->esc($match[2]).$match[3].'</span>';
        if (strlen($match[4]))
          $out.='<span class="data">'.
            $this->esc($match[4]).'</span>';
        $out.='<span class="section">'.
          $this->esc($match[5]).'</span>';
      }
      else
        // Directive followed by its arguments
        $out.='<span class="directive">'.$match[6].'</span>'.
          '<span class="data">'.$this->esc($match[7]).'</span>';
      // Trailing whitespace/line feeds
      $out.=$match[8];
    }
    return $out;
  }
  /**
   * Highlight HTML/XML: tags, attribute names and attribute values
   * are wrapped in span elements; everything taken from the source
   * text that may contain markup characters is escaped
   * @return string
   * @param $str string
   **/
  protected function fenceHtml($str) {
    preg_match_all(
      '/(?:(?:<(\/?)(\w+)'.
      '((?:\h+(?:\w+\h*=\h*)?".+?"|[^>]+)*|'.
      '\h+.+?)(\h*\/?)>)|(.+?))/s',
      $str,$matches,PREG_SET_ORDER
    );
    $out='';
    foreach ($matches as $match) {
      if (!strlen($match[2])) {
        // Text outside of a tag
        $out.=$this->esc($match[5]);
        continue;
      }
      $out.='<span class="xml_tag">&lt;'.
        $match[1].$match[2].'</span>';
      if (strlen($match[3])) {
        preg_match_all(
          '/(?:\h+(?:(?:(\w+)\h*=\h*)?'.
          '(".+?")|(.+)))/',
          $match[3],$parts,PREG_SET_ORDER
        );
        foreach ($parts as $part) {
          $out.=' ';
          if (isset($part[3]) && strlen($part[3]))
            // Text that is not a quoted value; may hold < or &
            $out.='<span class="xml_tag">'.
              $this->esc($part[3]).'</span>';
          else {
            if (strlen($part[1]))
              $out.='<span class="xml_attr">'.
                $part[1].'</span>=';
            // Quoted values can contain any character, so they
            // must be escaped before being written out
            $out.='<span class="xml_data">'.
              $this->esc($part[2]).'</span>';
          }
        }
      }
      $out.='<span class="xml_tag">'.$match[4].'&gt;</span>';
    }
    return $out;
  }
  /**
   * Highlight INI configuration: comments, section headers, keys and
   * values are escaped and wrapped in span elements; lines matching
   * none of these are escaped and passed through
   * @return string
   * @param $str string
   **/
  protected function fenceIni($str) {
    preg_match_all(
      '/(?<=^|\n)(?:'.
      '(;[^\n]*)|(?:<\?php.+?\?>?)|'.
      '(?:\[(.+?)\])|'.
      '(.+?)\h*=\h*'.
      '((?:\\\\\h*\r?\n|.+?)*)'.
      ')((?:\r?\n)+|$)/',
      $str,$matches,PREG_SET_ORDER
    );
    $out='';
    foreach ($matches as $match) {
      // Line terminator captured by the last group
      $eol=isset($match[5])?$match[5]:'';
      // strlen() rather than truthiness: "0" is a legitimate
      // section name, key or value
      if (strlen($match[1]))
        $out.='<span class="comment">'.
          $this->esc($match[1]).'</span>';
      elseif (strlen($match[2]))
        $out.='<span class="ini_section">['.
          $this->esc($match[2]).']</span>';
      elseif (strlen($match[3])) {
        $out.='<span class="ini_key">'.
          $this->esc($match[3]).'</span>=';
        if (strlen($match[4]))
          $out.='<span class="ini_value">'.
            $this->esc($match[4]).'</span>';
      }
      else
        // Whole match minus its terminator, which is appended
        // below; emitting $match[0] as-is would repeat it
        $out.=$this->esc(
          substr($match[0],0,strlen($match[0])-strlen($eol)));
      $out.=$eol;
    }
    return $out;
  }
  /**
   * Render a horizontal rule; takes no input from the matched text
   * @return string
   **/
  protected function _hr() {
    $rule='<hr />';
    return $rule."\n\n";
  }
  /**
   * Render an atx-style heading; the heading level equals the length
   * of the marker string and the id attribute is the slug of the
   * heading text
   * @return string
   * @param $type string
   * @param $str string
   **/
  protected function _atx($type,$str) {
    $tag='h'.strlen($type);
    $id=Web::instance()->slug($str);
    $text=$this->scan($str);
    return '<'.$tag.' id="'.$id.'">'.$text.'</'.$tag.'>'."\n\n";
  }
  /**
   * Render a setext-style heading; an "=" underline gives level 1
   * and a "-" underline gives level 2
   * @return string
   * @param $str string
   * @param $type string
   **/
  protected function _setext($str,$type) {
    // Position of the underline character in "=-" plus one
    $tag='h'.(strpos('=-',$type)+1);
    $id=Web::instance()->slug($str);
    $text=$this->scan($str);
    return '<'.$tag.' id="'.$id.'">'.$text.'</'.$tag.'>'."\n\n";
  }
  /**
   * Process ordered/unordered list. Items are consumed one at a
   * time; the first item decides the list type (ol/ul) and whether
   * the list is tight or loose. A horizontal rule inside the list
   * ends it, and the text after the rule is parsed as new blocks
   * @return string
   * @param $str string
   **/
  protected function _li($str) {
    // Initialize list parser
    $len=strlen($str);
    $ptr=0;
    $dst='';
    $tail='';
    $first=TRUE;
    $tight=TRUE;
    $type='ul';
    // Main loop
    while ($ptr<$len) {
      // Unparsed remainder, computed once per pass
      $rest=substr($str,$ptr);
      if (preg_match('/^\h*[*-](?:\h?[*-]){2,}(?:\n+|$)/',
        $rest,$match)) {
        $ptr+=strlen($match[0]);
        // Embedded horizontal rule
        return (strlen($dst)?
          ('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'').
          '<hr />'."\n\n".$this->build(substr($str,$ptr));
      }
      elseif (preg_match('/(?<=^|\n)([*+-]|\d+\.)\h'.
        '(.+?(?:\n+|$))((?:(?: {4}|\t)+.+?(?:\n+|$))*)/s',
        $rest,$match)) {
        // Remove one level of indentation from continuation lines
        $match[3]=preg_replace(
          '/(?<=^|\n)(?: {4}|\t)/','',$match[3]);
        // Does the continuation open a block other than a
        // paragraph (the last entry in the rule table)?
        $found=FALSE;
        foreach (array_slice($this->blocks,0,-1) as $regex)
          if (preg_match($regex,$match[3])) {
            $found=TRUE;
            break;
          }
        // List
        if ($first) {
          // First pass
          if (is_numeric($match[1]))
            $type='ol';
          if (preg_match('/\n{2,}$/',$match[2].
            ($found?'':$match[3])))
            // Loose structure; Use paragraphs
            $tight=FALSE;
          $first=FALSE;
        }
        $ptr+=strlen($match[0]);
        $tmp=$this->snip($match[2].$match[3]);
        if ($tight) {
          if ($found)
            $tmp=$match[2].
              $this->build($this->snip($match[3]));
        }
        else
          $tmp=$this->build($tmp);
        $dst.='<li>'.$this->scan(trim($tmp)).'</li>'."\n";
      }
      else {
        // Neither pattern matched (preg_match() gives 0, or FALSE
        // on a PCRE error), so $ptr cannot advance. Render what is
        // left as a paragraph after the list instead of looping
        // forever
        $tail=$this->_p($rest);
        break;
      }
    }
    return (strlen($dst)?
      ('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'').$tail;
  }
  /**
   * Pass a raw HTML block through without any processing
   * @return string
   * @param $str string
   **/
  protected function _raw($str) {
    // Raw markup is emitted exactly as written
    return $str;
  }
  /**
   * Render a paragraph. If a later line starts with ">" or "#", the
   * text before it becomes a paragraph and the rest is parsed as new
   * blocks. Otherwise plain text is escaped, while tags and inline
   * links are kept intact for the span scanner
   * @return string
   * @param $str string
   **/
  protected function _p($str) {
    $str=trim($str);
    if (!strlen($str))
      return '';
    // Blockquote/heading starting in the middle of the paragraph
    if (preg_match('/(.+?\n)([>#].+)/',$str,$parts))
      return $this->_p($parts[1]).$this->build($parts[2]);
    $self=$this;
    $escape=function($expr) use($self) {
      // Fourth group: text holding no tag or link at all
      if (isset($expr[4]))
        return $self->esc($expr[4]);
      // Escape the text around the tag/link, keep the span as is
      $lead=isset($expr[1])?$self->esc($expr[1]):'';
      $span=$expr[2];
      $trail=isset($expr[3])?$self->esc($expr[3]):'';
      return $lead.$span.$trail;
    };
    $str=preg_replace_callback(
      '/([^<>\[]+)?(<.+?>|\[.+?\]\s*\(.+?\))([^<>\]]+)?|(.+)/s',
      $escape,
      $str
    );
    return '<p>'.$this->scan($str).'</p>'."\n\n";
  }
  /**
   * Convert emphasis markers to em/strong tags: one, two or three
   * "*" or "_" characters give em, strong or both. The substitution
   * is repeated until the text stops changing
   * @return string
   * @param $str string
   **/
  protected function _text($str) {
    $wrap=function($expr) {
      // Opening/closing tags indexed by marker length
      $open=array(1=>'<em>',2=>'<strong>',3=>'<strong><em>');
      $close=array(1=>'</em>',2=>'</strong>',3=>'</em></strong>');
      $size=strlen($expr[1]);
      return $open[$size].$expr[2].$close[$size];
    };
    $prev='';
    while ($str!=$prev) {
      // Remember the input of this pass to detect a fixed point
      $prev=$str;
      $str=preg_replace_callback(
        '/(?<!\\\\)([*_]{1,3})(.*?)(?!\\\\)\1(?=[\s[:punct:]]|$)/',
        $wrap,
        $prev
      );
    }
    return $str;
  }
  /**
   * Process image span: ![alt](url "title") becomes an img tag. The
   * alt and title attributes are only written when present
   * @return string
   * @param $str string
   **/
  protected function _img($str) {
    $self=$this;
    return preg_replace_callback(
      '/!(?:\[(.+?)\])?\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
      function($expr) use($self) {
        // The URL comes straight from the document; escape it like
        // the href in _a() so a quote cannot end the attribute
        $tag='<img src="'.$self->esc($expr[2]).'"';
        if (!empty($expr[1]))
          $tag.=' alt="'.$self->esc($expr[1]).'"';
        if (!empty($expr[3]))
          $tag.=' title="'.$self->esc($expr[3]).'"';
        return $tag.' />';
      },
      $str
    );
  }
  /**
   * Convert inline links of the form [text](url "title") to anchor
   * tags; the link text itself is scanned for further spans
   * @return string
   * @param $str string
   **/
  protected function _a($str) {
    $self=$this;
    $link=function($expr) use($self) {
      $tag='<a href="'.$self->esc($expr[2]).'"';
      // Title is optional
      if (!empty($expr[3]))
        $tag.=' title="'.$self->esc($expr[3]).'"';
      return $tag.'>'.$self->scan($expr[1]).'</a>';
    };
    return preg_replace_callback(
      '/(?<!\\\\)\[(.+?)(?!\\\\)\]\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
      $link,
      $str
    );
  }
  /**
   * Turn <url> into an anchor when the text between the angle
   * brackets has a URL scheme; brackets inside backticks are left
   * untouched
   * @return string
   * @param $str string
   **/
  protected function _auto($str) {
    $self=$this;
    $link=function($expr) use($self) {
      // First group is only filled by the backtick alternative
      if (!empty($expr[1]) || !parse_url($expr[2],PHP_URL_SCHEME))
        return $expr[0];
      $url=$self->esc($expr[2]);
      return '<a href="'.$url.'">'.$url.'</a>';
    };
    return preg_replace_callback(
      '/`.*?<(.+?)>.*?`|<(.+?)>/',
      $link,
      $str
    );
  }
  /**
   * Convert backtick-delimited spans (single backticks, or double
   * backticks padded with a space) to escaped code elements
   * @return string
   * @param $str string
   **/
  protected function _code($str) {
    $self=$this;
    $wrap=function($expr) use($self) {
      // Group 1: double-backtick form; group 2: single-backtick form
      if (empty($expr[1]))
        $code=$expr[2];
      else
        $code=$expr[1];
      return '<code>'.$self->esc($code).'</code>';
    };
    return preg_replace_callback(
      '/`` (.+?) ``|(?<!\\\\)`(.+?)(?!\\\\)`/',
      $wrap,
      $str
    );
  }
  /**
   * Escape text for HTML output. The shorthand tokens "...", "(tm)",
   * "(r)" and "(c)" are first replaced (case-insensitively) by their
   * entities, then HTML special characters are converted without
   * re-encoding entities that are already present
   * @return string
   * @param $str string
   **/
  function esc($str) {
    // Build the token table on first use
    if (!$this->special)
      $this->special=array(
        '...'=>'&hellip;',
        '(tm)'=>'&trade;',
        '(r)'=>'&reg;',
        '(c)'=>'&copy;'
      );
    // One pattern per token; preg_replace() applies them in order
    $patterns=array();
    foreach (array_keys($this->special) as $token)
      $patterns[]='/'.preg_quote($token,'/').'/i';
    $str=preg_replace($patterns,array_values($this->special),$str);
    $charset=Base::instance()->get('ENCODING');
    return htmlspecialchars($str,ENT_COMPAT,$charset,FALSE);
  }
  /**
   * Collapse line feeds: any line feeds that follow a line feed, and
   * any run of line feeds at the end of the string, are replaced by
   * a single line feed
   * @return string
   * @param $str string
   **/
  protected function snip($str) {
    $extra='/(?:(?<=\n)\n+)|\n+$/';
    return preg_replace($extra,"\n",$str);
  }
  /**
   * Run the span converters over a piece of text, in this order:
   * images, anchors, emphasis, automatic links, code
   * @return string
   * @param $str string
   **/
  function scan($str) {
    foreach (array('img','a','text','auto','code') as $span) {
      // Converter methods are named after the span, with a "_" prefix
      $handler='_'.$span;
      $str=$this->$handler($str);
    }
    return $str;
  }
  /**
   * Assemble blocks. The text is consumed from the start: a
   * reference-style link definition is applied to the output built
   * so far; anything else is handed to the first block rule whose
   * pattern matches, and that rule's method produces the HTML
   * @return string
   * @param $str string
   **/
  protected function build($str) {
    if (!$this->blocks) {
      // Regexes for capturing entire blocks; tried in this order,
      // with the paragraph rule last
      $this->blocks=array(
        'blockquote'=>'/^(?:\h?>\h?.*?(?:\n+|$))+/',
        'pre'=>'/^(?:(?: {4}|\t).+?(?:\n+|$))+/',
        'fence'=>'/^`{3}\h*(\w+)?.*?[^\n]*\n+(.+?)`{3}[^\n]*'.
          '(?:\n+|$)/s',
        'hr'=>'/^\h*[*_-](?:\h?[\*_-]){2,}\h*(?:\n+|$)/',
        'atx'=>'/^\h*(#{1,6})\h?(.+?)\h*(?:#.*)?(?:\n+|$)/',
        'setext'=>'/^\h*(.+?)\h*\n([=-])+\h*(?:\n+|$)/',
        'li'=>'/^(?:(?:[*+-]|\d+\.)\h.+?(?:\n+|$)'.
          '(?:(?: {4}|\t)+.+?(?:\n+|$))*)+/s',
        'raw'=>'/^((?:<!--.+?-->|<\?.+?\?>|<%.+?%>|'.
          '<(address|article|aside|audio|blockquote|canvas|dd|'.
          'div|dl|fieldset|figcaption|figure|footer|form|h\d|'.
          'header|hgroup|hr|noscript|object|ol|output|p|pre|'.
          'section|table|tfoot|ul|video).*?'.
          '(?:\/>|>(?:(?>[^><]+)|(?R))*<\/\2>))'.
          '\h*(?:\n{2,}|\n?$))/s',
        'p'=>'/^(.+?(?:\n{2,}|\n?$))/s'
      );
    }
    $self=$this;
    // Treat lines with nothing but whitespaces as empty lines
    $str=preg_replace('/\n\h+(?=\n)/',"\n",$str);
    // Initialize block parser
    $len=strlen($str);
    $ptr=0;
    $dst='';
    // Main loop
    while ($ptr<$len) {
      // Unparsed remainder, computed once per pass
      $rest=substr($str,$ptr);
      if (preg_match('/^ {0,3}\[([^\[\]]+)\]:\s*<?(.*?)>?\s*'.
        '(?:"([^\n]*)")?(?:\n+|$)/s',$rest,$match)) {
        // Reference-style link; Backtrack
        $ptr+=strlen($match[0]);
        $tmp='';
        // Let any whitespace in the document match the blanks
        // in the reference name
        $ref=preg_replace('/\h/','\s',preg_quote($match[1],'/'));
        // Repeat until no more uses of the reference are found
        while ($dst!=$tmp) {
          $dst=preg_replace_callback(
            '/(?<!\\\\)\[('.$ref.')(?!\\\\)\]\s*\[\]|'.
            '(!?)(?:\[([^\[\]]+)\]\s*)?'.
            '(?<!\\\\)\[('.$ref.')(?!\\\\)\]/',
            function($expr) use($match,$self) {
              $title=empty($match[3])?
                '':
                (' title="'.$self->esc($match[3]).'"');
              if (empty($expr[2])) {
                // Anchor; label is the explicit text if
                // given, else the reference name itself
                if (!empty($expr[3]))
                  $label=$expr[3];
                elseif (!empty($expr[1]))
                  $label=$expr[1];
                else
                  $label=$expr[4];
                return '<a href="'.
                  $self->esc($match[2]).'"'.$title.'>'.
                  $self->scan($label).'</a>';
              }
              // Image; the URL is escaped so that a quote
              // cannot end the src attribute
              return '<img src="'.$self->esc($match[2]).'"'.
                ' alt="'.$self->esc($expr[3]).'"'.
                $title.' />';
            },
            $tmp=$dst
          );
        }
      }
      else {
        $matched=FALSE;
        foreach ($this->blocks as $func=>$regex)
          if (preg_match($regex,$rest,$match)) {
            $ptr+=strlen($match[0]);
            // Pass the captured groups, or the whole match
            // when the rule captures nothing
            $dst.=call_user_func_array(
              array($this,'_'.$func),
              count($match)>1?array_slice($match,1):$match
            );
            $matched=TRUE;
            break;
          }
        if (!$matched) {
          // No rule matched (preg_match() returns FALSE on a
          // PCRE error), so $ptr cannot advance. Emit the
          // remainder escaped rather than looping forever
          $dst.=$this->esc($rest);
          break;
        }
      }
    }
    return $dst;
  }
  /**
   * Convert markdown text to HTML. Line endings are normalized to
   * line feeds, the blocks are assembled, backslash escapes are
   * resolved outside of code elements, tags, parenthesized and
   * double-quoted text, and surplus line feeds are trimmed
   * @return string
   * @param $txt string
   **/
  function convert($txt) {
    $src=preg_replace('/\r\n|\r/',"\n",$txt);
    $html=$this->build($src);
    $unescape=function($expr) {
      // Group 1 is protected text and is kept verbatim; otherwise
      // the backslash is dropped and the escaped character kept
      if (empty($expr[1]))
        return $expr[2];
      return $expr[1];
    };
    $html=preg_replace_callback(
      '/(<code.*?>.+?<\/code>|'.
      '<[^>\n]+>|\([^\n\)]+\)|"[^"\n]+")|'.
      '\\\\(.)/s',
      $unescape,
      $html
    );
    return $this->snip($html);
  }
  539. }