PageRenderTime 48ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/code/DocumentationParser.php

https://github.com/pixeltricks/silverstripe-docsviewer
PHP | 431 lines | 259 code | 59 blank | 113 comment | 42 complexity | 334ac85ad2be287bfefc2e731f71c374 MD5 | raw file
Possible License(s): LGPL-3.0
  1. <?php
  /**
   * Parser wrapping the Markdown Extra parser.
   *
   * Before the markdown is handed to the Markdown Extra parser it is
   * pre-processed: image links, relative links and "api:" links are rewritten,
   * headings get explicit anchors and code blocks are converted to <pre> tags.
   *
   * @see http://michelf.com/projects/php-markdown/extra/
   *
   * @package docsviewer
   */
  class DocumentationParser {

      const CODE_BLOCK_BACKTICK = 1;
      const CODE_BLOCK_COLON = 2;

      /**
       * @var string sprintf() template used to rewrite api links in the format
       *             "[api:MyClass]" or "[api:MyClass::$my_property]". The three
       *             placeholders receive, in order: the link subject, the page
       *             version and the folder of the page's entity.
       */
      public static $api_link_base = 'http://api.silverstripe.org/search/lookup/?q=%s&version=%s&module=%s';

      /**
       * @var array Map of heading text to the number of times it has been seen
       *            during the most recent {@link rewrite_heading_anchors()} call.
       */
      public static $heading_counts = array();

      /**
       * Parse a given path to the documentation for a file. Performs a case
       * insensitive lookup on the file system. Automatically appends the file
       * extension to one of the markdown extensions as well so /install/ in a
       * web browser will match /install.md or /INSTALL.md.
       *
       * Filepath: /var/www/myproject/src/cms/en/folder/subfolder/page.md
       * URL: http://myhost/mywebroot/dev/docs/2.4/cms/en/folder/subfolder/page
       * Webroot: http://myhost/mywebroot/
       * Baselink: dev/docs/2.4/cms/en/
       * Pathparts: folder/subfolder/page
       *
       * @param DocumentationPage $page
       * @param String $baselink Link relative to webroot, up until the "root"
       *                         of the module. Necessary to rewrite relative
       *                         links
       *
       * @return String|false The result of the Markdown Extra transform, or
       *                      false when no valid page was given.
       */
      public static function parse(DocumentationPage $page, $baselink = null) {
          if(!$page || (!$page instanceof DocumentationPage)) return false;

          $md = $page->getMarkdown();

          // Pre-processing. Order matters: links are rewritten on the raw
          // markdown before code blocks are turned into escaped <pre> tags.
          $md = self::rewrite_image_links($md, $page);
          $md = self::rewrite_relative_links($md, $page, $baselink);
          $md = self::rewrite_api_links($md, $page);
          $md = self::rewrite_heading_anchors($md, $page);
          $md = self::rewrite_code_blocks($md);

          require_once(DOCSVIEWER_PATH .'/thirdparty/markdown/markdown.php');

          $parser = new MarkdownExtra_Parser();
          $parser->no_markup = true;

          return $parser->transform($md);
      }

      /**
       * Converts code blocks into <pre> tags with HTML-escaped content.
       *
       * Three flavours are recognised:
       *  - blocks opened by a "::: language" line followed by tab indented lines
       *  - blocks fenced by lines of three backticks (optionally "```language")
       *  - plain markdown code blocks (tab indented lines)
       *
       * @param String $md
       *
       * @return String
       */
      public static function rewrite_code_blocks($md) {
          $started = false;
          $inner = false;
          $mode = false;

          $lines = explode("\n", $md);
          $output = array();

          foreach($lines as $i => $line) {
              if(!$started && ($opener = self::match_code_block_opener($line))) {
                  // first line with custom formatting (":::" or "```")
                  $started = true;
                  $mode = $opener[0];
                  $output[$i] = $opener[1];
              }
              elseif($started && $mode == self::CODE_BLOCK_BACKTICK) {
                  // inside a backtick fenced box
                  if(preg_match('/^\t*```\s*/', $line)) {
                      // end of the fenced box. The fence line itself is replaced
                      // by the closing tag.
                      $output = self::finalize_code_output($i, $output);
                      $started = $inner = $mode = false;
                  }
                  else {
                      $output[$i] = htmlentities($line, ENT_COMPAT, 'UTF-8');
                      $inner = true;
                  }
              }
              elseif(preg_match('/^\t(.*)/', $line, $matches)) {
                  // inner line of block, or first line of standard markdown code
                  // block. The regex removes the first tab (any following tabs
                  // are part of the code).
                  $output[$i] = ($started ? '' : "<pre>\n")
                      . htmlentities($matches[1], ENT_COMPAT, 'UTF-8');
                  $inner = true;
                  $started = true;
              }
              elseif($started && $inner && $mode == self::CODE_BLOCK_COLON && trim($line) === "") {
                  // still inside a colon based block: whitespace-only lines are
                  // kept for now, trailing ones are removed when the block is
                  // finalized.
                  $output[$i] = $line;
              }
              elseif($started && $inner) {
                  // The line is neither tab indented nor (in colon mode) blank,
                  // so the block ended on the previous line.
                  $output = self::finalize_code_output($i - 1, $output);
                  $started = $inner = $mode = false;

                  // The terminating line may itself open the next block; without
                  // this check it would be emitted verbatim and the following
                  // block would lose its opening tag.
                  $opener = self::match_code_block_opener($line);

                  if($opener) {
                      $started = true;
                      $mode = $opener[0];
                      $output[$i] = $opener[1];
                  }
                  else {
                      $output[$i] = $line;
                  }
              }
              else {
                  $output[$i] = $line;
              }
          }

          // input ended while a block was still open
          if($started) {
              $output = self::finalize_code_output($i, $output);
          }

          return join("\n", $output);
      }

      /**
       * Checks whether a line opens a ":::" or "```" code block.
       *
       * @param String $line
       *
       * @return array|false array(block mode constant, opening <pre> tag) or
       *                     false if the line does not open a block.
       */
      private static function match_code_block_opener($line) {
          if(preg_match('/^\t*:::\s*(.*)/', $line, $matches)) {
              $mode = self::CODE_BLOCK_COLON;
          }
          elseif(preg_match('/^\t*```\s*(.*)/', $line, $matches)) {
              $mode = self::CODE_BLOCK_BACKTICK;
          }
          else {
              return false;
          }

          // trim() drops a trailing "\r" from CRLF input; the value ends up in
          // an HTML attribute so it is escaped as well.
          $language = htmlentities(trim($matches[1]), ENT_COMPAT, 'UTF-8');

          return array($mode, sprintf('<pre class="brush: %s">', $language));
      }

      /**
       * Closes the code block ending at index $i: whitespace-only entries at
       * the end of the block are removed and the closing tag is appended to the
       * last remaining line (or stored at the index if nothing is stored there).
       *
       * @param int $i Index of the last line belonging to the block
       * @param array $output Output lines collected so far, keyed by line index
       *
       * @return array
       */
      private static function finalize_code_output($i, $output) {
          $j = $i;

          while(isset($output[$j]) && trim($output[$j]) === "") {
              unset($output[$j]);
              $j--;
          }

          if(isset($output[$j])) {
              $output[$j] .= "</pre>\n";
          }
          else {
              $output[$j] = "</pre>\n\n";
          }

          return $output;
      }

      /**
       * Collapses "folder/../" sequences in a path or URL.
       *
       * Unlike a "loop while the string contains '..'" approach this always
       * terminates: ".." segments which cannot be resolved (e.g. a leading
       * "../") and file names containing ".." are left untouched. Folder names
       * may contain any character except "/".
       *
       * @param String $path
       *
       * @return String
       */
      public static function resolve_relative_path($path) {
          // a segment other than "." or "..", followed by a ".." segment
          $pattern = '#(?<![^/])(?!\.\.?/)[^/]+/\.\.(?:/|$)#';

          do {
              $resolved = preg_replace($pattern, '', $path, -1, $count);

              // preg_replace() returns null on error, keep what we have
              if($resolved === null) break;

              $path = $resolved;
          } while($count > 0);

          return $path;
      }

      /**
       * Rewrites relative image links ("![title](url)") to absolute URLs based
       * on the folder of the given page. Links with a protocol are left alone.
       *
       * @param String $md
       * @param DocumentationPage $page
       *
       * @return String
       */
      public static function rewrite_image_links($md, $page) {
          // Links with titles
          $re = '/
              !
              \[
                  (.*?) # image title (non greedy)
              \]
              \(
                  (.*?) # image url (non greedy)
              \)
          /x';

          preg_match_all($re, $md, $images);

          if($images) {
              foreach($images[0] as $i => $match) {
                  $title = $images[1][$i];
                  $url = $images[2][$i];

                  // Don't process absolute links (based on protocol detection)
                  $urlParts = parse_url($url);
                  if($urlParts && isset($urlParts['scheme'])) continue;

                  // Rewrite URL (relative or absolute)
                  $baselink = Director::makeRelative(dirname($page->getPath()));
                  $relativeUrl = rtrim($baselink, '/') . '/' . ltrim($url, '/');

                  // Resolve relative paths
                  $relativeUrl = self::resolve_relative_path($relativeUrl);

                  // Replace any double slashes (apart from protocol)
                  $relativeUrl = preg_replace('/([^:])\/{2,}/', '$1/', $relativeUrl);

                  // Make it absolute again
                  $absoluteUrl = Director::absoluteBaseURL() . $relativeUrl;

                  // Replace in original content
                  $md = str_replace(
                      $match,
                      sprintf('![%s](%s)', $title, $absoluteUrl),
                      $md
                  );
              }
          }

          return $md;
      }

      /**
       * Rewrite links with special "api:" prefix, from two possible formats:
       * 1. [api:DataObject]
       * 2. [My Title](api:DataObject)
       *
       * Hack: a backtick directly before or after such a link is removed, as
       * the currently used markdown parser doesn't resolve links in backticks.
       *
       * @param String $md
       * @param DocumentationPage $page
       * @return String
       */
      public static function rewrite_api_links($md, $page) {
          // Links with titles
          $re = '/
              `?
              \[
                  (.*?) # link title (non greedy)
              \]
              \(
                  api:(.*?) # link url (non greedy)
              \)
              `?
          /x';

          preg_match_all($re, $md, $linksWithTitles);

          if($linksWithTitles) {
              foreach($linksWithTitles[0] as $i => $match) {
                  $title = $linksWithTitles[1][$i];
                  $subject = $linksWithTitles[2][$i];
                  $url = sprintf(self::$api_link_base, $subject, $page->getVersion(), $page->getEntity()->getFolder());

                  $md = str_replace(
                      $match,
                      sprintf('[%s](%s)', $title, $url),
                      $md
                  );
              }
          }

          // Bare links
          $re = '/
              `?
              \[
                  api:(.*?)
              \]
              `?
          /x';

          preg_match_all($re, $md, $links);

          if($links) {
              foreach($links[0] as $i => $match) {
                  $subject = $links[1][$i];
                  $url = sprintf(self::$api_link_base, $subject, $page->getVersion(), $page->getEntity()->getFolder());

                  $md = str_replace(
                      $match,
                      sprintf('[%s](%s)', $subject, $url),
                      $md
                  );
              }
          }

          return $md;
      }

      /**
       * Appends an explicit "{#id}" anchor to every line starting with "#"
       * which doesn't define one already. Repeated headings get a numeric
       * suffix ("-2", "-3", ...).
       *
       * @param String $md
       * @param DocumentationPage $page Not used by this method
       *
       * @return String
       */
      public static function rewrite_heading_anchors($md, $page) {
          // Start counting from scratch, otherwise the anchors of a document
          // depend on whatever was parsed earlier in the same request.
          self::$heading_counts = array();

          $re = '/^\#+(.*)/m';
          $md = preg_replace_callback($re, array('DocumentationParser', '_rewrite_heading_anchors_callback'), $md);

          return $md;
      }

      /**
       * preg_replace_callback() handler for {@link rewrite_heading_anchors()}.
       *
       * @param array $matches [0] the complete heading line, [1] the text
       *                       following the leading "#" characters
       *
       * @return String
       */
      public static function _rewrite_heading_anchors_callback($matches) {
          // "." matches "\r", so CRLF input leaves a carriage return at the end
          // of the match. Strip it so the anchor isn't appended after it.
          $heading = rtrim($matches[0], "\r\n");
          $headingText = rtrim($matches[1], "\r\n");

          // heading already has an explicit anchor
          if(preg_match('/\{\#.*\}/', $headingText)) return $heading;

          if(!isset(self::$heading_counts[$headingText])) {
              self::$heading_counts[$headingText] = 1;
          }
          else {
              self::$heading_counts[$headingText]++;
              $headingText .= "-" . self::$heading_counts[$headingText];
          }

          return sprintf("%s {#%s}", $heading, self::generate_html_id($headingText));
      }

      /**
       * Generate an html element id from a string: ampersands become "and",
       * every run of characters other than A-Z, a-z and 0-9 becomes a single
       * dash, and the result is lowercased.
       *
       * @param String $title
       *
       * @return String
       */
      public static function generate_html_id($title) {
          $t = $title;
          $t = str_replace('&amp;','-and-',$t);
          $t = str_replace('&','-and-',$t);
          $t = preg_replace('/[^A-Za-z0-9]+/','-',$t);
          $t = preg_replace('/-+/','-',$t);
          $t = trim($t, '-');
          $t = strtolower($t);

          return $t;
      }

      /**
       * Resolves all relative links within markdown.
       *
       * @param String $md Markdown content
       * @param DocumentationPage $page
       * @param String $baselink
       * @return String Markdown
       */
      public static function rewrite_relative_links($md, $page, $baselink = null) {
          if(!$baselink) $baselink = $page->getEntity()->getRelativeLink();

          $re = '/
              ([^\!]?) # exclude image format
              \[
                  (.*?) # link title (non greedy)
              \]
              \(
                  (.*?) # link url (non greedy)
              \)
          /x';

          preg_match_all($re, $md, $matches);

          // relative path (relative to module base folder), without the filename.
          // For "sapphire/en/current/topics/templates", this would be "templates"
          $relativePath = dirname($page->getRelativePath());
          if($relativePath == '.') $relativePath = '';

          // file base link
          $fileBaseLink = Director::makeRelative(dirname($page->getPath()));

          if($matches) {
              foreach($matches[0] as $i => $match) {
                  $title = $matches[2][$i];
                  $url = $matches[3][$i];

                  // Don't process API links
                  if(preg_match('/^api:/', $url)) continue;

                  // Don't process absolute links (based on protocol detection)
                  $urlParts = parse_url($url);
                  if($urlParts && isset($urlParts['scheme'])) continue;

                  // for images we need to use the file base path
                  if(preg_match('/_images/', $url)) {
                      $relativeUrl = Controller::join_links(
                          Director::absoluteBaseURL(),
                          $fileBaseLink,
                          $url
                      );
                  }
                  else {
                      // Rewrite public URL
                      if(preg_match('/^\//', $url)) {
                          // Absolute: Only path to module base
                          $relativeUrl = Controller::join_links($baselink, $url);
                      } else {
                          // Relative: Include path to module base and any folders
                          $relativeUrl = Controller::join_links($baselink, $relativePath, $url);
                      }
                  }

                  // Resolve relative paths. Links pointing above the root keep
                  // their unresolvable ".." segments.
                  $relativeUrl = self::resolve_relative_path($relativeUrl);

                  // Replace any double slashes (apart from protocol)
                  $relativeUrl = preg_replace('/([^:])\/{2,}/', '$1/', $relativeUrl);

                  // Replace in original content
                  $md = str_replace(
                      $match,
                      sprintf('%s[%s](%s)', $matches[1][$i], $title, $relativeUrl),
                      $md
                  );
              }
          }

          return $md;
      }

      /**
       * Extracts "key: value" pairs from the markdown of a page and stores
       * each of them on the page via setMetaData(). The markdown itself is
       * not modified here.
       *
       * NOTE(review): the pattern is not anchored, so any "word: text"
       * sequence anywhere in the document is treated as meta data - confirm
       * that this is intended.
       *
       * @param DocumentationPage $page
       */
      public static function retrieve_meta_data(DocumentationPage &$page) {
          if($md = $page->getMarkdown()) {
              $matches = preg_match_all('/
                  (?<key>[A-Za-z0-9_-]+):
                  \s*
                  (?<value>.*)
              /x', $md, $meta);

              if($matches) {
                  foreach($meta['key'] as $index => $key) {
                      if(isset($meta['value'][$index])) {
                          $page->setMetaData($key, $meta['value'][$index]);
                      }
                  }
              }
          }
      }
  }