PageRenderTime 63ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/markdown.c

https://github.com/erlingmat/upskirt
C | 2202 lines | 2039 code | 93 blank | 70 comment | 181 complexity | 48652e388e42df46ac160f58d5c18817 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /* markdown.c - generic markdown parser */
  2. /*
  3. * Copyright (c) 2009, Natacha Porté
  4. * Copyright (c) 2011, Vicent Marti
  5. *
  6. * Permission to use, copy, modify, and distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. #include "markdown.h"
  19. #include "array.h"
  20. #include <assert.h>
  21. #include <string.h>
  22. #include <strings.h> /* for strncasecmp */
  23. #include <ctype.h>
  24. #include <stdio.h>
  /* BUFFER_BLOCK and BUFFER_SPAN are the two valid indices into */
  /* render.work_bufs and are the `type` argument of rndr_newbuf/rndr_popbuf */
  25. #define BUFFER_BLOCK 0
  26. #define BUFFER_SPAN 1
  27. #define MKD_LI_END 8 /* internal list flag */
  28. /***************
  29. * LOCAL TYPES *
  30. ***************/
  31. /* link_ref • reference to a link */
  /* entries are looked up by id (see cmp_link_ref); link and title are what */
  /* char_link hands to the link/image renderer for reference-style links */
  32. struct link_ref {
  33. struct buf *id; /* key compared with bufcasecmp() */
  34. struct buf *link; /* link target; char_link runs it through unscape_text */
  35. struct buf *title; /* title passed to the renderer as is */
  36. };
  37. /* char_trigger • function pointer to render active chars */
  38. /* returns the number of chars taken care of; 0 means "no action", in */
  /* which case parse_inline treats the active char as normal text */
  39. /* data is the pointer of the beginning of the span */
  40. /* offset is the number of valid chars before data */
  /* size is the number of chars available starting at data */
  41. struct render;
  42. typedef size_t
  43. (*char_trigger)(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  /* forward declarations of the handlers listed in markdown_char_ptrs */
  44. static size_t char_emphasis(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  45. static size_t char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  46. static size_t char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  47. static size_t char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  48. static size_t char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  49. static size_t char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  50. static size_t char_autolink(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  51. static size_t char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  /* markdown_char_t • action codes for active chars */
  /* each enumerator's value is the index of its handler in */
  /* markdown_char_ptrs, so both lists must stay in the same order */
  /* NOTE(review): presumably these are the values stored in */
  /* render.active_char; the code filling that table is not in view */
  52. enum markdown_char_t {
  53. MD_CHAR_NONE = 0, /* no handler (NULL slot in markdown_char_ptrs) */
  54. MD_CHAR_EMPHASIS, /* char_emphasis */
  55. MD_CHAR_CODESPAN, /* char_codespan */
  56. MD_CHAR_LINEBREAK, /* char_linebreak */
  57. MD_CHAR_LINK, /* char_link */
  58. MD_CHAR_LANGLE, /* char_langle_tag */
  59. MD_CHAR_ESCAPE, /* char_escape */
  60. MD_CHAR_ENTITITY, /* char_entity; spelling (sic) kept for existing users */
  61. MD_CHAR_AUTOLINK, /* char_autolink */
  62. };
  /* markdown_char_ptrs • handler table indexed by the action code that */
  /* parse_inline reads from render.active_char; slot 0 is never called */
  /* because action 0 marks an inactive char */
  63. static char_trigger markdown_char_ptrs[] = {
  64. NULL,
  65. &char_emphasis,
  66. &char_codespan,
  67. &char_linebreak,
  68. &char_link,
  69. &char_langle_tag,
  70. &char_escape,
  71. &char_entity,
  72. &char_autolink,
  73. };
  74. /* render • structure containing one particular render */
  75. struct render {
  76. struct mkd_renderer make; /* renderer callbacks and their opaque pointer */
  77. struct array refs; /* link_ref entries, searched with arr_sorted_find() */
  78. char active_char[256]; /* per byte value: action code, 0 = inactive */
  79. struct parray work_bufs[2]; /* buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
  80. unsigned int ext_flags; /* extension flags, e.g. MKDEXT_NO_INTRA_EMPHASIS */
  81. size_t max_nesting; /* parse_inline stops once more work buffers than this are in use */
  82. };
  83. /* html_tag • structure for quick HTML tag search (inspired from discount) */
  84. struct html_tag {
  85. const char *text; /* tag name; not NUL-terminated when built by find_block_tag */
  86. size_t size; /* number of chars of text that make up the name */
  87. };
  88. static inline struct buf *
  89. rndr_newbuf(struct render *rndr, int type)
  90. {
  91. static const size_t buf_size[2] = {256, 64};
  92. struct buf *work = NULL;
  93. struct parray *queue = &rndr->work_bufs[type];
  94. if (queue->size < queue->asize) {
  95. work = queue->item[queue->size++];
  96. work->size = 0;
  97. } else {
  98. work = bufnew(buf_size[type]);
  99. parr_push(queue, work);
  100. }
  101. return work;
  102. }
  103. static inline void
  104. rndr_popbuf(struct render *rndr, int type)
  105. {
  106. rndr->work_bufs[type].size--;
  107. }
  108. /********************
  109. * GLOBAL VARIABLES *
  110. ********************/
  111. /* block_tags • recognised block tags, sorted by cmp_html_tag */
  /* (shorter names first, then case-insensitive order within one length); */
  /* find_block_tag() runs bsearch() on this table, so the order must be kept */
  112. static struct html_tag block_tags[] = {
  113. /*0*/ { "p", 1 },
  114. { "dl", 2 },
  115. { "h1", 2 },
  116. { "h2", 2 },
  117. { "h3", 2 },
  118. { "h4", 2 },
  119. { "h5", 2 },
  120. { "h6", 2 },
  121. { "ol", 2 },
  122. { "ul", 2 },
  123. /*10*/ { "del", 3 },
  124. { "div", 3 },
  125. /*12*/ { "ins", 3 },
  126. { "pre", 3 },
  127. { "form", 4 },
  128. { "math", 4 },
  129. { "table", 5 },
  130. { "iframe", 6 },
  131. { "script", 6 },
  132. { "fieldset", 8 },
  133. { "noscript", 8 },
  134. { "blockquote", 10 }
  135. };
  /* direct pointers to the "ins" and "del" entries; the offsets are table */
  /* indices and must be adjusted if entries are inserted before them */
  136. #define INS_TAG (block_tags + 12)
  137. #define DEL_TAG (block_tags + 10)
  138. /***************************
  139. * HELPER FUNCTIONS *
  140. ***************************/
  /* is_safe_link • tells whether a link starts with a whitelisted URI scheme */
  /* link does not need to be NUL-terminated; link_len is its length */
  /* the prefix is matched case-insensitively and must be followed by at */
  /* least one more char; returns 1 when the link is accepted, 0 otherwise */
  int
  is_safe_link(const char *link, size_t link_len)
  {
      /* mailto URIs have no "//" part, so only the bare scheme is required */
      /* ("mailto://..." still matches because "mailto:" is a prefix of it) */
      static const char *const valid_uris[] = {
          "http://", "https://", "ftp://", "mailto:"
      };
      const size_t valid_uris_count = sizeof valid_uris / sizeof valid_uris[0];
      size_t i;

      for (i = 0; i < valid_uris_count; ++i) {
          size_t len = strlen(valid_uris[i]);

          if (link_len > len && strncasecmp(link, valid_uris[i], len) == 0)
              return 1;
      }

      return 0;
  }
  156. static void
  157. unscape_text(struct buf *ob, struct buf *src)
  158. {
  159. size_t i = 0, org;
  160. while (i < src->size) {
  161. org = i;
  162. while (i < src->size && src->data[i] != '\\')
  163. i++;
  164. if (i > org)
  165. bufput(ob, src->data + org, i - org);
  166. if (i + 1 >= src->size)
  167. break;
  168. bufputc(ob, src->data[i + 1]);
  169. i += 2;
  170. }
  171. }
  172. /* cmp_link_ref • comparison function for link_ref sorted arrays */
  173. static int
  174. cmp_link_ref(void *key, void *array_entry)
  175. {
  176. struct link_ref *lr = array_entry;
  177. return bufcasecmp(key, lr->id);
  178. }
  179. /* cmp_link_ref_sort • comparison function for link_ref qsort */
  180. static int
  181. cmp_link_ref_sort(const void *a, const void *b)
  182. {
  183. const struct link_ref *lra = a;
  184. const struct link_ref *lrb = b;
  185. return bufcasecmp(lra->id, lrb->id);
  186. }
  187. /* cmp_html_tag • comparison function for bsearch() (stolen from discount) */
  188. static int
  189. cmp_html_tag(const void *a, const void *b)
  190. {
  191. const struct html_tag *hta = a;
  192. const struct html_tag *htb = b;
  193. if (hta->size != htb->size) return (int)((ssize_t)hta->size - (ssize_t)htb->size);
  194. return strncasecmp(hta->text, htb->text, hta->size);
  195. }
  196. /* find_block_tag • returns the current block tag */
  197. static struct html_tag *
  198. find_block_tag(char *data, size_t size)
  199. {
  200. size_t i = 0;
  201. struct html_tag key;
  202. /* looking for the word end */
  203. while (i < size && ((data[i] >= '0' && data[i] <= '9')
  204. || (data[i] >= 'A' && data[i] <= 'Z')
  205. || (data[i] >= 'a' && data[i] <= 'z')))
  206. i += 1;
  207. if (i >= size) return 0;
  208. /* binary search of the tag */
  209. key.text = data;
  210. key.size = i;
  211. return bsearch(&key, block_tags,
  212. sizeof block_tags / sizeof block_tags[0],
  213. sizeof block_tags[0], cmp_html_tag);
  214. }
  215. /****************************
  216. * INLINE PARSING FUNCTIONS *
  217. ****************************/
  /* is_mail_autolink • checks for the address part of a mail autolink */
  /* accepts a run of [-@._a-zA-Z0-9] containing exactly one '@' and */
  /* immediately followed by '>' (less strict than original markdown) */
  /* returns the number of chars up to and including the '>', or 0 */
  static size_t
  is_mail_autolink(char *data, size_t size)
  {
      size_t pos, at_signs = 0;

      for (pos = 0; pos < size; pos++) {
          char ch = data[pos];

          if (ch == '@') {
              at_signs++;
              continue;
          }
          if (ch == '-' || ch == '.' || ch == '_')
              continue;
          if (ch >= 'a' && ch <= 'z')
              continue;
          if (ch >= 'A' && ch <= 'Z')
              continue;
          if (ch >= '0' && ch <= '9')
              continue;
          break;
      }

      if (pos >= size)
          return 0;
      if (data[pos] != '>')
          return 0;
      if (at_signs != 1)
          return 0;

      return pos + 1;
  }
  235. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  236. static size_t
  237. tag_length(char *data, size_t size, enum mkd_autolink *autolink)
  238. {
  239. size_t i, j;
  240. /* a valid tag can't be shorter than 3 chars */
  241. if (size < 3) return 0;
  242. /* begins with a '<' optionally followed by '/', followed by letter */
  243. if (data[0] != '<') return 0;
  244. i = (data[1] == '/') ? 2 : 1;
  245. if ((data[i] < 'a' || data[i] > 'z')
  246. && (data[i] < 'A' || data[i] > 'Z')) return 0;
  247. /* scheme test */
  248. *autolink = MKDA_NOT_AUTOLINK;
  249. /* try to find the beggining of an URI */
  250. while (i < size && (isalpha(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  251. i++;
  252. if (i > 1 && data[i] == '@') {
  253. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  254. *autolink = MKDA_EMAIL;
  255. return i + j;
  256. }
  257. }
  258. if (i > 2 && data[i] == ':') {
  259. *autolink = MKDA_NORMAL;
  260. i++;
  261. }
  262. /* completing autolink test: no whitespace or ' or " */
  263. if (i >= size)
  264. *autolink = MKDA_NOT_AUTOLINK;
  265. else if (*autolink) {
  266. j = i;
  267. while (i < size) {
  268. if (data[i] == '\\') i += 2;
  269. else if (data[i] == '>' || data[i] == '\'' ||
  270. data[i] == '"' || isspace(data[i])) break;
  271. else i += 1;
  272. }
  273. if (i >= size) return 0;
  274. if (i > j && data[i] == '>') return i + 1;
  275. /* one of the forbidden chars has been found */
  276. *autolink = MKDA_NOT_AUTOLINK;
  277. }
  278. /* looking for sometinhg looking like a tag end */
  279. while (i < size && data[i] != '>') i += 1;
  280. if (i >= size) return 0;
  281. return i + 1;
  282. }
  283. /* parse_inline • parses inline markdown elements */
  284. static void
  285. parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size)
  286. {
  287. size_t i = 0, end = 0;
  288. char action = 0;
  289. struct buf work = { 0, 0, 0, 0, 0 };
  290. if (rndr->work_bufs[BUFFER_SPAN].size +
  291. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  292. return;
  293. while (i < size) {
  294. /* copying inactive chars into the output */
  295. while (end < size && (action = rndr->active_char[(unsigned char)data[end]]) == 0) {
  296. end++;
  297. }
  298. if (rndr->make.normal_text) {
  299. work.data = data + i;
  300. work.size = end - i;
  301. rndr->make.normal_text(ob, &work, rndr->make.opaque);
  302. }
  303. else
  304. bufput(ob, data + i, end - i);
  305. if (end >= size) break;
  306. i = end;
  307. /* calling the trigger */
  308. end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
  309. if (!end) /* no action from the callback */
  310. end = i + 1;
  311. else {
  312. i += end;
  313. end = i;
  314. }
  315. }
  316. }
  /* find_emph_char • looks for the next emph char, skipping other constructs */
  /* the search starts at index 1; code spans and links are skipped over */
  /* returns the index of the emph char c inside data, or 0 when none is */
  /* found; when a skipped construct turns out to be unterminated, the first */
  /* c seen inside it (if any) is returned instead */
  /* the returned index is always smaller than size */
  static size_t
  find_emph_char(char *data, size_t size, char c)
  {
      size_t i = 1;

      while (i < size) {
          while (i < size && data[i] != c
              && data[i] != '`' && data[i] != '[')
              i += 1;

          /* ran off the end of the span: data[size] must never be read */
          if (i >= size)
              return 0;

          if (data[i] == c) return i;

          /* not counting escaped chars */
          if (i && data[i - 1] == '\\') { i += 1; continue; }

          /* skipping a code span */
          if (data[i] == '`') {
              size_t tmp_i = 0;

              i += 1;
              while (i < size && data[i] != '`') {
                  if (!tmp_i && data[i] == c) tmp_i = i;
                  i += 1;
              }
              if (i >= size) return tmp_i;
              i += 1;
          }

          /* skipping a link */
          else if (data[i] == '[') {
              size_t tmp_i = 0;
              char cc;

              i += 1;
              while (i < size && data[i] != ']') {
                  if (!tmp_i && data[i] == c) tmp_i = i;
                  i += 1;
              }
              i += 1;

              while (i < size && (data[i] == ' '
                  || data[i] == '\t' || data[i] == '\n'))
                  i += 1;
              if (i >= size) return tmp_i;

              if (data[i] != '[' && data[i] != '(') { /* not a link */
                  if (tmp_i) return tmp_i;
                  else continue;
              }

              /* NOTE(review): the scan below stops at the next occurrence */
              /* of the OPENING char ('(' or '['), not at ')' or ']'; */
              /* kept as is to preserve existing parsing behaviour */
              cc = data[i];
              i += 1;
              while (i < size && data[i] != cc) {
                  if (!tmp_i && data[i] == c) tmp_i = i;
                  i += 1;
              }
              if (i >= size) return tmp_i;
              i += 1;
          }
      }

      return 0;
  }
  363. /* parse_emph1 • parsing single emphase */
  364. /* closed by a symbol not preceded by whitespace and not followed by symbol */
  365. static size_t
  366. parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
  367. {
  368. size_t i = 0, len;
  369. struct buf *work = 0;
  370. int r;
  371. if (!rndr->make.emphasis) return 0;
  372. /* skipping one symbol if coming from emph3 */
  373. if (size > 1 && data[0] == c && data[1] == c) i = 1;
  374. while (i < size) {
  375. len = find_emph_char(data + i, size - i, c);
  376. if (!len) return 0;
  377. i += len;
  378. if (i >= size) return 0;
  379. if (i + 1 < size && data[i + 1] == c) {
  380. i += 1;
  381. continue;
  382. }
  383. if (data[i] == c && !isspace(data[i - 1])) {
  384. if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
  385. if (!(i + 1 == size || isspace(data[i + 1]) || ispunct(data[i + 1])))
  386. continue;
  387. }
  388. work = rndr_newbuf(rndr, BUFFER_SPAN);
  389. parse_inline(work, rndr, data, i);
  390. r = rndr->make.emphasis(ob, work, rndr->make.opaque);
  391. rndr_popbuf(rndr, BUFFER_SPAN);
  392. return r ? i + 1 : 0;
  393. }
  394. }
  395. return 0;
  396. }
  397. /* parse_emph2 • parsing single emphase */
  398. static size_t
  399. parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
  400. {
  401. int (*render_method)(struct buf *ob, struct buf *text, void *opaque);
  402. size_t i = 0, len;
  403. struct buf *work = 0;
  404. int r;
  405. render_method = (c == '~') ? rndr->make.strikethrough : rndr->make.double_emphasis;
  406. if (!render_method)
  407. return 0;
  408. while (i < size) {
  409. len = find_emph_char(data + i, size - i, c);
  410. if (!len) return 0;
  411. i += len;
  412. if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !isspace(data[i - 1])) {
  413. work = rndr_newbuf(rndr, BUFFER_SPAN);
  414. parse_inline(work, rndr, data, i);
  415. r = render_method(ob, work, rndr->make.opaque);
  416. rndr_popbuf(rndr, BUFFER_SPAN);
  417. return r ? i + 2 : 0;
  418. }
  419. i++;
  420. }
  421. return 0;
  422. }
  423. /* parse_emph3 • parsing single emphase */
  424. /* finds the first closing tag, and delegates to the other emph */
  425. static size_t
  426. parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
  427. {
  428. size_t i = 0, len;
  429. int r;
  430. while (i < size) {
  431. len = find_emph_char(data + i, size - i, c);
  432. if (!len) return 0;
  433. i += len;
  434. /* skip whitespace preceded symbols */
  435. if (data[i] != c || isspace(data[i - 1]))
  436. continue;
  437. if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->make.triple_emphasis) {
  438. /* triple symbol found */
  439. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  440. parse_inline(work, rndr, data, i);
  441. r = rndr->make.triple_emphasis(ob, work, rndr->make.opaque);
  442. rndr_popbuf(rndr, BUFFER_SPAN);
  443. return r ? i + 3 : 0;
  444. } else if (i + 1 < size && data[i + 1] == c) {
  445. /* double symbol found, handing over to emph1 */
  446. len = parse_emph1(ob, rndr, data - 2, size + 2, c);
  447. if (!len) return 0;
  448. else return len - 2;
  449. } else {
  450. /* single symbol found, handing over to emph2 */
  451. len = parse_emph2(ob, rndr, data - 1, size + 1, c);
  452. if (!len) return 0;
  453. else return len - 1;
  454. }
  455. }
  456. return 0;
  457. }
  /* char_emphasis • single, double and triple emphasis parsing */
  /* data[0] is the emphasis symbol; the number of repeated symbols selects */
  /* parse_emph1, parse_emph2 or parse_emph3 */
  /* returns the number of chars consumed including the opening symbols, or */
  /* 0 when nothing was rendered; offset is not used */
  static size_t
  char_emphasis(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  {
      char c = data[0];
      size_t ret;

      if (size > 2 && data[1] != c) {
          /* whitespace cannot follow an opening emphasis;
           * strikethrough only takes two characters '~~' */
          if (c == '~' || isspace((unsigned char)data[1])
              || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
              return 0;

          return ret + 1;
      }

      if (size > 3 && data[1] == c && data[2] != c) {
          if (isspace((unsigned char)data[2])
              || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
              return 0;

          return ret + 2;
      }

      if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
          if (c == '~' || isspace((unsigned char)data[3])
              || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
              return 0;

          return ret + 3;
      }

      return 0;
  }
  483. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  484. static size_t
  485. char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  486. {
  487. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  488. return 0;
  489. /* removing the last space from ob and rendering */
  490. while (ob->size && ob->data[ob->size - 1] == ' ')
  491. ob->size--;
  492. return rndr->make.linebreak(ob, rndr->make.opaque) ? 1 : 0;
  493. }
  494. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  495. static size_t
  496. char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  497. {
  498. size_t end, nb = 0, i, f_begin, f_end;
  499. /* counting the number of backticks in the delimiter */
  500. while (nb < size && data[nb] == '`')
  501. nb++;
  502. /* finding the next delimiter */
  503. i = 0;
  504. for (end = nb; end < size && i < nb; end++) {
  505. if (data[end] == '`') i++;
  506. else i = 0;
  507. }
  508. if (i < nb && end >= size)
  509. return 0; /* no matching delimiter */
  510. /* trimming outside whitespaces */
  511. f_begin = nb;
  512. while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t'))
  513. f_begin++;
  514. f_end = end - nb;
  515. while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t'))
  516. f_end--;
  517. /* real code span */
  518. if (f_begin < f_end) {
  519. struct buf work = { data + f_begin, f_end - f_begin, 0, 0, 0 };
  520. if (!rndr->make.codespan(ob, &work, rndr->make.opaque))
  521. end = 0;
  522. } else {
  523. if (!rndr->make.codespan(ob, 0, rndr->make.opaque))
  524. end = 0;
  525. }
  526. return end;
  527. }
  528. /* char_escape • '\\' backslash escape */
  529. static size_t
  530. char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  531. {
  532. struct buf work = { 0, 0, 0, 0, 0 };
  533. if (size > 1) {
  534. if (rndr->make.normal_text) {
  535. work.data = data + 1;
  536. work.size = 1;
  537. rndr->make.normal_text(ob, &work, rndr->make.opaque);
  538. }
  539. else bufputc(ob, data[1]);
  540. }
  541. return 2;
  542. }
  543. /* char_entity • '&' escaped when it doesn't belong to an entity */
  544. /* valid entities are assumed to be anything mathing &#?[A-Za-z0-9]+; */
  545. static size_t
  546. char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  547. {
  548. size_t end = 1;
  549. struct buf work;
  550. if (end < size && data[end] == '#')
  551. end++;
  552. while (end < size && isalnum(data[end]))
  553. end++;
  554. if (end < size && data[end] == ';')
  555. end += 1; /* real entity */
  556. else
  557. return 0; /* lone '&' */
  558. if (rndr->make.entity) {
  559. work.data = data;
  560. work.size = end;
  561. rndr->make.entity(ob, &work, rndr->make.opaque);
  562. }
  563. else bufput(ob, data, end);
  564. return end;
  565. }
  566. /* char_langle_tag • '<' when tags or autolinks are allowed */
  567. static size_t
  568. char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  569. {
  570. enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
  571. size_t end = tag_length(data, size, &altype);
  572. struct buf work = { data, end, 0, 0, 0 };
  573. int ret = 0;
  574. if (end > 2) {
  575. if (rndr->make.autolink && altype != MKDA_NOT_AUTOLINK) {
  576. struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  577. work.data = data + 1;
  578. work.size = end - 2;
  579. unscape_text(u_link, &work);
  580. ret = rndr->make.autolink(ob, u_link, altype, rndr->make.opaque);
  581. rndr_popbuf(rndr, BUFFER_SPAN);
  582. }
  583. else if (rndr->make.raw_html_tag)
  584. ret = rndr->make.raw_html_tag(ob, &work, rndr->make.opaque);
  585. }
  586. if (!ret) return 0;
  587. else return end;
  588. }
  589. static size_t
  590. char_autolink(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  591. {
  592. struct buf work = { data, 0, 0, 0, 0 };
  593. char cclose = 0;
  594. size_t link_end;
  595. /* TODO:
  596. * what's the fastest check we can do, previous char
  597. * or URI prefix? We want to do the fastest one first
  598. * to break asap
  599. */
  600. if (offset > 0) {
  601. switch (data[-1]) {
  602. case '"': cclose = '"'; break;
  603. case '\'': cclose = '\''; break;
  604. case '(': cclose = ')'; break;
  605. case '[': cclose = ']'; break;
  606. case '{': cclose = '}'; break;
  607. case ' ': case '\t': case '\n': break;
  608. default:
  609. return 0;
  610. }
  611. }
  612. if (!is_safe_link(data, size))
  613. return 0;
  614. link_end = 0;
  615. while (link_end < size && !isspace(data[link_end]))
  616. link_end++;
  617. if (cclose != 0) {
  618. size_t i = link_end;
  619. while (i > 0 && data[i] != cclose)
  620. i--;
  621. if (i > 0)
  622. link_end = i;
  623. }
  624. work.size = link_end;
  625. if (rndr->make.autolink) {
  626. struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  627. unscape_text(u_link, &work);
  628. rndr->make.autolink(ob, u_link, MKDA_NORMAL, rndr->make.opaque);
  629. rndr_popbuf(rndr, BUFFER_SPAN);
  630. }
  631. return work.size;
  632. }
  633. /* char_link • '[': parsing a link or an image */
  634. static size_t
  635. char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  636. {
  637. int is_img = (offset && data[-1] == '!'), level;
  638. size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
  639. struct buf *content = 0;
  640. struct buf *link = 0;
  641. struct buf *title = 0;
  642. struct buf *u_link = 0;
  643. size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
  644. int text_has_nl = 0, ret = 0;
  645. /* checking whether the correct renderer exists */
  646. if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link))
  647. goto cleanup;
  648. /* looking for the matching closing bracket */
  649. for (level = 1; i < size; i += 1) {
  650. if (data[i] == '\n')
  651. text_has_nl = 1;
  652. else if (data[i - 1] == '\\')
  653. continue;
  654. else if (data[i] == '[')
  655. level++;
  656. else if (data[i] == ']') {
  657. level--;
  658. if (level <= 0)
  659. break;
  660. }
  661. }
  662. if (i >= size)
  663. goto cleanup;
  664. txt_e = i;
  665. i += 1;
  666. /* skip any amount of whitespace or newline */
  667. /* (this is much more laxist than original markdown syntax) */
  668. while (i < size && isspace(data[i]))
  669. i++;
  670. /* inline style link */
  671. if (i < size && data[i] == '(') {
  672. /* skipping initial whitespace */
  673. i += 1;
  674. while (i < size && isspace(data[i]))
  675. i++;
  676. link_b = i;
  677. /* looking for link end: ' " ) */
  678. while (i < size) {
  679. if (data[i] == '\\') i += 2;
  680. else if (data[i] == ')' || data[i] == '\'' || data[i] == '"') break;
  681. else i += 1;
  682. }
  683. if (i >= size) goto cleanup;
  684. link_e = i;
  685. /* looking for title end if present */
  686. if (data[i] == '\'' || data[i] == '"') {
  687. i++;
  688. title_b = i;
  689. while (i < size) {
  690. if (data[i] == '\\') i += 2;
  691. else if (data[i] == ')') break;
  692. else i += 1;
  693. }
  694. if (i >= size) goto cleanup;
  695. /* skipping whitespaces after title */
  696. title_e = i - 1;
  697. while (title_e > title_b && isspace(data[title_e]))
  698. title_e--;
  699. /* checking for closing quote presence */
  700. if (data[title_e] != '\'' && data[title_e] != '"') {
  701. title_b = title_e = 0;
  702. link_e = i;
  703. }
  704. }
  705. /* remove whitespace at the end of the link */
  706. while (link_e > link_b && isspace(data[link_e - 1]))
  707. link_e--;
  708. /* remove optional angle brackets around the link */
  709. if (data[link_b] == '<') link_b++;
  710. if (data[link_e - 1] == '>') link_e--;
  711. /* building escaped link and title */
  712. if (link_e > link_b) {
  713. link = rndr_newbuf(rndr, BUFFER_SPAN);
  714. bufput(link, data + link_b, link_e - link_b);
  715. }
  716. if (title_e > title_b) {
  717. title = rndr_newbuf(rndr, BUFFER_SPAN);
  718. bufput(title, data + title_b, title_e - title_b);
  719. }
  720. i++;
  721. }
  722. /* reference style link */
  723. else if (i < size && data[i] == '[') {
  724. struct buf id = { 0, 0, 0, 0, 0 };
  725. struct link_ref *lr;
  726. /* looking for the id */
  727. i += 1;
  728. link_b = i;
  729. while (i < size && data[i] != ']') i++;
  730. if (i >= size) goto cleanup;
  731. link_e = i;
  732. /* finding the link_ref */
  733. if (link_b == link_e) {
  734. if (text_has_nl) {
  735. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  736. size_t j;
  737. for (j = 1; j < txt_e; j++) {
  738. if (data[j] != '\n')
  739. bufputc(b, data[j]);
  740. else if (data[j - 1] != ' ')
  741. bufputc(b, ' ');
  742. }
  743. id.data = b->data;
  744. id.size = b->size;
  745. } else {
  746. id.data = data + 1;
  747. id.size = txt_e - 1;
  748. }
  749. } else {
  750. id.data = data + link_b;
  751. id.size = link_e - link_b;
  752. }
  753. lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
  754. if (!lr) goto cleanup;
  755. /* keeping link and title from link_ref */
  756. link = lr->link;
  757. title = lr->title;
  758. i += 1;
  759. }
  760. /* shortcut reference style link */
  761. else {
  762. struct buf id = { 0, 0, 0, 0, 0 };
  763. struct link_ref *lr;
  764. /* crafting the id */
  765. if (text_has_nl) {
  766. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  767. size_t j;
  768. for (j = 1; j < txt_e; j++) {
  769. if (data[j] != '\n')
  770. bufputc(b, data[j]);
  771. else if (data[j - 1] != ' ')
  772. bufputc(b, ' ');
  773. }
  774. id.data = b->data;
  775. id.size = b->size;
  776. } else {
  777. id.data = data + 1;
  778. id.size = txt_e - 1;
  779. }
  780. /* finding the link_ref */
  781. lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
  782. if (!lr) goto cleanup;
  783. /* keeping link and title from link_ref */
  784. link = lr->link;
  785. title = lr->title;
  786. /* rewinding the whitespace */
  787. i = txt_e + 1;
  788. }
  789. /* building content: img alt is escaped, link content is parsed */
  790. if (txt_e > 1) {
  791. content = rndr_newbuf(rndr, BUFFER_SPAN);
  792. if (is_img) bufput(content, data + 1, txt_e - 1);
  793. else parse_inline(content, rndr, data + 1, txt_e - 1);
  794. }
  795. if (link) {
  796. u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  797. unscape_text(u_link, link);
  798. }
  799. /* calling the relevant rendering function */
  800. if (is_img) {
  801. if (ob->size && ob->data[ob->size - 1] == '!')
  802. ob->size -= 1;
  803. ret = rndr->make.image(ob, u_link, title, content, rndr->make.opaque);
  804. } else {
  805. ret = rndr->make.link(ob, u_link, title, content, rndr->make.opaque);
  806. }
  807. /* cleanup */
  808. cleanup:
  809. rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
  810. return ret ? i : 0;
  811. }
  812. /*********************************
  813. * BLOCK-LEVEL PARSING FUNCTIONS *
  814. *********************************/
  /* is_empty • returns the line length when it is empty, 0 otherwise */
  /* a line is empty when it holds only spaces and tabs before its '\n' */
  /* (or before the end of data); the returned length counts one char for */
  /* the line end even when data ends without a newline */
  static size_t
  is_empty(char *data, size_t size)
  {
      size_t pos = 0;

      while (pos < size && data[pos] != '\n') {
          if (data[pos] != ' ' && data[pos] != '\t')
              return 0;
          pos++;
      }

      return pos + 1;
  }
  /* is_hrule • returns whether a line is a horizontal rule */
  /* after at most three leading spaces the line must hold only one kind of */
  /* rule char ('*', '-' or '_') plus spaces and tabs, with the rule char */
  /* appearing at least three times; returns 1 for a rule, 0 otherwise */
  static int
  is_hrule(char *data, size_t size)
  {
      size_t pos = 0, marks = 0;
      char mark;

      if (size < 3)
          return 0;

      /* skipping up to three initial spaces */
      while (pos < 3 && data[pos] == ' ')
          pos++;

      /* looking at the hrule char */
      if (pos + 2 >= size)
          return 0;

      mark = data[pos];
      if (mark != '*' && mark != '-' && mark != '_')
          return 0;

      /* the whole line must be the char or whitespace */
      for (; pos < size && data[pos] != '\n'; pos++) {
          if (data[pos] == mark)
              marks++;
          else if (data[pos] != ' ' && data[pos] != '\t')
              return 0;
      }

      return marks >= 3;
  }
  848. /* check if a line is a code fence; return its size if it is */
  849. static size_t
  850. is_codefence(char *data, size_t size, struct buf *syntax)
  851. {
  852. size_t i = 0, n = 0;
  853. char c;
  854. /* skipping initial spaces */
  855. if (size < 3) return 0;
  856. if (data[0] == ' ') { i += 1;
  857. if (data[1] == ' ') { i += 1;
  858. if (data[2] == ' ') { i += 1; } } }
  859. /* looking at the hrule char */
  860. if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
  861. return 0;
  862. c = data[i];
  863. /* the whole line must be the char or whitespace */
  864. while (i < size && data[i] == c) {
  865. n++; i++;
  866. }
  867. if (n < 3)
  868. return 0;
  869. if (syntax != NULL) {
  870. size_t syn = 0;
  871. while (i < size && (data[i] == ' ' || data[i] == '\t'))
  872. i++;
  873. syntax->data = data + i;
  874. if (i < size && data[i] == '{') {
  875. i++; syntax->data++;
  876. while (i < size && data[i] != '}' && data[i] != '\n') {
  877. syn++; i++;
  878. }
  879. if (i == size || data[i] != '}')
  880. return 0;
  881. /* strip all whitespace at the beggining and the end
  882. * of the {} block */
  883. while (syn > 0 && isspace(syntax->data[0])) {
  884. syntax->data++; syn--;
  885. }
  886. while (syn > 0 && isspace(syntax->data[syn - 1]))
  887. syn--;
  888. i++;
  889. } else {
  890. while (i < size && !isspace(data[i])) {
  891. syn++; i++;
  892. }
  893. }
  894. syntax->size = syn;
  895. }
  896. while (i < size && data[i] != '\n') {
  897. if (!isspace(data[i]))
  898. return 0;
  899. i++;
  900. }
  901. return i + 1;
  902. }
  /* is_headerline • returns whether the line is a setext-style hdr underline */
  /* the line must be a run of '=' (level 1) or '-' (level 2) optionally */
  /* followed by spaces and tabs up to the newline or the end of data */
  /* returns the header level (1 or 2), or 0 when it is not an underline; */
  /* an empty input is never an underline */
  static int
  is_headerline(char *data, size_t size)
  {
      size_t i;
      int level;
      char mark;

      /* nothing to look at: data[0] must not be read */
      if (size == 0)
          return 0;

      mark = data[0];
      if (mark == '=')
          level = 1;
      else if (mark == '-')
          level = 2;
      else
          return 0;

      /* the run of underline chars, then optional trailing blanks */
      for (i = 1; i < size && data[i] == mark; i += 1)
          ;
      while (i < size && (data[i] == ' ' || data[i] == '\t'))
          i += 1;

      return (i >= size || data[i] == '\n') ? level : 0;
  }
  920. /* prefix_quote • returns blockquote prefix length */
  921. static size_t
  922. prefix_quote(char *data, size_t size)
  923. {
  924. size_t i = 0;
  925. if (i < size && data[i] == ' ') i += 1;
  926. if (i < size && data[i] == ' ') i += 1;
  927. if (i < size && data[i] == ' ') i += 1;
  928. if (i < size && data[i] == '>') {
  929. if (i + 1 < size && (data[i + 1] == ' ' || data[i+1] == '\t'))
  930. return i + 2;
  931. else return i + 1; }
  932. else return 0;
  933. }
  934. /* prefix_code • returns prefix length for block code*/
  935. static size_t
  936. prefix_code(char *data, size_t size)
  937. {
  938. if (size > 0 && data[0] == '\t') return 1;
  939. if (size > 3 && data[0] == ' ' && data[1] == ' '
  940. && data[2] == ' ' && data[3] == ' ') return 4;
  941. return 0;
  942. }
  943. /* prefix_oli • returns ordered list item prefix */
  944. static size_t
  945. prefix_oli(char *data, size_t size)
  946. {
  947. size_t i = 0;
  948. if (i < size && data[i] == ' ') i += 1;
  949. if (i < size && data[i] == ' ') i += 1;
  950. if (i < size && data[i] == ' ') i += 1;
  951. if (i >= size || data[i] < '0' || data[i] > '9') return 0;
  952. while (i < size && data[i] >= '0' && data[i] <= '9') i += 1;
  953. if (i + 1 >= size || data[i] != '.'
  954. || (data[i + 1] != ' ' && data[i + 1] != '\t')) return 0;
  955. return i + 2;
  956. }
  957. /* prefix_uli • returns ordered list item prefix */
  958. static size_t
  959. prefix_uli(char *data, size_t size)
  960. {
  961. size_t i = 0;
  962. if (i < size && data[i] == ' ') i += 1;
  963. if (i < size && data[i] == ' ') i += 1;
  964. if (i < size && data[i] == ' ') i += 1;
  965. if (i + 1 >= size
  966. || (data[i] != '*' && data[i] != '+' && data[i] != '-')
  967. || (data[i + 1] != ' ' && data[i + 1] != '\t'))
  968. return 0;
  969. return i + 2;
  970. }
  971. /* parse_block • parsing of one block, returning next char to parse */
  972. static void parse_block(struct buf *ob, struct render *rndr,
  973. char *data, size_t size);
  974. /* parse_blockquote • hanldes parsing of a blockquote fragment */
  975. static size_t
  976. parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size)
  977. {
  978. size_t beg, end = 0, pre, work_size = 0;
  979. char *work_data = 0;
  980. struct buf *out = 0;
  981. out = rndr_newbuf(rndr, BUFFER_BLOCK);
  982. beg = 0;
  983. while (beg < size) {
  984. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  985. pre = prefix_quote(data + beg, end - beg);
  986. if (pre)
  987. beg += pre; /* skipping prefix */
  988. /* empty line followed by non-quote line */
  989. else if (is_empty(data + beg, end - beg) &&
  990. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  991. !is_empty(data + end, size - end))))
  992. break;
  993. if (beg < end) { /* copy into the in-place working buffer */
  994. /* bufput(work, data + beg, end - beg); */
  995. if (!work_data)
  996. work_data = data + beg;
  997. else if (data + beg != work_data + work_size)
  998. memmove(work_data + work_size, data + beg, end - beg);
  999. work_size += end - beg;
  1000. }
  1001. beg = end;
  1002. }
  1003. parse_block(out, rndr, work_data, work_size);
  1004. if (rndr->make.blockquote)
  1005. rndr->make.blockquote(ob, out, rndr->make.opaque);
  1006. rndr_popbuf(rndr, BUFFER_BLOCK);
  1007. return end;
  1008. }
  1009. static size_t
  1010. parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render);
  1011. /* parse_blockquote • hanldes parsing of a regular paragraph */
  1012. static size_t
  1013. parse_paragraph(struct buf *ob, struct render *rndr, char *data, size_t size)
  1014. {
  1015. size_t i = 0, end = 0;
  1016. int level = 0;
  1017. struct buf work = { data, 0, 0, 0, 0 }; /* volatile working buffer */
  1018. while (i < size) {
  1019. for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
  1020. if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0)
  1021. break;
  1022. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  1023. if (data[i] == '<' && rndr->make.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
  1024. end = i;
  1025. break;
  1026. }
  1027. }
  1028. if (data[i] == '#' || is_hrule(data + i, size - i)) {
  1029. end = i;
  1030. break;
  1031. }
  1032. i = end;
  1033. }
  1034. work.size = i;
  1035. while (work.size && data[work.size - 1] == '\n')
  1036. work.size--;
  1037. if (!level) {
  1038. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1039. parse_inline(tmp, rndr, work.data, work.size);
  1040. if (rndr->make.paragraph)
  1041. rndr->make.paragraph(ob, tmp, rndr->make.opaque);
  1042. rndr_popbuf(rndr, BUFFER_BLOCK);
  1043. } else {
  1044. struct buf *header_work;
  1045. if (work.size) {
  1046. size_t beg;
  1047. i = work.size;
  1048. work.size -= 1;
  1049. while (work.size && data[work.size] != '\n')
  1050. work.size -= 1;
  1051. beg = work.size + 1;
  1052. while (work.size && data[work.size - 1] == '\n')
  1053. work.size -= 1;
  1054. if (work.size > 0) {
  1055. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1056. parse_inline(tmp, rndr, work.data, work.size);
  1057. if (rndr->make.paragraph)
  1058. rndr->make.paragraph(ob, tmp, rndr->make.opaque);
  1059. rndr_popbuf(rndr, BUFFER_BLOCK);
  1060. work.data += beg;
  1061. work.size = i - beg;
  1062. }
  1063. else work.size = i;
  1064. }
  1065. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1066. parse_inline(header_work, rndr, work.data, work.size);
  1067. if (rndr->make.header)
  1068. rndr->make.header(ob, header_work, (int)level, rndr->make.opaque);
  1069. rndr_popbuf(rndr, BUFFER_SPAN);
  1070. }
  1071. return end;
  1072. }
  1073. /* parse_fencedcode • hanldes parsing of a block-level code fragment */
  1074. static size_t
  1075. parse_fencedcode(struct buf *ob, struct render *rndr, char *data, size_t size)
  1076. {
  1077. size_t beg, end;
  1078. struct buf *work = 0;
  1079. struct buf lang = { 0, 0, 0, 0, 0 };
  1080. beg = is_codefence(data, size, &lang);
  1081. if (beg == 0) return 0;
  1082. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1083. while (beg < size) {
  1084. size_t fence_end;
  1085. fence_end = is_codefence(data + beg, size - beg, NULL);
  1086. if (fence_end != 0) {
  1087. beg += fence_end;
  1088. break;
  1089. }
  1090. for (end = beg + 1; end < size && data[end - 1] != '\n'; end += 1);
  1091. if (beg < end) {
  1092. /* verbatim copy to the working buffer,
  1093. escaping entities */
  1094. if (is_empty(data + beg, end - beg))
  1095. bufputc(work, '\n');
  1096. else bufput(work, data + beg, end - beg);
  1097. }
  1098. beg = end;
  1099. }
  1100. if (work->size && work->data[work->size - 1] != '\n')
  1101. bufputc(work, '\n');
  1102. if (rndr->make.blockcode)
  1103. rndr->make.blockcode(ob, work, lang.size ? &lang : NULL, rndr->make.opaque);
  1104. rndr_popbuf(rndr, BUFFER_BLOCK);
  1105. return beg;
  1106. }
  1107. static size_t
  1108. parse_blockcode(struct buf *ob, struct render *rndr, char *data, size_t size)
  1109. {
  1110. size_t beg, end, pre;
  1111. struct buf *work = 0;
  1112. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1113. beg = 0;
  1114. while (beg < size) {
  1115. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1116. pre = prefix_code(data + beg, end - beg);
  1117. if (pre)
  1118. beg += pre; /* skipping prefix */
  1119. else if (!is_empty(data + beg, end - beg))
  1120. /* non-empty non-prefixed line breaks the pre */
  1121. break;
  1122. if (beg < end) {
  1123. /* verbatim copy to the working buffer,
  1124. escaping entities */
  1125. if (is_empty(data + beg, end - beg))
  1126. bufputc(work, '\n');
  1127. else bufput(work, data + beg, end - beg);
  1128. }
  1129. beg = end;
  1130. }
  1131. while (work->size && work->data[work->size - 1] == '\n')
  1132. work->size -= 1;
  1133. bufputc(work, '\n');
  1134. if (rndr->make.blockcode)
  1135. rndr->make.blockcode(ob, work, NULL, rndr->make.opaque);
  1136. rndr_popbuf(rndr, BUFFER_BLOCK);
  1137. return beg;
  1138. }
  1139. /* parse_listitem • parsing of a single list item */
  1140. /* assuming initial prefix is already removed */
  1141. static size_t
  1142. parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int *flags)
  1143. {
  1144. struct buf *work = 0, *inter = 0;
  1145. size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
  1146. int in_empty = 0, has_inside_empty = 0;
  1147. /* keeping book of the first indentation prefix */
  1148. while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
  1149. orgpre++;
  1150. beg = prefix_uli(data, size);
  1151. if (!beg)
  1152. beg = prefix_oli(data, size);
  1153. if (!beg)
  1154. return 0;
  1155. /* skipping to the beginning of the following line */
  1156. end = beg;
  1157. while (end < size && data[end - 1] != '\n')
  1158. end++;
  1159. /* getting working buffers */
  1160. work = rndr_newbuf(rndr, BUFFER_SPAN);
  1161. inter = rndr_newbuf(rndr, BUFFER_SPAN);
  1162. /* putting the first line into the working buffer */
  1163. bufput(work, data + beg, end - beg);
  1164. beg = end;
  1165. /* process the following lines */
  1166. while (beg < size) {
  1167. end++;
  1168. while (end < size && data[end - 1] != '\n')
  1169. end++;
  1170. /* process an empty line */
  1171. if (is_empty(data + beg, end - beg)) {
  1172. in_empty = 1;
  1173. beg = end;
  1174. continue;
  1175. }
  1176. /* calculating the indentation */
  1177. i = 0;
  1178. while (i < 4 && beg + i < end && data[beg + i] == ' ')
  1179. i++;
  1180. pre = i;
  1181. if (data[beg] == '\t') { i = 1; pre = 8; }
  1182. /* checking for a new item */
  1183. if ((prefix_uli(data + beg + i, end - beg - i) &&
  1184. !is_hrule(data + beg + i, end - beg - i)) ||
  1185. prefix_oli(data + beg + i, end - beg - i)) {
  1186. if (in_empty)
  1187. has_inside_empty = 1;
  1188. if (pre == orgpre) /* the following item must have */
  1189. break; /* the same indentation */
  1190. if (!sublist)
  1191. sublist = work->size;
  1192. }
  1193. /* joining only indented stuff after empty lines */
  1194. else if (in_empty && i < 4 && data[beg] != '\t') {
  1195. *flags |= MKD_LI_END;
  1196. break;
  1197. }
  1198. else if (in_empty) {
  1199. bufputc(work, '\n');
  1200. has_inside_empty = 1;
  1201. }
  1202. in_empty = 0;
  1203. /* adding the line without prefix into the working buffer */
  1204. bufput(work, data + beg + i, end - beg - i);
  1205. beg = end;
  1206. }
  1207. /* render of li contents */
  1208. if (has_inside_empty)
  1209. *flags |= MKD_LI_BLOCK;
  1210. if (*flags & MKD_LI_BLOCK) {
  1211. /* intermediate render of block li */
  1212. if (sublist && sublist < work->size) {
  1213. parse_block(inter, rndr, work->data, sublist);
  1214. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1215. }
  1216. else
  1217. parse_block(inter, rndr, work->data, work->size);
  1218. } else {
  1219. /* intermediate render of inline li */
  1220. if (sublist && sublist < work->size) {
  1221. parse_inline(inter, rndr, work->data, sublist);
  1222. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1223. }
  1224. else
  1225. parse_inline(inter, rndr, work->data, work->size);
  1226. }
  1227. /* render of li itself */
  1228. if (rndr->make.listitem)
  1229. rndr->make.listitem(ob, inter, *flags, rndr->make.opaque);
  1230. rndr_popbuf(rndr, BUFFER_SPAN);
  1231. rndr_popbuf(rndr, BUFFER_SPAN);
  1232. return beg;
  1233. }
  1234. /* parse_list • parsing ordered or unordered list block */
  1235. static size_t
  1236. parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int flags)
  1237. {
  1238. struct buf *work = 0;
  1239. size_t i = 0, j;
  1240. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1241. while (i < size) {
  1242. j = parse_listitem(work, rndr, data + i, size - i, &flags);
  1243. i += j;
  1244. if (!j || (flags & MKD_LI_END))
  1245. break;
  1246. }
  1247. if (rndr->make.list)
  1248. rndr->make.list(ob, work, flags, rndr->make.opaque);
  1249. rndr_popbuf(rndr, BUFFER_BLOCK);
  1250. return i;
  1251. }
  1252. /* parse_atxheader • parsing of atx-style headers */
  1253. static size_t
  1254. parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
  1255. {
  1256. size_t level = 0;
  1257. size_t i, end, skip;
  1258. if (!size || data[0] != '#')
  1259. return 0;
  1260. while (level < size && level < 6 && data[level] == '#')
  1261. level++;
  1262. for (i = level; i < size && (data[i] == ' ' || data[i] == '\t'); i++);
  1263. for (end = i; end < size && data[end] != '\n'; end++);
  1264. skip = end;
  1265. while (end && data[end - 1] == '#')
  1266. end--;
  1267. while (end && (data[end - 1] == ' ' || data[end - 1] == '\t'))
  1268. end--;
  1269. if (end > i) {
  1270. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  1271. parse_inline(work, rndr, data + i, end - i);
  1272. if (rndr->make.header)
  1273. rndr->make.header(ob, work, (int)level, rndr->make.opaque);
  1274. rndr_popbuf(rndr, BUFFER_SPAN);
  1275. }
  1276. return skip;
  1277. }
  1278. /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
  1279. /* returns the length on match, 0 otherwise */
  1280. static size_t
  1281. htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size)
  1282. {
  1283. size_t i, w;
  1284. /* assuming data[0] == '<' && data[1] == '/' already tested */
  1285. /* checking tag is a match */
  1286. if (tag->size + 3 >= size
  1287. || strncasecmp(data + 2, tag->text, tag->size)
  1288. || data[tag->size + 2] != '>')
  1289. return 0;
  1290. /* checking white lines */
  1291. i = tag->size + 3;
  1292. w = 0;
  1293. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1294. return 0; /* non-blank after tag */
  1295. i += w;
  1296. w = 0;
  1297. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  1298. if (i < size)
  1299. w = is_empty(data + i, size - i);
  1300. } else {
  1301. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1302. return 0; /* non-blank line after tag line */
  1303. }
  1304. return i + w;
  1305. }
  1306. /* parse_htmlblock • parsing of inline HTML block */
  1307. static size_t
  1308. parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render)
  1309. {
  1310. size_t i, j = 0;
  1311. struct html_tag *curtag;
  1312. int found;
  1313. struct buf work = { data, 0, 0, 0, 0 };
  1314. /* identification of the opening tag */
  1315. if (size < 2 || data[0] != '<') return 0;
  1316. curtag = find_block_tag(data + 1, size - 1);
  1317. /* handling of special cases */
  1318. if (!curtag) {
  1319. /* HTML comment, laxist form */
  1320. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  1321. i = 5;
  1322. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  1323. i++;
  1324. i++;
  1325. if (i < size)
  1326. j = is_empty(data + i, size - i);
  1327. if (j) {
  1328. work.size = i + j;
  1329. if (do_render && rndr->make.blockhtml)
  1330. rndr->make.blockhtml(ob, &work, rndr->make.opaque);
  1331. return work.size;
  1332. }
  1333. }
  1334. /* HR, which is the only self-closing block tag considered */
  1335. if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
  1336. i = 3;
  1337. while (i < size && data[i] != '>')
  1338. i += 1;
  1339. if (i + 1 < size) {
  1340. i += 1;
  1341. j = is_empty(data + i, size - i);
  1342. if (j) {
  1343. work.size = i + j;
  1344. if (do_render && rndr->make.blockhtml)
  1345. rndr->make.blockhtml(ob, &work, rndr->make.opaque);
  1346. return work.size;
  1347. }
  1348. }
  1349. }
  1350. /* no special case recognised */
  1351. return 0;
  1352. }
  1353. /* looking for an unindented matching closing tag */
  1354. /* followed by a blank line */
  1355. i = 1;
  1356. found = 0;
  1357. /* if not found, trying a second pass looking for indented match */
  1358. /* but not if tag is "ins" or "del" (following original Markdown.pl) */
  1359. if (curtag != INS_TAG && curtag != DEL_TAG) {
  1360. i = 1;
  1361. while (i < size) {
  1362. i++;
  1363. while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
  1364. i++;
  1365. if (i + 2 + curtag->size >= size)
  1366. break;
  1367. j = htmlblock_end(curtag, rndr, data + i - 1, size - i + 1);
  1368. if (j) {
  1369. i += j - 1;
  1370. found = 1;
  1371. break;
  1372. }
  1373. }
  1374. }
  1375. if (!found) return 0;
  1376. /* the end of the block has been found */
  1377. work.size = i;
  1378. if (do_render && rndr->make.blockhtml)
  1379. rndr->make.blockhtml(ob, &work, rndr->make.opaque);
  1380. return i;
  1381. }
  1382. static void
  1383. parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, size_t columns, int *col_data)
  1384. {
  1385. size_t i = 0, col;
  1386. struct buf *row_work = 0;
  1387. row_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1388. if (i < size && data[i] == '|')
  1389. i++;
  1390. for (col = 0; col < columns && i < size; ++col) {
  1391. size_t cell_start, cell_end;
  1392. struct buf *cell_work;
  1393. cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1394. while (i < size && isspace(data[i]))
  1395. i++;
  1396. cell_start = i;
  1397. while (i < size && data[i] != '|')
  1398. i++;
  1399. cell_end = i - 1;
  1400. while (cell_end > cell_start && isspace(data[cell_end]))
  1401. cell_end--;
  1402. parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
  1403. if (rndr->make.table_cell)
  1404. rndr->make.table_cell(row_work, cell_work, col_data ? col_data[col] : 0, rndr->make.opaque);
  1405. rndr_popbuf(rndr, BUFFER_SPAN);
  1406. i++;
  1407. }
  1408. for (; col < columns; ++col) {
  1409. struct buf empty_cell = {0, 0, 0, 0, 0};
  1410. if (rndr->make.table_cell)
  1411. rndr->make.table_cell(row_work, &empty_cell, col_data ? col_data[col] : 0, rndr->make.opaque);
  1412. }
  1413. if (rndr->make.table_row)
  1414. rndr->make.table_row(ob, row_work, rndr->make.opaque);
  1415. rndr_popbuf(rndr, BUFFER_SPAN);
  1416. }
  1417. static size_t
  1418. parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size, size_t *columns, int **column_data)
  1419. {
  1420. int pipes;
  1421. size_t i = 0, col, header_end, under_end;
  1422. pipes = 0;
  1423. while (i < size && data[i] != '\n')
  1424. if (data[i++] == '|')
  1425. pipes++;
  1426. if (i == size || pipes == 0)
  1427. return 0;
  1428. header_end = i;
  1429. if (data[0] == '|')
  1430. pipes--;
  1431. if (i > 2 && data[i - 1] == '|')
  1432. pipes--;
  1433. *columns = pipes + 1;
  1434. *column_data = calloc(*columns, sizeof(int));
  1435. /* Parse the header underline */
  1436. i++;
  1437. if (i < size && data[i] == '|')
  1438. i++;
  1439. under_end = i;
  1440. while (under_end < size && data[under_end] != '\n')
  1441. under_end++;
  1442. for (col = 0; col < *columns && i < under_end; ++col) {
  1443. size_t dashes = 0;
  1444. while (i < under_end && (data[i] == ' ' || data[i] == '\t'))
  1445. i++;
  1446. if (data[i] == ':') {
  1447. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
  1448. dashes++;
  1449. }
  1450. while (i < under_end && data[i] == '-') {
  1451. i++; dashes++;
  1452. }
  1453. if (i < under_end && data[i] == ':') {
  1454. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
  1455. dashes++;
  1456. }
  1457. while (i < under_end && (data[i] == ' ' || data[i] == '\t'))
  1458. i++;
  1459. if (i < under_end && data[i] != '|')
  1460. break;
  1461. if (dashes < 3)
  1462. break;
  1463. i++;
  1464. }
  1465. if (col < *columns)
  1466. return 0;
  1467. parse_table_row(ob, rndr, data, header_end, *columns, *column_data);
  1468. return under_end + 1;
  1469. }
  1470. static size_t
  1471. parse_table(struct buf *ob, struct render *rndr, char *data, size_t size)
  1472. {
  1473. size_t i;
  1474. struct buf *header_work = 0;
  1475. struct buf *body_work = 0;
  1476. size_t columns;
  1477. int *col_data = NULL;
  1478. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1479. body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1480. i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
  1481. if (i > 0) {
  1482. while (i < size) {
  1483. size_t row_start;
  1484. int pipes = 0;
  1485. row_start = i;
  1486. while (i < size && data[i] != '\n')
  1487. if (data[i++] == '|')
  1488. pipes++;
  1489. if (pipes == 0 || i == size) {
  1490. i = row_start;
  1491. break;
  1492. }
  1493. parse_table_row(body_work, rndr, data + row_start, i - row_start, columns, col_data);
  1494. i++;
  1495. }
  1496. if (rndr->make.table)
  1497. rndr->make.table(ob, header_work, body_work, rndr->make.opaque);
  1498. }
  1499. free(col_data);
  1500. rndr_popbuf(rndr, BUFFER_SPAN);
  1501. rndr_popbuf(rndr, BUFFER_BLOCK);
  1502. return i;
  1503. }
  1504. /* parse_block • parsing of one block, returning next char to parse */
  1505. static void
  1506. parse_block(struct buf *ob, struct render *rndr, char *data, size_t size)
  1507. {
  1508. size_t beg, end, i;
  1509. char *txt_data;
  1510. beg = 0;
  1511. if (rndr->work_bufs[BUFFER_SPAN].size +
  1512. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  1513. return;
  1514. while (beg < size) {
  1515. txt_data = data + beg;
  1516. end = size - beg;
  1517. if (data[beg] == '#')
  1518. beg += parse_atxheader(ob, rndr, txt_data, end);
  1519. else if (data[beg] == '<' && rndr->make.blockhtml &&
  1520. (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
  1521. beg += i;
  1522. else if ((i = is_empty(txt_data, end)) != 0)
  1523. beg += i;
  1524. else if (is_hrule(txt_data, end)) {
  1525. if (rndr->make.hrule)
  1526. rndr->make.hrule(ob, rndr->make.opaque);
  1527. while (beg < size && data[beg] != '\n')
  1528. beg++;
  1529. beg++;
  1530. }
  1531. else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
  1532. (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
  1533. beg += i;
  1534. else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
  1535. (i = parse_table(ob, rndr, txt_data, end)) != 0)
  1536. beg += i;
  1537. else if (prefix_quote(txt_data, end))
  1538. beg += parse_blockquote(ob, rndr, txt_data, end);
  1539. else if (prefix_code(txt_data, end))
  1540. beg += parse_blockcode(ob, rndr, txt_data, end);
  1541. else if (prefix_uli(txt_data, end))
  1542. beg += parse_list(ob, rndr, txt_data, end, 0);
  1543. else if (prefix_oli(txt_data, end))
  1544. beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
  1545. else
  1546. beg += parse_paragraph(ob, rndr, txt_data, end);
  1547. }
  1548. }
  1549. /*********************
  1550. * REFERENCE PARSING *
  1551. *********************/
  1552. /* is_ref • returns whether a line is a reference or not */
  1553. static int
  1554. is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
  1555. {
  1556. /* int n; */
  1557. size_t i = 0;
  1558. size_t id_offset, id_end;
  1559. size_t link_offset, link_end;
  1560. size_t title_offset, title_end;
  1561. size_t line_end;
  1562. struct link_ref *lr;
  1563. /* struct buf id = { 0, 0, 0, 0, 0 }; / * volatile buf for id search */
  1564. /* up to 3 optional leading spaces */
  1565. if (beg + 3 >= end) return 0;
  1566. if (data[beg] == ' ') { i = 1;
  1567. if (data[beg + 1] == ' ') { i = 2;
  1568. if (data[beg + 2] == ' ') { i = 3;
  1569. if (data[beg + 3] == ' ') return 0; } } }
  1570. i += beg;
  1571. /* id part: anything but a newline between brackets */
  1572. if (data[i] != '[') return 0;
  1573. i += 1;
  1574. id_offset = i;
  1575. while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
  1576. i += 1;
  1577. if (i >= end || data[i] != ']') return 0;
  1578. id_end = i;
  1579. /* spacer: colon (space | tab)* newline? (space | tab)* */
  1580. i += 1;
  1581. if (i >= end || data[i] != ':') return 0;
  1582. i += 1;
  1583. while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
  1584. if (i < end && (data[i] == '\n' || data[i] == '\r')) {
  1585. i += 1;
  1586. if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; }
  1587. while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
  1588. if (i >= end) return 0;
  1589. /* link: whitespace-free sequence, optionally between angle brackets */
  1590. if (data[i] == '<') i += 1;
  1591. link_offset = i;
  1592. while (i < end && data[i] != ' ' && data[i] != '\t'
  1593. && data[i] != '\n' && data[i] != '\r') i += 1;
  1594. if (data[i - 1] == '>') link_end = i - 1;
  1595. else link_end = i;
  1596. /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
  1597. while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
  1598. if (i < end && data[i] != '\n' && data[i] != '\r'
  1599. && data[i] != '\'' && data[i] != '"' && data[i] != '(')
  1600. return 0;
  1601. line_end = 0;
  1602. /* computing end-of-line */
  1603. if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
  1604. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  1605. line_end = i + 1;
  1606. /* optional (space|tab)* spacer after a newline */
  1607. if (line_end) {
  1608. i = line_end + 1;
  1609. while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; }
  1610. /* optional title: any non-newline sequence enclosed in '"()
  1611. alone on its line */
  1612. title_offset = title_end = 0;
  1613. if (i + 1 < end
  1614. && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
  1615. i += 1;
  1616. title_offset = i;
  1617. /* looking for EOL */
  1618. while (i < end && data[i] != '\n' && data[i] != '\r') i += 1;
  1619. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  1620. title_end = i + 1;
  1621. else title_end = i;
  1622. /* stepping back */
  1623. i -= 1;
  1624. while (i > title_offset && (data[i] == ' ' || data[i] == '\t'))
  1625. i -= 1;
  1626. if (i > title_offset
  1627. && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
  1628. line_end = title_end;
  1629. title_end = i; } }
  1630. if (!line_end) return 0; /* garbage after the link */
  1631. /* a valid ref has been found, filling-in return structures */
  1632. if (last) *last = line_end;
  1633. if (!refs) return 1;
  1634. lr = arr_item(refs, arr_newitem(refs));
  1635. lr->id = bufnew(id_end - id_offset);
  1636. bufput(lr->id, data + id_offset, id_end - id_offset);
  1637. lr->link = bufnew(link_end - link_offset);
  1638. bufput(lr->link, data + link_offset, link_end - link_offset);
  1639. if (title_end > title_offset) {
  1640. lr->title = bufnew(title_end - title_offset);
  1641. bufput(lr->title, data + title_offset,
  1642. title_end - title_offset); }
  1643. else lr->title = 0;
  1644. return 1;
  1645. }
  1646. static void expand_tabs(struct buf *ob, const char *line, size_t size)
  1647. {
  1648. size_t i = 0, tab = 0;
  1649. while (i < size) {
  1650. size_t org = i;
  1651. while (i < size && line[i] != '\t') {

Large files are truncated, but you can click here to view the full file