PageRenderTime 53ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/src/markdown.c

https://github.com/nono/upskirt
C | 2115 lines | 1956 code | 90 blank | 69 comment | 179 complexity | b6e96eafb00c47f3c0ebc671c982c409 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /* markdown.c - generic markdown parser */
  2. /*
  3. * Copyright (c) 2009, Natacha Porté
  4. * Copyright (c) 2011, Vicent Marti
  5. *
  6. * Permission to use, copy, modify, and distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. #include "markdown.h"
  19. #include "array.h"
  20. #include <assert.h>
  21. #include <string.h>
  22. #include <strings.h> /* for strncasecmp */
  23. #include <ctype.h>
  24. #include <stdio.h>
  25. #define TEXT_UNIT 64 /* unit for the copy of the input buffer */
  26. #define WORK_UNIT 64 /* block-level working buffer */
  27. #define MKD_LI_END 8 /* internal list flag */
  28. /***************
  29. * LOCAL TYPES *
  30. ***************/
  31. /* link_ref • reference to a link: an id buffer plus the link and title buffers stored for it */
  32. struct link_ref {
  33. struct buf *id; /* key compared with bufcasecmp by cmp_link_ref and cmp_link_ref_sort */
  34. struct buf *link; /* picked up by char_link when a reference-style link matches this id */
  35. struct buf *title; /* picked up by char_link together with link */
  36. };
  37. /* char_trigger • function pointer to render active chars */
  38. /* returns the number of chars taken care of; parse_inline treats 0 as "no action" and emits the char as ordinary text */
  39. /* data is the pointer of the beginning of the span */
  40. /* offset is the number of valid chars before data; size is the number of chars available starting at data */
  41. struct render;
  42. typedef size_t
  43. (*char_trigger)(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  44. /* render • structure containing one particular render */
  45. struct render {
  46. struct mkd_renderer make; /* rendering callbacks plus the opaque pointer passed back to them */
  47. struct array refs; /* link_ref entries looked up by char_link through arr_sorted_find */
  48. char_trigger active_char[256]; /* inline trigger per byte value, indexed as unsigned char by parse_inline; 0 means inactive */
  49. struct parray work; /* pool of working buffers handed out by rndr_newbuf and released by rndr_popbuf */
  50. unsigned int ext_flags; /* MKDEXT_* extension bits, e.g. MKDEXT_LAX_EMPHASIS */
  51. size_t max_nesting; /* parse_inline returns without output once work.size exceeds this */
  52. };
  53. /* html_tag • structure for quick HTML tag search (inspired from discount) */
  54. struct html_tag {
  55. const char *text; /* tag name; not necessarily NUL-terminated (find_block_tag points it into the input) */
  56. size_t size; /* number of chars of text that make up the name */
  57. };
  58. static inline struct buf *
  59. rndr_newbuf(struct render *rndr)
  60. {
  61. struct buf *work = NULL;
  62. if (rndr->work.size < rndr->work.asize) {
  63. work = rndr->work.item[rndr->work.size++];
  64. work->size = 0;
  65. } else {
  66. work = bufnew(WORK_UNIT);
  67. parr_push(&rndr->work, work);
  68. }
  69. return work;
  70. }
  71. static inline void
  72. rndr_popbuf(struct render *rndr)
  73. {
  74. rndr->work.size--;
  75. }
  76. /********************
  77. * GLOBAL VARIABLES *
  78. ********************/
  79. /* block_tags • recognised block tags, sorted by cmp_html_tag (shorter names first, then case-insensitive name order) so that find_block_tag can bsearch() the table; the bracketed numbers are array indices */
  80. static struct html_tag block_tags[] = {
  81. /*0*/ { "p", 1 },
  82. { "dl", 2 },
  83. { "h1", 2 },
  84. { "h2", 2 },
  85. { "h3", 2 },
  86. { "h4", 2 },
  87. { "h5", 2 },
  88. { "h6", 2 },
  89. { "ol", 2 },
  90. { "ul", 2 },
  91. /*10*/ { "del", 3 },
  92. { "div", 3 },
  93. /*12*/ { "ins", 3 },
  94. { "pre", 3 },
  95. { "form", 4 },
  96. { "math", 4 },
  97. { "table", 5 },
  98. { "iframe", 6 },
  99. { "script", 6 },
  100. { "fieldset", 8 },
  101. { "noscript", 8 },
  102. { "blockquote", 10 }
  103. };
  104. #define INS_TAG (block_tags + 12) /* the "ins" entry; the offset must follow the table order above */
  105. #define DEL_TAG (block_tags + 10) /* the "del" entry; the offset must follow the table order above */
  106. /***************************
  107. * HELPER FUNCTIONS *
  108. ***************************/
  109. int
  110. is_safe_link(const char *link, size_t link_len)
  111. {
  112. static const size_t valid_uris_count = 4;
  113. static const char *valid_uris[] = {
  114. "http://", "https://", "ftp://", "mailto://"
  115. };
  116. size_t i;
  117. for (i = 0; i < valid_uris_count; ++i) {
  118. size_t len = strlen(valid_uris[i]);
  119. if (link_len > len && strncasecmp(link, valid_uris[i], len) == 0)
  120. return 1;
  121. }
  122. return 0;
  123. }
  124. static void
  125. unscape_text(struct buf *ob, struct buf *src)
  126. {
  127. size_t i = 0, org;
  128. while (i < src->size) {
  129. org = i;
  130. while (i < src->size && src->data[i] != '\\')
  131. i++;
  132. if (i > org)
  133. bufput(ob, src->data + org, i - org);
  134. if (i + 1 >= src->size)
  135. break;
  136. bufputc(ob, src->data[i + 1]);
  137. i += 2;
  138. }
  139. }
  140. /* cmp_link_ref • comparison function for link_ref sorted arrays */
  141. static int
  142. cmp_link_ref(void *key, void *array_entry)
  143. {
  144. struct link_ref *lr = array_entry;
  145. return bufcasecmp(key, lr->id);
  146. }
  147. /* cmp_link_ref_sort • comparison function for link_ref qsort */
  148. static int
  149. cmp_link_ref_sort(const void *a, const void *b)
  150. {
  151. const struct link_ref *lra = a;
  152. const struct link_ref *lrb = b;
  153. return bufcasecmp(lra->id, lrb->id);
  154. }
  155. /* cmp_html_tag • comparison function for bsearch() (stolen from discount) */
  156. static int
  157. cmp_html_tag(const void *a, const void *b)
  158. {
  159. const struct html_tag *hta = a;
  160. const struct html_tag *htb = b;
  161. if (hta->size != htb->size) return (int)((ssize_t)hta->size - (ssize_t)htb->size);
  162. return strncasecmp(hta->text, htb->text, hta->size);
  163. }
  164. /* find_block_tag • returns the current block tag */
  165. static struct html_tag *
  166. find_block_tag(char *data, size_t size)
  167. {
  168. size_t i = 0;
  169. struct html_tag key;
  170. /* looking for the word end */
  171. while (i < size && ((data[i] >= '0' && data[i] <= '9')
  172. || (data[i] >= 'A' && data[i] <= 'Z')
  173. || (data[i] >= 'a' && data[i] <= 'z')))
  174. i += 1;
  175. if (i >= size) return 0;
  176. /* binary search of the tag */
  177. key.text = data;
  178. key.size = i;
  179. return bsearch(&key, block_tags,
  180. sizeof block_tags / sizeof block_tags[0],
  181. sizeof block_tags[0], cmp_html_tag);
  182. }
  183. /****************************
  184. * INLINE PARSING FUNCTIONS *
  185. ****************************/
  186. /* is_mail_autolink • looks for the address part of a mail autolink and '>' */
  187. /* this is less strict than the original markdown e-mail address matching */
  188. static size_t
  189. is_mail_autolink(char *data, size_t size)
  190. {
  191. size_t i = 0, nb = 0;
  192. /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
  193. while (i < size && (data[i] == '-' || data[i] == '.'
  194. || data[i] == '_' || data[i] == '@'
  195. || (data[i] >= 'a' && data[i] <= 'z')
  196. || (data[i] >= 'A' && data[i] <= 'Z')
  197. || (data[i] >= '0' && data[i] <= '9'))) {
  198. if (data[i] == '@') nb += 1;
  199. i += 1; }
  200. if (i >= size || data[i] != '>' || nb != 1) return 0;
  201. return i + 1;
  202. }
  203. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  204. static size_t
  205. tag_length(char *data, size_t size, enum mkd_autolink *autolink)
  206. {
  207. size_t i, j;
  208. /* a valid tag can't be shorter than 3 chars */
  209. if (size < 3) return 0;
  210. /* begins with a '<' optionally followed by '/', followed by letter */
  211. if (data[0] != '<') return 0;
  212. i = (data[1] == '/') ? 2 : 1;
  213. if ((data[i] < 'a' || data[i] > 'z')
  214. && (data[i] < 'A' || data[i] > 'Z')) return 0;
  215. /* scheme test */
  216. *autolink = MKDA_NOT_AUTOLINK;
  217. /* try to find the beggining of an URI */
  218. while (i < size && (isalpha(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  219. i++;
  220. if (i > 1 && data[i] == '@') {
  221. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  222. *autolink = MKDA_EMAIL;
  223. return i + j;
  224. }
  225. }
  226. if (i > 2 && data[i] == ':') {
  227. *autolink = MKDA_NORMAL;
  228. i++;
  229. }
  230. /* completing autolink test: no whitespace or ' or " */
  231. if (i >= size)
  232. *autolink = MKDA_NOT_AUTOLINK;
  233. else if (*autolink) {
  234. j = i;
  235. while (i < size) {
  236. if (data[i] == '\\') i += 2;
  237. else if (data[i] == '>' || data[i] == '\'' ||
  238. data[i] == '"' || isspace(data[i])) break;
  239. else i += 1;
  240. }
  241. if (i >= size) return 0;
  242. if (i > j && data[i] == '>') return i + 1;
  243. /* one of the forbidden chars has been found */
  244. *autolink = MKDA_NOT_AUTOLINK;
  245. }
  246. /* looking for sometinhg looking like a tag end */
  247. while (i < size && data[i] != '>') i += 1;
  248. if (i >= size) return 0;
  249. return i + 1;
  250. }
  251. /* parse_inline • parses inline markdown elements */
  252. static void
  253. parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size)
  254. {
  255. size_t i = 0, end = 0;
  256. char_trigger action = 0;
  257. struct buf work = { 0, 0, 0, 0, 0 };
  258. if (rndr->work.size > rndr->max_nesting)
  259. return;
  260. while (i < size) {
  261. /* copying inactive chars into the output */
  262. while (end < size && (action = rndr->active_char[(unsigned char)data[end]]) == 0) {
  263. end++;
  264. }
  265. if (rndr->make.normal_text) {
  266. work.data = data + i;
  267. work.size = end - i;
  268. rndr->make.normal_text(ob, &work, rndr->make.opaque);
  269. }
  270. else
  271. bufput(ob, data + i, end - i);
  272. if (end >= size) break;
  273. i = end;
  274. /* calling the trigger */
  275. end = action(ob, rndr, data + i, i, size - i);
  276. if (!end) /* no action from the callback */
  277. end = i + 1;
  278. else {
  279. i += end;
  280. end = i;
  281. }
  282. }
  283. }
  284. /* find_emph_char • looks for the next emph char, skipping other constructs */
  285. static size_t
  286. find_emph_char(char *data, size_t size, char c)
  287. {
  288. size_t i = 1;
  289. while (i < size) {
  290. while (i < size && data[i] != c
  291. && data[i] != '`' && data[i] != '[')
  292. i += 1;
  293. if (data[i] == c) return i;
  294. /* not counting escaped chars */
  295. if (i && data[i - 1] == '\\') { i += 1; continue; }
  296. /* skipping a code span */
  297. if (data[i] == '`') {
  298. size_t tmp_i = 0;
  299. i += 1;
  300. while (i < size && data[i] != '`') {
  301. if (!tmp_i && data[i] == c) tmp_i = i;
  302. i += 1; }
  303. if (i >= size) return tmp_i;
  304. i += 1; }
  305. /* skipping a link */
  306. else if (data[i] == '[') {
  307. size_t tmp_i = 0;
  308. char cc;
  309. i += 1;
  310. while (i < size && data[i] != ']') {
  311. if (!tmp_i && data[i] == c) tmp_i = i;
  312. i += 1; }
  313. i += 1;
  314. while (i < size && (data[i] == ' '
  315. || data[i] == '\t' || data[i] == '\n'))
  316. i += 1;
  317. if (i >= size) return tmp_i;
  318. if (data[i] != '[' && data[i] != '(') { /* not a link*/
  319. if (tmp_i) return tmp_i;
  320. else continue; }
  321. cc = data[i];
  322. i += 1;
  323. while (i < size && data[i] != cc) {
  324. if (!tmp_i && data[i] == c) tmp_i = i;
  325. i += 1; }
  326. if (i >= size) return tmp_i;
  327. i += 1; } }
  328. return 0;
  329. }
  330. /* parse_emph1 • parsing single emphase */
  331. /* closed by a symbol not preceded by whitespace and not followed by symbol */
  332. static size_t
  333. parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
  334. {
  335. size_t i = 0, len;
  336. struct buf *work = 0;
  337. int r;
  338. if (!rndr->make.emphasis) return 0;
  339. /* skipping one symbol if coming from emph3 */
  340. if (size > 1 && data[0] == c && data[1] == c) i = 1;
  341. while (i < size) {
  342. len = find_emph_char(data + i, size - i, c);
  343. if (!len) return 0;
  344. i += len;
  345. if (i >= size) return 0;
  346. if (i + 1 < size && data[i + 1] == c) {
  347. i += 1;
  348. continue;
  349. }
  350. if (data[i] == c && !isspace(data[i - 1])) {
  351. if ((rndr->ext_flags & MKDEXT_LAX_EMPHASIS) == 0) {
  352. if (!(i + 1 == size || isspace(data[i + 1]) || ispunct(data[i + 1])))
  353. continue;
  354. }
  355. work = rndr_newbuf(rndr);
  356. parse_inline(work, rndr, data, i);
  357. r = rndr->make.emphasis(ob, work, rndr->make.opaque);
  358. rndr_popbuf(rndr);
  359. return r ? i + 1 : 0;
  360. }
  361. }
  362. return 0;
  363. }
  364. /* parse_emph2 • parsing single emphase */
  365. static size_t
  366. parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
  367. {
  368. int (*render_method)(struct buf *ob, struct buf *text, void *opaque);
  369. size_t i = 0, len;
  370. struct buf *work = 0;
  371. int r;
  372. render_method = (c == '~') ? rndr->make.strikethrough : rndr->make.double_emphasis;
  373. if (!render_method)
  374. return 0;
  375. while (i < size) {
  376. len = find_emph_char(data + i, size - i, c);
  377. if (!len) return 0;
  378. i += len;
  379. if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !isspace(data[i - 1])) {
  380. work = rndr_newbuf(rndr);
  381. parse_inline(work, rndr, data, i);
  382. r = render_method(ob, work, rndr->make.opaque);
  383. rndr_popbuf(rndr);
  384. return r ? i + 2 : 0;
  385. }
  386. i++;
  387. }
  388. return 0;
  389. }
  390. /* parse_emph3 • parsing single emphase */
  391. /* finds the first closing tag, and delegates to the other emph */
  392. static size_t
  393. parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
  394. {
  395. size_t i = 0, len;
  396. int r;
  397. while (i < size) {
  398. len = find_emph_char(data + i, size - i, c);
  399. if (!len) return 0;
  400. i += len;
  401. /* skip whitespace preceded symbols */
  402. if (data[i] != c || isspace(data[i - 1]))
  403. continue;
  404. if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->make.triple_emphasis) {
  405. /* triple symbol found */
  406. struct buf *work = rndr_newbuf(rndr);
  407. parse_inline(work, rndr, data, i);
  408. r = rndr->make.triple_emphasis(ob, work, rndr->make.opaque);
  409. rndr_popbuf(rndr);
  410. return r ? i + 3 : 0;
  411. } else if (i + 1 < size && data[i + 1] == c) {
  412. /* double symbol found, handing over to emph1 */
  413. len = parse_emph1(ob, rndr, data - 2, size + 2, c);
  414. if (!len) return 0;
  415. else return len - 2;
  416. } else {
  417. /* single symbol found, handing over to emph2 */
  418. len = parse_emph2(ob, rndr, data - 1, size + 1, c);
  419. if (!len) return 0;
  420. else return len - 1;
  421. }
  422. }
  423. return 0;
  424. }
  425. /* char_emphasis • single and double emphasis parsing */
  426. static size_t
  427. char_emphasis(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  428. {
  429. char c = data[0];
  430. size_t ret;
  431. if (size > 2 && data[1] != c) {
  432. /* whitespace cannot follow an opening emphasis;
  433. * strikethrough only takes two characters '~~' */
  434. if (c == '~' || isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
  435. return 0;
  436. return ret + 1;
  437. }
  438. if (size > 3 && data[1] == c && data[2] != c) {
  439. if (isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
  440. return 0;
  441. return ret + 2;
  442. }
  443. if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
  444. if (c == '~' || isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
  445. return 0;
  446. return ret + 3;
  447. }
  448. return 0;
  449. }
  450. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  451. static size_t
  452. char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  453. {
  454. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  455. return 0;
  456. /* removing the last space from ob and rendering */
  457. while (ob->size && ob->data[ob->size - 1] == ' ')
  458. ob->size--;
  459. return rndr->make.linebreak(ob, rndr->make.opaque) ? 1 : 0;
  460. }
  461. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  462. static size_t
  463. char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  464. {
  465. size_t end, nb = 0, i, f_begin, f_end;
  466. /* counting the number of backticks in the delimiter */
  467. while (nb < size && data[nb] == '`')
  468. nb++;
  469. /* finding the next delimiter */
  470. i = 0;
  471. for (end = nb; end < size && i < nb; end++) {
  472. if (data[end] == '`') i++;
  473. else i = 0;
  474. }
  475. if (i < nb && end >= size)
  476. return 0; /* no matching delimiter */
  477. /* trimming outside whitespaces */
  478. f_begin = nb;
  479. while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t'))
  480. f_begin++;
  481. f_end = end - nb;
  482. while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t'))
  483. f_end--;
  484. /* real code span */
  485. if (f_begin < f_end) {
  486. struct buf work = { data + f_begin, f_end - f_begin, 0, 0, 0 };
  487. if (!rndr->make.codespan(ob, &work, rndr->make.opaque))
  488. end = 0;
  489. } else {
  490. if (!rndr->make.codespan(ob, 0, rndr->make.opaque))
  491. end = 0;
  492. }
  493. return end;
  494. }
  495. /* char_escape • '\\' backslash escape */
  496. static size_t
  497. char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  498. {
  499. struct buf work = { 0, 0, 0, 0, 0 };
  500. if (size > 1) {
  501. if (rndr->make.normal_text) {
  502. work.data = data + 1;
  503. work.size = 1;
  504. rndr->make.normal_text(ob, &work, rndr->make.opaque);
  505. }
  506. else bufputc(ob, data[1]);
  507. }
  508. return 2;
  509. }
  510. /* char_entity • '&' escaped when it doesn't belong to an entity */
  511. /* valid entities are assumed to be anything mathing &#?[A-Za-z0-9]+; */
  512. static size_t
  513. char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  514. {
  515. size_t end = 1;
  516. struct buf work;
  517. if (end < size && data[end] == '#')
  518. end++;
  519. while (end < size && isalnum(data[end]))
  520. end++;
  521. if (end < size && data[end] == ';')
  522. end += 1; /* real entity */
  523. else
  524. return 0; /* lone '&' */
  525. if (rndr->make.entity) {
  526. work.data = data;
  527. work.size = end;
  528. rndr->make.entity(ob, &work, rndr->make.opaque);
  529. }
  530. else bufput(ob, data, end);
  531. return end;
  532. }
  533. /* char_langle_tag • '<' when tags or autolinks are allowed */
  534. static size_t
  535. char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  536. {
  537. enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
  538. size_t end = tag_length(data, size, &altype);
  539. struct buf work = { data, end, 0, 0, 0 };
  540. int ret = 0;
  541. if (end > 2) {
  542. if (rndr->make.autolink && altype != MKDA_NOT_AUTOLINK) {
  543. struct buf *u_link = rndr_newbuf(rndr);
  544. work.data = data + 1;
  545. work.size = end - 2;
  546. unscape_text(u_link, &work);
  547. ret = rndr->make.autolink(ob, u_link, altype, rndr->make.opaque);
  548. rndr_popbuf(rndr);
  549. }
  550. else if (rndr->make.raw_html_tag)
  551. ret = rndr->make.raw_html_tag(ob, &work, rndr->make.opaque);
  552. }
  553. if (!ret) return 0;
  554. else return end;
  555. }
  556. static size_t
  557. char_autolink(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  558. {
  559. struct buf work = { data, 0, 0, 0, 0 };
  560. if (offset > 0 && !isspace(data[-1]))
  561. return 0;
  562. if (!is_safe_link(data, size))
  563. return 0;
  564. while (work.size < size && !isspace(data[work.size]))
  565. work.size++;
  566. if (rndr->make.autolink) {
  567. struct buf *u_link = rndr_newbuf(rndr);
  568. unscape_text(u_link, &work);
  569. rndr->make.autolink(ob, u_link, MKDA_NORMAL, rndr->make.opaque);
  570. rndr_popbuf(rndr);
  571. }
  572. return work.size;
  573. }
  574. /* char_link • '[': parsing a link or an image */
  575. static size_t
  576. char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
  577. {
  578. int is_img = (offset && data[-1] == '!'), level;
  579. size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
  580. struct buf *content = 0;
  581. struct buf *link = 0;
  582. struct buf *title = 0;
  583. struct buf *u_link = 0;
  584. size_t org_work_size = rndr->work.size;
  585. int text_has_nl = 0, ret = 0;
  586. /* checking whether the correct renderer exists */
  587. if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link))
  588. goto cleanup;
  589. /* looking for the matching closing bracket */
  590. for (level = 1; i < size; i += 1) {
  591. if (data[i] == '\n')
  592. text_has_nl = 1;
  593. else if (data[i - 1] == '\\')
  594. continue;
  595. else if (data[i] == '[')
  596. level++;
  597. else if (data[i] == ']') {
  598. level--;
  599. if (level <= 0)
  600. break;
  601. }
  602. }
  603. if (i >= size)
  604. goto cleanup;
  605. txt_e = i;
  606. i += 1;
  607. /* skip any amount of whitespace or newline */
  608. /* (this is much more laxist than original markdown syntax) */
  609. while (i < size && isspace(data[i]))
  610. i++;
  611. /* inline style link */
  612. if (i < size && data[i] == '(') {
  613. /* skipping initial whitespace */
  614. i += 1;
  615. while (i < size && isspace(data[i]))
  616. i++;
  617. link_b = i;
  618. /* looking for link end: ' " ) */
  619. while (i < size) {
  620. if (data[i] == '\\') i += 2;
  621. else if (data[i] == ')' || data[i] == '\'' || data[i] == '"') break;
  622. else i += 1;
  623. }
  624. if (i >= size) goto cleanup;
  625. link_e = i;
  626. /* looking for title end if present */
  627. if (data[i] == '\'' || data[i] == '"') {
  628. i++;
  629. title_b = i;
  630. while (i < size) {
  631. if (data[i] == '\\') i += 2;
  632. else if (data[i] == ')') break;
  633. else i += 1;
  634. }
  635. if (i >= size) goto cleanup;
  636. /* skipping whitespaces after title */
  637. title_e = i - 1;
  638. while (title_e > title_b && isspace(data[title_e]))
  639. title_e--;
  640. /* checking for closing quote presence */
  641. if (data[title_e] != '\'' && data[title_e] != '"') {
  642. title_b = title_e = 0;
  643. link_e = i;
  644. }
  645. }
  646. /* remove whitespace at the end of the link */
  647. while (link_e > link_b && isspace(data[link_e - 1]))
  648. link_e--;
  649. /* remove optional angle brackets around the link */
  650. if (data[link_b] == '<') link_b++;
  651. if (data[link_e - 1] == '>') link_e--;
  652. /* building escaped link and title */
  653. if (link_e > link_b) {
  654. link = rndr_newbuf(rndr);
  655. bufput(link, data + link_b, link_e - link_b);
  656. }
  657. if (title_e > title_b) {
  658. title = rndr_newbuf(rndr);
  659. bufput(title, data + title_b, title_e - title_b);
  660. }
  661. i++;
  662. }
  663. /* reference style link */
  664. else if (i < size && data[i] == '[') {
  665. struct buf id = { 0, 0, 0, 0, 0 };
  666. struct link_ref *lr;
  667. /* looking for the id */
  668. i += 1;
  669. link_b = i;
  670. while (i < size && data[i] != ']') i++;
  671. if (i >= size) goto cleanup;
  672. link_e = i;
  673. /* finding the link_ref */
  674. if (link_b == link_e) {
  675. if (text_has_nl) {
  676. struct buf *b = rndr_newbuf(rndr);
  677. size_t j;
  678. for (j = 1; j < txt_e; j++) {
  679. if (data[j] != '\n')
  680. bufputc(b, data[j]);
  681. else if (data[j - 1] != ' ')
  682. bufputc(b, ' ');
  683. }
  684. id.data = b->data;
  685. id.size = b->size;
  686. } else {
  687. id.data = data + 1;
  688. id.size = txt_e - 1;
  689. }
  690. } else {
  691. id.data = data + link_b;
  692. id.size = link_e - link_b;
  693. }
  694. lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
  695. if (!lr) goto cleanup;
  696. /* keeping link and title from link_ref */
  697. link = lr->link;
  698. title = lr->title;
  699. i += 1;
  700. }
  701. /* shortcut reference style link */
  702. else {
  703. struct buf id = { 0, 0, 0, 0, 0 };
  704. struct link_ref *lr;
  705. /* crafting the id */
  706. if (text_has_nl) {
  707. struct buf *b = rndr_newbuf(rndr);
  708. size_t j;
  709. for (j = 1; j < txt_e; j++) {
  710. if (data[j] != '\n')
  711. bufputc(b, data[j]);
  712. else if (data[j - 1] != ' ')
  713. bufputc(b, ' ');
  714. }
  715. id.data = b->data;
  716. id.size = b->size;
  717. } else {
  718. id.data = data + 1;
  719. id.size = txt_e - 1;
  720. }
  721. /* finding the link_ref */
  722. lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
  723. if (!lr) goto cleanup;
  724. /* keeping link and title from link_ref */
  725. link = lr->link;
  726. title = lr->title;
  727. /* rewinding the whitespace */
  728. i = txt_e + 1;
  729. }
  730. /* building content: img alt is escaped, link content is parsed */
  731. if (txt_e > 1) {
  732. content = rndr_newbuf(rndr);
  733. if (is_img) bufput(content, data + 1, txt_e - 1);
  734. else parse_inline(content, rndr, data + 1, txt_e - 1);
  735. }
  736. if (link) {
  737. u_link = rndr_newbuf(rndr);
  738. unscape_text(u_link, link);
  739. }
  740. /* calling the relevant rendering function */
  741. if (is_img) {
  742. if (ob->size && ob->data[ob->size - 1] == '!')
  743. ob->size -= 1;
  744. ret = rndr->make.image(ob, u_link, title, content, rndr->make.opaque);
  745. } else {
  746. ret = rndr->make.link(ob, u_link, title, content, rndr->make.opaque);
  747. }
  748. /* cleanup */
  749. cleanup:
  750. rndr->work.size = (int)org_work_size;
  751. return ret ? i : 0;
  752. }
  753. /*********************************
  754. * BLOCK-LEVEL PARSING FUNCTIONS *
  755. *********************************/
  756. /* is_empty • returns the line length when it is empty, 0 otherwise */
  757. static size_t
  758. is_empty(char *data, size_t size)
  759. {
  760. size_t i;
  761. for (i = 0; i < size && data[i] != '\n'; i += 1)
  762. if (data[i] != ' ' && data[i] != '\t') return 0;
  763. return i + 1;
  764. }
  765. /* is_hrule • returns whether a line is a horizontal rule */
  766. static int
  767. is_hrule(char *data, size_t size)
  768. {
  769. size_t i = 0, n = 0;
  770. char c;
  771. /* skipping initial spaces */
  772. if (size < 3) return 0;
  773. if (data[0] == ' ') { i += 1;
  774. if (data[1] == ' ') { i += 1;
  775. if (data[2] == ' ') { i += 1; } } }
  776. /* looking at the hrule char */
  777. if (i + 2 >= size
  778. || (data[i] != '*' && data[i] != '-' && data[i] != '_'))
  779. return 0;
  780. c = data[i];
  781. /* the whole line must be the char or whitespace */
  782. while (i < size && data[i] != '\n') {
  783. if (data[i] == c) n += 1;
  784. else if (data[i] != ' ' && data[i] != '\t')
  785. return 0;
  786. i += 1; }
  787. return n >= 3;
  788. }
  789. /* check if a line is a code fence; return its size if it is */
  790. static size_t
  791. is_codefence(char *data, size_t size, struct buf *syntax)
  792. {
  793. size_t i = 0, n = 0;
  794. char c;
  795. /* skipping initial spaces */
  796. if (size < 3) return 0;
  797. if (data[0] == ' ') { i += 1;
  798. if (data[1] == ' ') { i += 1;
  799. if (data[2] == ' ') { i += 1; } } }
  800. /* looking at the hrule char */
  801. if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
  802. return 0;
  803. c = data[i];
  804. /* the whole line must be the char or whitespace */
  805. while (i < size && data[i] == c) {
  806. n++; i++;
  807. }
  808. if (n < 3)
  809. return 0;
  810. if (syntax != NULL) {
  811. size_t syn = 0;
  812. while (i < size && (data[i] == ' ' || data[i] == '\t'))
  813. i++;
  814. syntax->data = data + i;
  815. if (i < size && data[i] == '{') {
  816. i++; syntax->data++;
  817. while (i < size && data[i] != '}' && data[i] != '\n') {
  818. syn++; i++;
  819. }
  820. if (i == size || data[i] != '}')
  821. return 0;
  822. /* strip all whitespace at the beggining and the end
  823. * of the {} block */
  824. while (syn > 0 && isspace(syntax->data[0])) {
  825. syntax->data++; syn--;
  826. }
  827. while (syn > 0 && isspace(syntax->data[syn - 1]))
  828. syn--;
  829. i++;
  830. } else {
  831. while (i < size && !isspace(data[i])) {
  832. syn++; i++;
  833. }
  834. }
  835. syntax->size = syn;
  836. }
  837. while (i < size && data[i] != '\n') {
  838. if (!isspace(data[i]))
  839. return 0;
  840. i++;
  841. }
  842. return i + 1;
  843. }
  844. /* is_headerline • returns whether the line is a setext-style hdr underline */
  845. static int
  846. is_headerline(char *data, size_t size)
  847. {
  848. size_t i = 0;
  849. /* test of level 1 header */
  850. if (data[i] == '=') {
  851. for (i = 1; i < size && data[i] == '='; i += 1);
  852. while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1;
  853. return (i >= size || data[i] == '\n') ? 1 : 0; }
  854. /* test of level 2 header */
  855. if (data[i] == '-') {
  856. for (i = 1; i < size && data[i] == '-'; i += 1);
  857. while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1;
  858. return (i >= size || data[i] == '\n') ? 2 : 0; }
  859. return 0;
  860. }
  861. /* prefix_quote • returns blockquote prefix length */
  862. static size_t
  863. prefix_quote(char *data, size_t size)
  864. {
  865. size_t i = 0;
  866. if (i < size && data[i] == ' ') i += 1;
  867. if (i < size && data[i] == ' ') i += 1;
  868. if (i < size && data[i] == ' ') i += 1;
  869. if (i < size && data[i] == '>') {
  870. if (i + 1 < size && (data[i + 1] == ' ' || data[i+1] == '\t'))
  871. return i + 2;
  872. else return i + 1; }
  873. else return 0;
  874. }
  875. /* prefix_code • returns prefix length for block code*/
  876. static size_t
  877. prefix_code(char *data, size_t size)
  878. {
  879. if (size > 0 && data[0] == '\t') return 1;
  880. if (size > 3 && data[0] == ' ' && data[1] == ' '
  881. && data[2] == ' ' && data[3] == ' ') return 4;
  882. return 0;
  883. }
  884. /* prefix_oli • returns ordered list item prefix */
  885. static size_t
  886. prefix_oli(char *data, size_t size)
  887. {
  888. size_t i = 0;
  889. if (i < size && data[i] == ' ') i += 1;
  890. if (i < size && data[i] == ' ') i += 1;
  891. if (i < size && data[i] == ' ') i += 1;
  892. if (i >= size || data[i] < '0' || data[i] > '9') return 0;
  893. while (i < size && data[i] >= '0' && data[i] <= '9') i += 1;
  894. if (i + 1 >= size || data[i] != '.'
  895. || (data[i + 1] != ' ' && data[i + 1] != '\t')) return 0;
  896. return i + 2;
  897. }
  898. /* prefix_uli • returns ordered list item prefix */
  899. static size_t
  900. prefix_uli(char *data, size_t size)
  901. {
  902. size_t i = 0;
  903. if (i < size && data[i] == ' ') i += 1;
  904. if (i < size && data[i] == ' ') i += 1;
  905. if (i < size && data[i] == ' ') i += 1;
  906. if (i + 1 >= size
  907. || (data[i] != '*' && data[i] != '+' && data[i] != '-')
  908. || (data[i + 1] != ' ' && data[i + 1] != '\t'))
  909. return 0;
  910. return i + 2;
  911. }
  912. /* parse_block • parsing of a run of block-level input into ob (forward declaration; returns nothing, parse_blockquote calls it on the unquoted text) */
  913. static void parse_block(struct buf *ob, struct render *rndr,
  914. char *data, size_t size);
  915. /* parse_blockquote • hanldes parsing of a blockquote fragment */
  916. static size_t
  917. parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size)
  918. {
  919. size_t beg, end = 0, pre, work_size = 0;
  920. char *work_data = 0;
  921. struct buf *out = 0;
  922. out = rndr_newbuf(rndr);
  923. beg = 0;
  924. while (beg < size) {
  925. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  926. pre = prefix_quote(data + beg, end - beg);
  927. if (pre)
  928. beg += pre; /* skipping prefix */
  929. /* empty line followed by non-quote line */
  930. else if (is_empty(data + beg, end - beg) &&
  931. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  932. !is_empty(data + end, size - end))))
  933. break;
  934. if (beg < end) { /* copy into the in-place working buffer */
  935. /* bufput(work, data + beg, end - beg); */
  936. if (!work_data)
  937. work_data = data + beg;
  938. else if (data + beg != work_data + work_size)
  939. memmove(work_data + work_size, data + beg, end - beg);
  940. work_size += end - beg;
  941. }
  942. beg = end;
  943. }
  944. parse_block(out, rndr, work_data, work_size);
  945. if (rndr->make.blockquote)
  946. rndr->make.blockquote(ob, out, rndr->make.opaque);
  947. rndr_popbuf(rndr);
  948. return end;
  949. }
  /* parse_htmlblock • forward declaration, needed by parse_paragraph */
  static size_t parse_htmlblock(struct buf *ob, struct render *rndr,
                                char *data, size_t size, int do_render);
  952. /* parse_blockquote • hanldes parsing of a regular paragraph */
  953. static size_t
  954. parse_paragraph(struct buf *ob, struct render *rndr, char *data, size_t size)
  955. {
  956. size_t i = 0, end = 0;
  957. int level = 0;
  958. struct buf work = { data, 0, 0, 0, 0 }; /* volatile working buffer */
  959. while (i < size) {
  960. for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
  961. if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0)
  962. break;
  963. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  964. if (data[i] == '<' && rndr->make.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
  965. end = i;
  966. break;
  967. }
  968. }
  969. if (data[i] == '#' || is_hrule(data + i, size - i)) {
  970. end = i;
  971. break;
  972. }
  973. i = end;
  974. }
  975. work.size = i;
  976. while (work.size && data[work.size - 1] == '\n')
  977. work.size--;
  978. if (!level) {
  979. struct buf *tmp = rndr_newbuf(rndr);
  980. parse_inline(tmp, rndr, work.data, work.size);
  981. if (rndr->make.paragraph)
  982. rndr->make.paragraph(ob, tmp, rndr->make.opaque);
  983. rndr_popbuf(rndr);
  984. } else {
  985. struct buf *header_work;
  986. if (work.size) {
  987. size_t beg;
  988. i = work.size;
  989. work.size -= 1;
  990. while (work.size && data[work.size] != '\n')
  991. work.size -= 1;
  992. beg = work.size + 1;
  993. while (work.size && data[work.size - 1] == '\n')
  994. work.size -= 1;
  995. if (work.size > 0) {
  996. struct buf *tmp = rndr_newbuf(rndr);
  997. parse_inline(tmp, rndr, work.data, work.size);
  998. if (rndr->make.paragraph)
  999. rndr->make.paragraph(ob, tmp, rndr->make.opaque);
  1000. rndr_popbuf(rndr);
  1001. work.data += beg;
  1002. work.size = i - beg;
  1003. }
  1004. else work.size = i;
  1005. }
  1006. header_work = rndr_newbuf(rndr);
  1007. parse_inline(header_work, rndr, work.data, work.size);
  1008. if (rndr->make.header)
  1009. rndr->make.header(ob, header_work, (int)level, rndr->make.opaque);
  1010. rndr_popbuf(rndr);
  1011. }
  1012. return end;
  1013. }
  1014. /* parse_fencedcode • hanldes parsing of a block-level code fragment */
  1015. static size_t
  1016. parse_fencedcode(struct buf *ob, struct render *rndr, char *data, size_t size)
  1017. {
  1018. size_t beg, end;
  1019. struct buf *work = 0;
  1020. struct buf lang = { 0, 0, 0, 0, 0 };
  1021. beg = is_codefence(data, size, &lang);
  1022. if (beg == 0) return 0;
  1023. work = rndr_newbuf(rndr);
  1024. while (beg < size) {
  1025. size_t fence_end;
  1026. fence_end = is_codefence(data + beg, size - beg, NULL);
  1027. if (fence_end != 0) {
  1028. beg += fence_end;
  1029. break;
  1030. }
  1031. for (end = beg + 1; end < size && data[end - 1] != '\n'; end += 1);
  1032. if (beg < end) {
  1033. /* verbatim copy to the working buffer,
  1034. escaping entities */
  1035. if (is_empty(data + beg, end - beg))
  1036. bufputc(work, '\n');
  1037. else bufput(work, data + beg, end - beg);
  1038. }
  1039. beg = end;
  1040. }
  1041. if (work->size && work->data[work->size - 1] != '\n')
  1042. bufputc(work, '\n');
  1043. if (rndr->make.blockcode)
  1044. rndr->make.blockcode(ob, work, lang.size ? &lang : NULL, rndr->make.opaque);
  1045. rndr_popbuf(rndr);
  1046. return beg;
  1047. }
  1048. static size_t
  1049. parse_blockcode(struct buf *ob, struct render *rndr, char *data, size_t size)
  1050. {
  1051. size_t beg, end, pre;
  1052. struct buf *work = 0;
  1053. work = rndr_newbuf(rndr);
  1054. beg = 0;
  1055. while (beg < size) {
  1056. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1057. pre = prefix_code(data + beg, end - beg);
  1058. if (pre)
  1059. beg += pre; /* skipping prefix */
  1060. else if (!is_empty(data + beg, end - beg))
  1061. /* non-empty non-prefixed line breaks the pre */
  1062. break;
  1063. if (beg < end) {
  1064. /* verbatim copy to the working buffer,
  1065. escaping entities */
  1066. if (is_empty(data + beg, end - beg))
  1067. bufputc(work, '\n');
  1068. else bufput(work, data + beg, end - beg);
  1069. }
  1070. beg = end;
  1071. }
  1072. while (work->size && work->data[work->size - 1] == '\n')
  1073. work->size -= 1;
  1074. bufputc(work, '\n');
  1075. if (rndr->make.blockcode)
  1076. rndr->make.blockcode(ob, work, NULL, rndr->make.opaque);
  1077. rndr_popbuf(rndr);
  1078. return beg;
  1079. }
  1080. /* parse_listitem • parsing of a single list item */
  1081. /* assuming initial prefix is already removed */
  1082. static size_t
  1083. parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int *flags)
  1084. {
  1085. struct buf *work = 0, *inter = 0;
  1086. size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
  1087. int in_empty = 0, has_inside_empty = 0;
  1088. /* keeping book of the first indentation prefix */
  1089. while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
  1090. orgpre++;
  1091. beg = prefix_uli(data, size);
  1092. if (!beg)
  1093. beg = prefix_oli(data, size);
  1094. if (!beg)
  1095. return 0;
  1096. /* skipping to the beginning of the following line */
  1097. end = beg;
  1098. while (end < size && data[end - 1] != '\n')
  1099. end++;
  1100. /* getting working buffers */
  1101. work = rndr_newbuf(rndr);
  1102. inter = rndr_newbuf(rndr);
  1103. /* putting the first line into the working buffer */
  1104. bufput(work, data + beg, end - beg);
  1105. beg = end;
  1106. /* process the following lines */
  1107. while (beg < size) {
  1108. end++;
  1109. while (end < size && data[end - 1] != '\n')
  1110. end++;
  1111. /* process an empty line */
  1112. if (is_empty(data + beg, end - beg)) {
  1113. in_empty = 1;
  1114. beg = end;
  1115. continue;
  1116. }
  1117. /* calculating the indentation */
  1118. i = 0;
  1119. while (i < 4 && beg + i < end && data[beg + i] == ' ')
  1120. i++;
  1121. pre = i;
  1122. if (data[beg] == '\t') { i = 1; pre = 8; }
  1123. /* checking for a new item */
  1124. if ((prefix_uli(data + beg + i, end - beg - i) &&
  1125. !is_hrule(data + beg + i, end - beg - i)) ||
  1126. prefix_oli(data + beg + i, end - beg - i)) {
  1127. if (in_empty)
  1128. has_inside_empty = 1;
  1129. if (pre == orgpre) /* the following item must have */
  1130. break; /* the same indentation */
  1131. if (!sublist)
  1132. sublist = work->size;
  1133. }
  1134. /* joining only indented stuff after empty lines */
  1135. else if (in_empty && i < 4 && data[beg] != '\t') {
  1136. *flags |= MKD_LI_END;
  1137. break;
  1138. }
  1139. else if (in_empty) {
  1140. bufputc(work, '\n');
  1141. has_inside_empty = 1;
  1142. }
  1143. in_empty = 0;
  1144. /* adding the line without prefix into the working buffer */
  1145. bufput(work, data + beg + i, end - beg - i);
  1146. beg = end;
  1147. }
  1148. /* render of li contents */
  1149. if (has_inside_empty)
  1150. *flags |= MKD_LI_BLOCK;
  1151. if (*flags & MKD_LI_BLOCK) {
  1152. /* intermediate render of block li */
  1153. if (sublist && sublist < work->size) {
  1154. parse_block(inter, rndr, work->data, sublist);
  1155. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1156. }
  1157. else
  1158. parse_block(inter, rndr, work->data, work->size);
  1159. } else {
  1160. /* intermediate render of inline li */
  1161. if (sublist && sublist < work->size) {
  1162. parse_inline(inter, rndr, work->data, sublist);
  1163. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1164. }
  1165. else
  1166. parse_inline(inter, rndr, work->data, work->size);
  1167. }
  1168. /* render of li itself */
  1169. if (rndr->make.listitem)
  1170. rndr->make.listitem(ob, inter, *flags, rndr->make.opaque);
  1171. rndr_popbuf(rndr);
  1172. rndr_popbuf(rndr);
  1173. return beg;
  1174. }
  1175. /* parse_list • parsing ordered or unordered list block */
  1176. static size_t
  1177. parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int flags)
  1178. {
  1179. struct buf *work = 0;
  1180. size_t i = 0, j;
  1181. work = rndr_newbuf(rndr);
  1182. while (i < size) {
  1183. j = parse_listitem(work, rndr, data + i, size - i, &flags);
  1184. i += j;
  1185. if (!j || (flags & MKD_LI_END))
  1186. break;
  1187. }
  1188. if (rndr->make.list)
  1189. rndr->make.list(ob, work, flags, rndr->make.opaque);
  1190. rndr_popbuf(rndr);
  1191. return i;
  1192. }
  1193. /* parse_atxheader • parsing of atx-style headers */
  1194. static size_t
  1195. parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
  1196. {
  1197. size_t level = 0;
  1198. size_t i, end, skip;
  1199. if (!size || data[0] != '#')
  1200. return 0;
  1201. while (level < size && level < 6 && data[level] == '#')
  1202. level++;
  1203. for (i = level; i < size && (data[i] == ' ' || data[i] == '\t'); i++);
  1204. for (end = i; end < size && data[end] != '\n'; end++);
  1205. skip = end;
  1206. while (end && data[end - 1] == '#')
  1207. end--;
  1208. while (end && (data[end - 1] == ' ' || data[end - 1] == '\t'))
  1209. end--;
  1210. if (end > i) {
  1211. struct buf *work = rndr_newbuf(rndr);
  1212. parse_inline(work, rndr, data + i, end - i);
  1213. if (rndr->make.header)
  1214. rndr->make.header(ob, work, (int)level, rndr->make.opaque);
  1215. rndr_popbuf(rndr);
  1216. }
  1217. return skip;
  1218. }
  1219. /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
  1220. /* returns the length on match, 0 otherwise */
  1221. static size_t
  1222. htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size)
  1223. {
  1224. size_t i, w;
  1225. /* assuming data[0] == '<' && data[1] == '/' already tested */
  1226. /* checking tag is a match */
  1227. if (tag->size + 3 >= size
  1228. || strncasecmp(data + 2, tag->text, tag->size)
  1229. || data[tag->size + 2] != '>')
  1230. return 0;
  1231. /* checking white lines */
  1232. i = tag->size + 3;
  1233. w = 0;
  1234. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1235. return 0; /* non-blank after tag */
  1236. i += w;
  1237. w = 0;
  1238. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  1239. if (i < size)
  1240. w = is_empty(data + i, size - i);
  1241. } else {
  1242. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1243. return 0; /* non-blank line after tag line */
  1244. }
  1245. return i + w;
  1246. }
  1247. /* parse_htmlblock • parsing of inline HTML block */
  1248. static size_t
  1249. parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render)
  1250. {
  1251. size_t i, j = 0;
  1252. struct html_tag *curtag;
  1253. int found;
  1254. struct buf work = { data, 0, 0, 0, 0 };
  1255. /* identification of the opening tag */
  1256. if (size < 2 || data[0] != '<') return 0;
  1257. curtag = find_block_tag(data + 1, size - 1);
  1258. /* handling of special cases */
  1259. if (!curtag) {
  1260. /* HTML comment, laxist form */
  1261. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  1262. i = 5;
  1263. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  1264. i++;
  1265. i++;
  1266. if (i < size)
  1267. j = is_empty(data + i, size - i);
  1268. if (j) {
  1269. work.size = i + j;
  1270. if (do_render && rndr->make.blockhtml)
  1271. rndr->make.blockhtml(ob, &work, rndr->make.opaque);
  1272. return work.size;
  1273. }
  1274. }
  1275. /* HR, which is the only self-closing block tag considered */
  1276. if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
  1277. i = 3;
  1278. while (i < size && data[i] != '>')
  1279. i += 1;
  1280. if (i + 1 < size) {
  1281. i += 1;
  1282. j = is_empty(data + i, size - i);
  1283. if (j) {
  1284. work.size = i + j;
  1285. if (do_render && rndr->make.blockhtml)
  1286. rndr->make.blockhtml(ob, &work, rndr->make.opaque);
  1287. return work.size;
  1288. }
  1289. }
  1290. }
  1291. /* no special case recognised */
  1292. return 0;
  1293. }
  1294. /* looking for an unindented matching closing tag */
  1295. /* followed by a blank line */
  1296. i = 1;
  1297. found = 0;
  1298. /* if not found, trying a second pass looking for indented match */
  1299. /* but not if tag is "ins" or "del" (following original Markdown.pl) */
  1300. if (curtag != INS_TAG && curtag != DEL_TAG) {
  1301. i = 1;
  1302. while (i < size) {
  1303. i++;
  1304. while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
  1305. i++;
  1306. if (i + 2 + curtag->size >= size)
  1307. break;
  1308. j = htmlblock_end(curtag, rndr, data + i - 1, size - i + 1);
  1309. if (j) {
  1310. i += j - 1;
  1311. found = 1;
  1312. break;
  1313. }
  1314. }
  1315. }
  1316. if (!found) return 0;
  1317. /* the end of the block has been found */
  1318. work.size = i;
  1319. if (do_render && rndr->make.blockhtml)
  1320. rndr->make.blockhtml(ob, &work, rndr->make.opaque);
  1321. return i;
  1322. }
  1323. static void
  1324. parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, size_t columns, int *col_data)
  1325. {
  1326. size_t i = 0, col;
  1327. struct buf *row_work = 0;
  1328. row_work = rndr_newbuf(rndr);
  1329. if (i < size && data[i] == '|')
  1330. i++;
  1331. for (col = 0; col < columns && i < size; ++col) {
  1332. size_t cell_start, cell_end;
  1333. struct buf *cell_work;
  1334. cell_work = rndr_newbuf(rndr);
  1335. while (i < size && isspace(data[i]))
  1336. i++;
  1337. cell_start = i;
  1338. while (i < size && data[i] != '|')
  1339. i++;
  1340. cell_end = i - 1;
  1341. while (cell_end > cell_start && isspace(data[cell_end]))
  1342. cell_end--;
  1343. parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
  1344. if (rndr->make.table_cell)
  1345. rndr->make.table_cell(row_work, cell_work, col_data ? col_data[col] : 0, rndr->make.opaque);
  1346. rndr_popbuf(rndr);
  1347. i++;
  1348. }
  1349. for (; col < columns; ++col) {
  1350. struct buf empty_cell = {0, 0, 0, 0, 0};
  1351. if (rndr->make.table_cell)
  1352. rndr->make.table_cell(row_work, &empty_cell, col_data ? col_data[col] : 0, rndr->make.opaque);
  1353. }
  1354. if (rndr->make.table_row)
  1355. rndr->make.table_row(ob, row_work, rndr->make.opaque);
  1356. rndr_popbuf(rndr);
  1357. }
  1358. static size_t
  1359. parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size, size_t *columns, int **column_data)
  1360. {
  1361. int pipes;
  1362. size_t i = 0, col, header_end, under_end;
  1363. pipes = 0;
  1364. while (i < size && data[i] != '\n')
  1365. if (data[i++] == '|')
  1366. pipes++;
  1367. if (i == size || pipes == 0)
  1368. return 0;
  1369. header_end = i;
  1370. if (data[0] == '|')
  1371. pipes--;
  1372. if (i > 2 && data[i - 1] == '|')
  1373. pipes--;
  1374. *columns = pipes + 1;
  1375. *column_data = calloc(*columns, sizeof(int));
  1376. /* Parse the header underline */
  1377. i++;
  1378. if (i < size && data[i] == '|')
  1379. i++;
  1380. under_end = i;
  1381. while (under_end < size && data[under_end] != '\n')
  1382. under_end++;
  1383. for (col = 0; col < *columns && i < under_end; ++col) {
  1384. if (data[i] == ':') {
  1385. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
  1386. }
  1387. while (i < under_end && data[i] == '-')
  1388. i++;
  1389. if (i < under_end && data[i] == ':') {
  1390. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
  1391. }
  1392. if (i < under_end && data[i] != '|')
  1393. break;
  1394. i++;
  1395. }
  1396. if (col < *columns)
  1397. return 0;
  1398. parse_table_row(ob, rndr, data, header_end, *columns, *column_data);
  1399. return under_end + 1;
  1400. }
  1401. static size_t
  1402. parse_table(struct buf *ob, struct render *rndr, char *data, size_t size)
  1403. {
  1404. size_t i;
  1405. struct buf *header_work = 0;
  1406. struct buf *body_work = 0;
  1407. size_t columns;
  1408. int *col_data = NULL;
  1409. header_work = rndr_newbuf(rndr);
  1410. body_work = rndr_newbuf(rndr);
  1411. i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
  1412. if (i > 0) {
  1413. while (i < size) {
  1414. size_t row_start;
  1415. int pipes = 0;
  1416. row_start = i;
  1417. while (i < size && data[i] != '\n')
  1418. if (data[i++] == '|')
  1419. pipes++;
  1420. if (pipes == 0 || i == size) {
  1421. i = row_start;
  1422. break;
  1423. }
  1424. parse_table_row(body_work, rndr, data + row_start, i - row_start, columns, col_data);
  1425. i++;
  1426. }
  1427. if (rndr->make.table)
  1428. rndr->make.table(ob, header_work, body_work, rndr->make.opaque);
  1429. }
  1430. free(col_data);
  1431. rndr_popbuf(rndr);
  1432. rndr_popbuf(rndr);
  1433. return i;
  1434. }
  1435. /* parse_block • parsing of one block, returning next char to parse */
  1436. static void
  1437. parse_block(struct buf *ob, struct render *rndr, char *data, size_t size)
  1438. {
  1439. size_t beg, end, i;
  1440. char *txt_data;
  1441. beg = 0;
  1442. if (rndr->work.size > rndr->max_nesting)
  1443. return;
  1444. while (beg < size) {
  1445. txt_data = data + beg;
  1446. end = size - beg;
  1447. if (data[beg] == '#')
  1448. beg += parse_atxheader(ob, rndr, txt_data, end);
  1449. else if (data[beg] == '<' && rndr->make.blockhtml &&
  1450. (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
  1451. beg += i;
  1452. else if ((i = is_empty(txt_data, end)) != 0)
  1453. beg += i;
  1454. else if (is_hrule(txt_data, end)) {
  1455. if (rndr->make.hrule)
  1456. rndr->make.hrule(ob, rndr->make.opaque);
  1457. while (beg < size && data[beg] != '\n')
  1458. beg++;
  1459. beg++;
  1460. }
  1461. else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
  1462. (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
  1463. beg += i;
  1464. else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
  1465. (i = parse_table(ob, rndr, txt_data, end)) != 0)
  1466. beg += i;
  1467. else if (prefix_quote(txt_data, end))
  1468. beg += parse_blockquote(ob, rndr, txt_data, end);
  1469. else if (prefix_code(txt_data, end))
  1470. beg += parse_blockcode(ob, rndr, txt_data, end);
  1471. else if (prefix_uli(txt_data, end))
  1472. beg += parse_list(ob, rndr, txt_data, end, 0);
  1473. else if (prefix_oli(txt_data, end))
  1474. beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
  1475. else
  1476. beg += parse_paragraph(ob, rndr, txt_data, end);
  1477. }
  1478. }
  1479. /*********************
  1480. * REFERENCE PARSING *
  1481. *********************/
  1482. /* is_ref • returns whether a line is a reference or not */
  1483. static int
  1484. is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
  1485. {
  1486. /* int n; */
  1487. size_t i = 0;
  1488. size_t id_offset, id_end;
  1489. size_t link_offset, link_end;
  1490. size_t title_offset, title_end;
  1491. size_t line_end;
  1492. struct link_ref *lr;
  1493. /* struct buf id = { 0, 0, 0, 0, 0 }; / * volatile buf for id search */
  1494. /* up to 3 optional leading spaces */
  1495. if (beg + 3 >= end) return 0;
  1496. if (data[beg] == ' ') { i = 1;
  1497. if (data[beg + 1] == ' ') { i = 2;
  1498. if (data[beg + 2] == ' ') { i = 3;
  1499. if (data[beg + 3] == ' ') return 0; } } }
  1500. i += beg;
  1501. /* id part: anything but a newline between brackets */
  1502. if (data[i] != '[') return 0;
  1503. i += 1;
  1504. id_offset = i;
  1505. while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
  1506. i += 1;
  1507. if (i >= end || data[i] != ']') return 0;
  1508. id_end = i;
  1509. /* spacer: colon (space | tab)* newline? (space | tab)* */
  1510. i += 1;
  1511. if (i >= end || data[i] != ':') return 0;
  1512. i += 1;
  1513. while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
  1514. if (i < end && (data[i] == '\n' || data[i] == '\r')) {
  1515. i += 1;
  1516. if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; }
  1517. while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
  1518. if (i >= end) return 0;
  1519. /* link: whitespace-free sequence, optionally between angle brackets */
  1520. if (data[i] == '<') i += 1;
  1521. link_offset = i;
  1522. while (i < end && data[i] != ' ' && data[i] != '\t'
  1523. && data[i] != '\n' && data[i] != '\r') i += 1;
  1524. if (data[i - 1] == '>') link_end = i - 1;
  1525. else link_end = i;
  1526. /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
  1527. while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
  1528. if (i < end && data[i] != '\n' && data[i] != '\r'
  1529. && data[i] != '\'' && data[i] != '"' && data[i] != '(')
  1530. return 0;
  1531. line_end = 0;
  1532. /* computing end-of-line */
  1533. if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
  1534. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  1535. line_end = i + 1;
  1536. /* optional (space|tab)* spacer after a newline */
  1537. if (line_end) {
  1538. i = line_end + 1;
  1539. while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; }
  1540. /* optional title: any non-newline sequence enclosed in '"()
  1541. alone on its line */
  1542. title_offset = title_end = 0;
  1543. if (i + 1 < end
  1544. && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
  1545. i += 1;
  1546. title_offset = i;
  1547. /* looking for EOL */
  1548. while (i < end && data[i] != '\n' && data[i] != '\r') i += 1;
  1549. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  1550. title_end = i + 1;
  1551. else title_end = i;
  1552. /* stepping back */
  1553. i -= 1;
  1554. while (i > title_offset && (data[i] == ' ' || data[i] == '\t'))
  1555. i -= 1;
  1556. if (i > title_offset
  1557. && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
  1558. line_end = title_end;
  1559. title_end = i; } }
  1560. if (!line_end) return 0; /* garbage after the link */
  1561. /* a valid ref has been found, filling-in return structures */
  1562. if (last) *last = line_end;
  1563. if (!refs) return 1;
  1564. lr = arr_item(refs, arr_newitem(refs));
  1565. lr->id = bufnew(id_end - id_offset);
  1566. bufput(lr->id, data + id_offset, id_end - id_offset);
  1567. lr->link = bufnew(link_end - link_offset);
  1568. bufput(lr->link, data + link_offset, link_end - link_offset);
  1569. if (title_end > title_offset) {
  1570. lr->title = bufnew(title_end - title_offset);
  1571. bufput(lr->title, data + title_offset,
  1572. title_end - title_offset); }
  1573. else lr->title = 0;
  1574. return 1;
  1575. }
  /* expand_tabs • appends line[0..size) to ob, replacing each tab with */
  /* spaces up to the next column that is a multiple of 4 */
  static void expand_tabs(struct buf *ob, const char *line, size_t size)
  {
      size_t pos = 0;
      size_t column = 0; /* output column, counted from the start of line */

      while (pos < size) {
          size_t run_start = pos;

          /* copy the run of ordinary characters up to the next tab */
          for (; pos < size && line[pos] != '\t'; pos++)
              ;
          column += pos - run_start;

          if (pos > run_start)
              bufput(ob, line + run_start, pos - run_start);

          if (pos >= size)
              break;

          /* a tab always yields at least one space */
          do {
              bufputc(ob, ' ');
              column++;
          } while (column % 4);

          pos++;
      }
  }
  1594. /**********************
  1595. * EXPORTED FUNCTIONS *
  1596. **********************/
  1597. /* markdown • parses the input buffer and renders it into the output buffer */
  1598. void
  1599. ups_markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer, unsigned int extensions) {
  1600. struct link_ref *lr;
  1601. struct buf *text;
  1602. size_t i, beg, end;
  1603. struct render rndr;
  1604. /* filling the render structure */
  1605. if (!rndrer)
  1606. return;
  1607. text = bufnew(TEXT_UNIT);
  1608. if (!text)
  1609. return;
  1610. rndr.make = *rndrer;
  1611. arr_init(&rndr.refs, sizeof (struct link_ref));
  1612. parr_init(&rndr.work);
  1613. for (i = 0; i < 256; i += 1)
  1614. rndr.active_char[i] = 0;
  1615. if (rndr.make.emphasis || rndr.make.double_emphasis || rndr.make.triple_emphasis) {
  1616. rndr.active_char['*'] = char_emphasis;
  1617. rndr.active_char['_'] = char_emphasis;
  1618. if (extensions & MKDEXT_STRIKETHROUGH)
  1619. rndr.active_char['~'] = char_emphasis;
  1620. }
  1621. if (rndr.make.codespan)
  1622. rndr.active_char['`'] = char_codespan;
  1623. if (rndr.make.linebreak)
  1624. rndr.active_char['\n'] = char_linebreak;
  1625. if (rndr.make.image || rndr.make.link)
  1626. rndr.active_char['['] = char_link;
  1627. rndr.active_char['<'] = char_langle_tag;
  1628. rndr.active_char['\\'] = char_escape;
  1629. rndr.active_char['&'] = char_entity;
  1630. if (extensions & MKDEXT_AUTOLINK) {
  1631. rndr.active_char['h'] = char_autolink; // http, https
  1632. rndr.active_char['H'] = char_autolink;
  1633. rndr.active_char['f'] = char_autolink; // ftp
  1634. rndr.active_char['F'] = char_autolink;
  1635. rndr.active_char['m'] = char_autolink; // mailto
  1636. rndr.active_char['M'] = char_autolink;
  1637. }
  1638. /* Extension data */
  1639. rndr.ext_flags = extensions;
  1640. rndr.max_nesting = 16;
  1641. /* first pass: looking for references, copying everything else */
  1642. beg = 0;
  1643. while (beg < ib->size) /* iterating over lines */
  1644. if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs))
  1645. beg = end;
  1646. else { /* skipping to the next line */
  1647. end = beg;
  1648. while (end < ib->size && ib->data[end] != '\n' && ib->data[end] != '\r')
  1649. end += 1;
  1650. /* adding the line body if present */
  1651. if (end > beg)
  1652. expand_tabs(text, ib->data + beg, end - beg);
  1653. while (end < ib->size && (ib->data[end] == '\n' || ib->data[end] == '\r')) {
  1654. /* add one \n per newline */
  1655. if (ib->data[end] == '\n' || (end + 1 < ib->size && ib->data[end + 1] != '\n'))
  1656. bufputc(text, '\n');
  1657. end += 1;
  1658. }
  1659. beg = end;
  1660. }
  1661. /* sorting the reference array */
  1662. if (rndr.refs.size)
  1663. qsort(rndr.refs.base, rndr.refs.size, rndr.refs.unit, cmp_link_ref_sort);
  1664. /* adding a final newline if not already present */
  1665. if (!text->size)
  1666. goto cleanup;
  1667. if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
  1668. bufputc(text, '\n…

Large files are truncated, but you can click here to view the full file