PageRenderTime 60ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/reddish/lib/sundown/src/markdown.c

https://bitbucket.org/murarth/reddish
C | 2527 lines | 2310 code | 139 blank | 78 comment | 201 complexity | f5d676d5ca4135f38f714cea86bc1fe2 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /* markdown.c - generic markdown parser */
  2. /*
  3. * Copyright (c) 2009, Natacha Porté
  4. * Copyright (c) 2011, Vicent Marti
  5. *
  6. * Permission to use, copy, modify, and distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. #include "markdown.h"
  19. #include "stack.h"
  20. #include <assert.h>
  21. #include <string.h>
  22. #include <ctype.h>
  23. #include <stdio.h>
  24. #if defined(_WIN32)
  25. #define strncasecmp _strnicmp
  26. #endif
  27. #define BUFFER_BLOCK 0
  28. #define BUFFER_SPAN 1
  29. #define MKD_LI_END 8 /* internal list flag */
  30. #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
  31. #define GPERF_DOWNCASE 1
  32. #define GPERF_CASE_STRNCMP 1
  33. #include "html_blocks.h"
  34. /***************
  35. * LOCAL TYPES *
  36. ***************/
  37. /* link_ref: reference to a link */
  38. struct link_ref {
  39. unsigned int id;
  40. struct buf *link;
  41. struct buf *title;
  42. struct link_ref *next;
  43. };
  44. /* char_trigger: function pointer to render active chars */
  45. /* returns the number of chars taken care of */
  46. /* data is the pointer of the beginning of the span */
  47. /* offset is the number of valid chars before data */
  48. struct sd_markdown;
  49. typedef size_t
  50. (*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  51. static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  52. static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  53. static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  54. static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  55. static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  56. static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  57. static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  58. static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  59. static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  60. static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  61. static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  62. enum markdown_char_t {
  63. MD_CHAR_NONE = 0,
  64. MD_CHAR_EMPHASIS,
  65. MD_CHAR_CODESPAN,
  66. MD_CHAR_LINEBREAK,
  67. MD_CHAR_LINK,
  68. MD_CHAR_LANGLE,
  69. MD_CHAR_ESCAPE,
  70. MD_CHAR_ENTITITY,
  71. MD_CHAR_AUTOLINK_URL,
  72. MD_CHAR_AUTOLINK_EMAIL,
  73. MD_CHAR_AUTOLINK_WWW,
  74. MD_CHAR_SUPERSCRIPT,
  75. };
  76. static char_trigger markdown_char_ptrs[] = {
  77. NULL,
  78. &char_emphasis,
  79. &char_codespan,
  80. &char_linebreak,
  81. &char_link,
  82. &char_langle_tag,
  83. &char_escape,
  84. &char_entity,
  85. &char_autolink_url,
  86. &char_autolink_email,
  87. &char_autolink_www,
  88. &char_superscript,
  89. };
  90. /***************************
  91. * HELPER FUNCTIONS *
  92. ***************************/
  93. static inline struct buf *
  94. rndr_newbuf(struct sd_markdown *rndr, int type)
  95. {
  96. static const size_t buf_size[2] = {256, 64};
  97. struct buf *work = NULL;
  98. struct stack *pool = &rndr->work_bufs[type];
  99. if (pool->size < pool->asize &&
  100. pool->item[pool->size] != NULL) {
  101. work = pool->item[pool->size++];
  102. work->size = 0;
  103. } else {
  104. work = bufnew(buf_size[type]);
  105. stack_push(pool, work);
  106. }
  107. return work;
  108. }
  109. static inline void
  110. rndr_popbuf(struct sd_markdown *rndr, int type)
  111. {
  112. rndr->work_bufs[type].size--;
  113. }
  114. static void
  115. unscape_text(struct buf *ob, struct buf *src)
  116. {
  117. size_t i = 0, org;
  118. while (i < src->size) {
  119. org = i;
  120. while (i < src->size && src->data[i] != '\\')
  121. i++;
  122. if (i > org)
  123. bufput(ob, src->data + org, i - org);
  124. if (i + 1 >= src->size)
  125. break;
  126. bufputc(ob, src->data[i + 1]);
  127. i += 2;
  128. }
  129. }
  130. static unsigned int
  131. hash_link_ref(const uint8_t *link_ref, size_t length)
  132. {
  133. size_t i;
  134. unsigned int hash = 0;
  135. for (i = 0; i < length; ++i)
  136. hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
  137. return hash;
  138. }
  139. static struct link_ref *
  140. add_link_ref(
  141. struct link_ref **references,
  142. const uint8_t *name, size_t name_size)
  143. {
  144. struct link_ref *ref = calloc(1, sizeof(struct link_ref));
  145. if (!ref)
  146. return NULL;
  147. ref->id = hash_link_ref(name, name_size);
  148. ref->next = references[ref->id % REF_TABLE_SIZE];
  149. references[ref->id % REF_TABLE_SIZE] = ref;
  150. return ref;
  151. }
  152. static struct link_ref *
  153. find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
  154. {
  155. unsigned int hash = hash_link_ref(name, length);
  156. struct link_ref *ref = NULL;
  157. ref = references[hash % REF_TABLE_SIZE];
  158. while (ref != NULL) {
  159. if (ref->id == hash)
  160. return ref;
  161. ref = ref->next;
  162. }
  163. return NULL;
  164. }
  165. static void
  166. free_link_refs(struct link_ref **references)
  167. {
  168. size_t i;
  169. for (i = 0; i < REF_TABLE_SIZE; ++i) {
  170. struct link_ref *r = references[i];
  171. struct link_ref *next;
  172. while (r) {
  173. next = r->next;
  174. bufrelease(r->link);
  175. bufrelease(r->title);
  176. free(r);
  177. r = next;
  178. }
  179. }
  180. }
  181. /*
  182. * Check whether a char is a Markdown space.
  183. * Right now we only consider spaces the actual
  184. * space and a newline: tabs and carriage returns
  185. * are filtered out during the preprocessing phase.
  186. *
  187. * If we wanted to actually be UTF-8 compliant, we
  188. * should instead extract an Unicode codepoint from
  189. * this character and check for space properties.
  190. */
  191. static inline int
  192. _isspace(int c)
  193. {
  194. return c == ' ' || c == '\n';
  195. }
  196. /****************************
  197. * INLINE PARSING FUNCTIONS *
  198. ****************************/
  199. /* is_mail_autolink • looks for the address part of a mail autolink and '>' */
  200. /* this is less strict than the original markdown e-mail address matching */
  201. static size_t
  202. is_mail_autolink(uint8_t *data, size_t size)
  203. {
  204. size_t i = 0, nb = 0;
  205. /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
  206. for (i = 0; i < size; ++i) {
  207. if (isalnum(data[i]))
  208. continue;
  209. switch (data[i]) {
  210. case '@':
  211. nb++;
  212. case '-':
  213. case '.':
  214. case '_':
  215. break;
  216. case '>':
  217. return (nb == 1) ? i + 1 : 0;
  218. default:
  219. return 0;
  220. }
  221. }
  222. return 0;
  223. }
  224. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  225. static size_t
  226. tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
  227. {
  228. size_t i, j;
  229. /* a valid tag can't be shorter than 3 chars */
  230. if (size < 3) return 0;
  231. /* begins with a '<' optionally followed by '/', followed by letter or number */
  232. if (data[0] != '<') return 0;
  233. i = (data[1] == '/') ? 2 : 1;
  234. if (!isalnum(data[i]))
  235. return 0;
  236. /* scheme test */
  237. *autolink = MKDA_NOT_AUTOLINK;
  238. /* try to find the beginning of an URI */
  239. while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  240. i++;
  241. if (i > 1 && data[i] == '@') {
  242. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  243. *autolink = MKDA_EMAIL;
  244. return i + j;
  245. }
  246. }
  247. if (i > 2 && data[i] == ':') {
  248. *autolink = MKDA_NORMAL;
  249. i++;
  250. }
  251. /* completing autolink test: no whitespace or ' or " */
  252. if (i >= size)
  253. *autolink = MKDA_NOT_AUTOLINK;
  254. else if (*autolink) {
  255. j = i;
  256. while (i < size) {
  257. if (data[i] == '\\') i += 2;
  258. else if (data[i] == '>' || data[i] == '\'' ||
  259. data[i] == '"' || data[i] == ' ' || data[i] == '\n')
  260. break;
  261. else i++;
  262. }
  263. if (i >= size) return 0;
  264. if (i > j && data[i] == '>') return i + 1;
  265. /* one of the forbidden chars has been found */
  266. *autolink = MKDA_NOT_AUTOLINK;
  267. }
  268. /* looking for sometinhg looking like a tag end */
  269. while (i < size && data[i] != '>') i++;
  270. if (i >= size) return 0;
  271. return i + 1;
  272. }
  273. /* parse_inline • parses inline markdown elements */
  274. static void
  275. parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  276. {
  277. size_t i = 0, end = 0;
  278. uint8_t action = 0;
  279. struct buf work = { 0, 0, 0, 0 };
  280. if (rndr->work_bufs[BUFFER_SPAN].size +
  281. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  282. return;
  283. while (i < size) {
  284. /* copying inactive chars into the output */
  285. while (end < size && (action = rndr->active_char[data[end]]) == 0) {
  286. end++;
  287. }
  288. if (rndr->cb.normal_text) {
  289. work.data = data + i;
  290. work.size = end - i;
  291. rndr->cb.normal_text(ob, &work, rndr->opaque);
  292. }
  293. else
  294. bufput(ob, data + i, end - i);
  295. if (end >= size) break;
  296. i = end;
  297. end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
  298. if (!end) /* no action from the callback */
  299. end = i + 1;
  300. else {
  301. i += end;
  302. end = i;
  303. }
  304. }
  305. }
  306. /* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
  307. static size_t
  308. find_emph_char(uint8_t *data, size_t size, uint8_t c)
  309. {
  310. size_t i = 1;
  311. while (i < size) {
  312. while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
  313. i++;
  314. if (i == size)
  315. return 0;
  316. if (data[i] == c)
  317. return i;
  318. /* not counting escaped chars */
  319. if (i && data[i - 1] == '\\') {
  320. i++; continue;
  321. }
  322. if (data[i] == '`') {
  323. size_t span_nb = 0, bt;
  324. size_t tmp_i = 0;
  325. /* counting the number of opening backticks */
  326. while (i < size && data[i] == '`') {
  327. i++; span_nb++;
  328. }
  329. if (i >= size) return 0;
  330. /* finding the matching closing sequence */
  331. bt = 0;
  332. while (i < size && bt < span_nb) {
  333. if (!tmp_i && data[i] == c) tmp_i = i;
  334. if (data[i] == '`') bt++;
  335. else bt = 0;
  336. i++;
  337. }
  338. if (i >= size) return tmp_i;
  339. }
  340. /* skipping a link */
  341. else if (data[i] == '[') {
  342. size_t tmp_i = 0;
  343. uint8_t cc;
  344. i++;
  345. while (i < size && data[i] != ']') {
  346. if (!tmp_i && data[i] == c) tmp_i = i;
  347. i++;
  348. }
  349. i++;
  350. while (i < size && (data[i] == ' ' || data[i] == '\n'))
  351. i++;
  352. if (i >= size)
  353. return tmp_i;
  354. switch (data[i]) {
  355. case '[':
  356. cc = ']'; break;
  357. case '(':
  358. cc = ')'; break;
  359. default:
  360. if (tmp_i)
  361. return tmp_i;
  362. else
  363. continue;
  364. }
  365. i++;
  366. while (i < size && data[i] != cc) {
  367. if (!tmp_i && data[i] == c) tmp_i = i;
  368. i++;
  369. }
  370. if (i >= size)
  371. return tmp_i;
  372. i++;
  373. }
  374. }
  375. return 0;
  376. }
  377. /* parse_emph1 • parsing single emphase */
  378. /* closed by a symbol not preceded by whitespace and not followed by symbol */
  379. static size_t
  380. parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  381. {
  382. size_t i = 0, len;
  383. struct buf *work = 0;
  384. int r;
  385. if (!rndr->cb.emphasis) return 0;
  386. /* skipping one symbol if coming from emph3 */
  387. if (size > 1 && data[0] == c && data[1] == c) i = 1;
  388. while (i < size) {
  389. len = find_emph_char(data + i, size - i, c);
  390. if (!len) return 0;
  391. i += len;
  392. if (i >= size) return 0;
  393. if (data[i] == c && !_isspace(data[i - 1])) {
  394. if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
  395. if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
  396. continue;
  397. }
  398. work = rndr_newbuf(rndr, BUFFER_SPAN);
  399. parse_inline(work, rndr, data, i);
  400. r = rndr->cb.emphasis(ob, work, rndr->opaque);
  401. rndr_popbuf(rndr, BUFFER_SPAN);
  402. return r ? i + 1 : 0;
  403. }
  404. }
  405. return 0;
  406. }
  407. /* parse_emph2 • parsing single emphase */
  408. static size_t
  409. parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  410. {
  411. int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
  412. size_t i = 0, len;
  413. struct buf *work = 0;
  414. int r;
  415. render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
  416. if (!render_method)
  417. return 0;
  418. while (i < size) {
  419. len = find_emph_char(data + i, size - i, c);
  420. if (!len) return 0;
  421. i += len;
  422. if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
  423. work = rndr_newbuf(rndr, BUFFER_SPAN);
  424. parse_inline(work, rndr, data, i);
  425. r = render_method(ob, work, rndr->opaque);
  426. rndr_popbuf(rndr, BUFFER_SPAN);
  427. return r ? i + 2 : 0;
  428. }
  429. i++;
  430. }
  431. return 0;
  432. }
  433. /* parse_emph3 • parsing single emphase */
  434. /* finds the first closing tag, and delegates to the other emph */
  435. static size_t
  436. parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  437. {
  438. size_t i = 0, len;
  439. int r;
  440. while (i < size) {
  441. len = find_emph_char(data + i, size - i, c);
  442. if (!len) return 0;
  443. i += len;
  444. /* skip whitespace preceded symbols */
  445. if (data[i] != c || _isspace(data[i - 1]))
  446. continue;
  447. if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
  448. /* triple symbol found */
  449. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  450. parse_inline(work, rndr, data, i);
  451. r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
  452. rndr_popbuf(rndr, BUFFER_SPAN);
  453. return r ? i + 3 : 0;
  454. } else if (i + 1 < size && data[i + 1] == c) {
  455. /* double symbol found, handing over to emph1 */
  456. len = parse_emph1(ob, rndr, data - 2, size + 2, c);
  457. if (!len) return 0;
  458. else return len - 2;
  459. } else {
  460. /* single symbol found, handing over to emph2 */
  461. len = parse_emph2(ob, rndr, data - 1, size + 1, c);
  462. if (!len) return 0;
  463. else return len - 1;
  464. }
  465. }
  466. return 0;
  467. }
  468. /* char_emphasis • single and double emphasis parsing */
  469. static size_t
  470. char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  471. {
  472. uint8_t c = data[0];
  473. size_t ret;
  474. if (size > 2 && data[1] != c) {
  475. /* whitespace cannot follow an opening emphasis;
  476. * strikethrough only takes two characters '~~' */
  477. if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
  478. return 0;
  479. return ret + 1;
  480. }
  481. if (size > 3 && data[1] == c && data[2] != c) {
  482. if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
  483. return 0;
  484. return ret + 2;
  485. }
  486. if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
  487. if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
  488. return 0;
  489. return ret + 3;
  490. }
  491. return 0;
  492. }
  493. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  494. static size_t
  495. char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  496. {
  497. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  498. return 0;
  499. /* removing the last space from ob and rendering */
  500. while (ob->size && ob->data[ob->size - 1] == ' ')
  501. ob->size--;
  502. return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
  503. }
  504. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  505. static size_t
  506. char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  507. {
  508. size_t end, nb = 0, i, f_begin, f_end;
  509. /* counting the number of backticks in the delimiter */
  510. while (nb < size && data[nb] == '`')
  511. nb++;
  512. /* finding the next delimiter */
  513. i = 0;
  514. for (end = nb; end < size && i < nb; end++) {
  515. if (data[end] == '`') i++;
  516. else i = 0;
  517. }
  518. if (i < nb && end >= size)
  519. return 0; /* no matching delimiter */
  520. /* trimming outside whitespaces */
  521. f_begin = nb;
  522. while (f_begin < end && data[f_begin] == ' ')
  523. f_begin++;
  524. f_end = end - nb;
  525. while (f_end > nb && data[f_end-1] == ' ')
  526. f_end--;
  527. /* real code span */
  528. if (f_begin < f_end) {
  529. struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
  530. if (!rndr->cb.codespan(ob, &work, rndr->opaque))
  531. end = 0;
  532. } else {
  533. if (!rndr->cb.codespan(ob, 0, rndr->opaque))
  534. end = 0;
  535. }
  536. return end;
  537. }
  538. /* char_escape • '\\' backslash escape */
  539. static size_t
  540. char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  541. {
  542. static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
  543. struct buf work = { 0, 0, 0, 0 };
  544. if (size > 1) {
  545. if (strchr(escape_chars, data[1]) == NULL)
  546. return 0;
  547. if (rndr->cb.normal_text) {
  548. work.data = data + 1;
  549. work.size = 1;
  550. rndr->cb.normal_text(ob, &work, rndr->opaque);
  551. }
  552. else bufputc(ob, data[1]);
  553. } else if (size == 1) {
  554. bufputc(ob, data[0]);
  555. }
  556. return 2;
  557. }
  558. /* char_entity • '&' escaped when it doesn't belong to an entity */
  559. /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
  560. static size_t
  561. char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  562. {
  563. size_t end = 1;
  564. struct buf work = { 0, 0, 0, 0 };
  565. if (end < size && data[end] == '#')
  566. end++;
  567. while (end < size && isalnum(data[end]))
  568. end++;
  569. if (end < size && data[end] == ';')
  570. end++; /* real entity */
  571. else
  572. return 0; /* lone '&' */
  573. if (rndr->cb.entity) {
  574. work.data = data;
  575. work.size = end;
  576. rndr->cb.entity(ob, &work, rndr->opaque);
  577. }
  578. else bufput(ob, data, end);
  579. return end;
  580. }
  581. /* char_langle_tag • '<' when tags or autolinks are allowed */
  582. static size_t
  583. char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  584. {
  585. enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
  586. size_t end = tag_length(data, size, &altype);
  587. struct buf work = { data, end, 0, 0 };
  588. int ret = 0;
  589. if (end > 2) {
  590. if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
  591. struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  592. work.data = data + 1;
  593. work.size = end - 2;
  594. unscape_text(u_link, &work);
  595. ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
  596. rndr_popbuf(rndr, BUFFER_SPAN);
  597. }
  598. else if (rndr->cb.raw_html_tag)
  599. ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
  600. }
  601. if (!ret) return 0;
  602. else return end;
  603. }
  604. static size_t
  605. char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  606. {
  607. struct buf *link, *link_url, *link_text;
  608. size_t link_len, rewind;
  609. if (!rndr->cb.link || rndr->in_link_body)
  610. return 0;
  611. link = rndr_newbuf(rndr, BUFFER_SPAN);
  612. if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
  613. link_url = rndr_newbuf(rndr, BUFFER_SPAN);
  614. BUFPUTSL(link_url, "http://");
  615. bufput(link_url, link->data, link->size);
  616. ob->size -= rewind;
  617. if (rndr->cb.normal_text) {
  618. link_text = rndr_newbuf(rndr, BUFFER_SPAN);
  619. rndr->cb.normal_text(link_text, link, rndr->opaque);
  620. rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
  621. rndr_popbuf(rndr, BUFFER_SPAN);
  622. } else {
  623. rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
  624. }
  625. rndr_popbuf(rndr, BUFFER_SPAN);
  626. }
  627. rndr_popbuf(rndr, BUFFER_SPAN);
  628. return link_len;
  629. }
  630. static size_t
  631. char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  632. {
  633. struct buf *link;
  634. size_t link_len, rewind;
  635. if (!rndr->cb.autolink || rndr->in_link_body)
  636. return 0;
  637. link = rndr_newbuf(rndr, BUFFER_SPAN);
  638. if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
  639. ob->size -= rewind;
  640. rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
  641. }
  642. rndr_popbuf(rndr, BUFFER_SPAN);
  643. return link_len;
  644. }
  645. static size_t
  646. char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  647. {
  648. struct buf *link;
  649. size_t link_len, rewind;
  650. if (!rndr->cb.autolink || rndr->in_link_body)
  651. return 0;
  652. link = rndr_newbuf(rndr, BUFFER_SPAN);
  653. if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
  654. ob->size -= rewind;
  655. rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
  656. }
  657. rndr_popbuf(rndr, BUFFER_SPAN);
  658. return link_len;
  659. }
  660. /* char_link • '[': parsing a link or an image */
  661. static size_t
  662. char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  663. {
  664. int is_img = (offset && data[-1] == '!'), level;
  665. size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
  666. struct buf *content = 0;
  667. struct buf *link = 0;
  668. struct buf *title = 0;
  669. struct buf *u_link = 0;
  670. size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
  671. int text_has_nl = 0, ret = 0;
  672. int in_title = 0, qtype = 0;
  673. /* checking whether the correct renderer exists */
  674. if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
  675. goto cleanup;
  676. /* looking for the matching closing bracket */
  677. for (level = 1; i < size; i++) {
  678. if (data[i] == '\n')
  679. text_has_nl = 1;
  680. else if (data[i - 1] == '\\')
  681. continue;
  682. else if (data[i] == '[')
  683. level++;
  684. else if (data[i] == ']') {
  685. level--;
  686. if (level <= 0)
  687. break;
  688. }
  689. }
  690. if (i >= size)
  691. goto cleanup;
  692. txt_e = i;
  693. i++;
  694. /* skip any amount of whitespace or newline */
  695. /* (this is much more laxist than original markdown syntax) */
  696. while (i < size && _isspace(data[i]))
  697. i++;
  698. /* inline style link */
  699. if (i < size && data[i] == '(') {
  700. /* skipping initial whitespace */
  701. i++;
  702. while (i < size && _isspace(data[i]))
  703. i++;
  704. link_b = i;
  705. /* looking for link end: ' " ) */
  706. while (i < size) {
  707. if (data[i] == '\\') i += 2;
  708. else if (data[i] == ')') break;
  709. else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
  710. else i++;
  711. }
  712. if (i >= size) goto cleanup;
  713. link_e = i;
  714. /* looking for title end if present */
  715. if (data[i] == '\'' || data[i] == '"') {
  716. qtype = data[i];
  717. in_title = 1;
  718. i++;
  719. title_b = i;
  720. while (i < size) {
  721. if (data[i] == '\\') i += 2;
  722. else if (data[i] == qtype) {in_title = 0; i++;}
  723. else if ((data[i] == ')') && !in_title) break;
  724. else i++;
  725. }
  726. if (i >= size) goto cleanup;
  727. /* skipping whitespaces after title */
  728. title_e = i - 1;
  729. while (title_e > title_b && _isspace(data[title_e]))
  730. title_e--;
  731. /* checking for closing quote presence */
  732. if (data[title_e] != '\'' && data[title_e] != '"') {
  733. title_b = title_e = 0;
  734. link_e = i;
  735. }
  736. }
  737. /* remove whitespace at the end of the link */
  738. while (link_e > link_b && _isspace(data[link_e - 1]))
  739. link_e--;
  740. /* remove optional angle brackets around the link */
  741. if (data[link_b] == '<') link_b++;
  742. if (data[link_e - 1] == '>') link_e--;
  743. /* building escaped link and title */
  744. if (link_e > link_b) {
  745. link = rndr_newbuf(rndr, BUFFER_SPAN);
  746. bufput(link, data + link_b, link_e - link_b);
  747. }
  748. if (title_e > title_b) {
  749. title = rndr_newbuf(rndr, BUFFER_SPAN);
  750. bufput(title, data + title_b, title_e - title_b);
  751. }
  752. i++;
  753. }
  754. /* reference style link */
  755. else if (i < size && data[i] == '[') {
  756. struct buf id = { 0, 0, 0, 0 };
  757. struct link_ref *lr;
  758. /* looking for the id */
  759. i++;
  760. link_b = i;
  761. while (i < size && data[i] != ']') i++;
  762. if (i >= size) goto cleanup;
  763. link_e = i;
  764. /* finding the link_ref */
  765. if (link_b == link_e) {
  766. if (text_has_nl) {
  767. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  768. size_t j;
  769. for (j = 1; j < txt_e; j++) {
  770. if (data[j] != '\n')
  771. bufputc(b, data[j]);
  772. else if (data[j - 1] != ' ')
  773. bufputc(b, ' ');
  774. }
  775. id.data = b->data;
  776. id.size = b->size;
  777. } else {
  778. id.data = data + 1;
  779. id.size = txt_e - 1;
  780. }
  781. } else {
  782. id.data = data + link_b;
  783. id.size = link_e - link_b;
  784. }
  785. lr = find_link_ref(rndr->refs, id.data, id.size);
  786. if (!lr)
  787. goto cleanup;
  788. /* keeping link and title from link_ref */
  789. link = lr->link;
  790. title = lr->title;
  791. i++;
  792. }
  793. /* shortcut reference style link */
  794. else {
  795. struct buf id = { 0, 0, 0, 0 };
  796. struct link_ref *lr;
  797. /* crafting the id */
  798. if (text_has_nl) {
  799. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  800. size_t j;
  801. for (j = 1; j < txt_e; j++) {
  802. if (data[j] != '\n')
  803. bufputc(b, data[j]);
  804. else if (data[j - 1] != ' ')
  805. bufputc(b, ' ');
  806. }
  807. id.data = b->data;
  808. id.size = b->size;
  809. } else {
  810. id.data = data + 1;
  811. id.size = txt_e - 1;
  812. }
  813. /* finding the link_ref */
  814. lr = find_link_ref(rndr->refs, id.data, id.size);
  815. if (!lr)
  816. goto cleanup;
  817. /* keeping link and title from link_ref */
  818. link = lr->link;
  819. title = lr->title;
  820. /* rewinding the whitespace */
  821. i = txt_e + 1;
  822. }
  823. /* building content: img alt is escaped, link content is parsed */
  824. if (txt_e > 1) {
  825. content = rndr_newbuf(rndr, BUFFER_SPAN);
  826. if (is_img) {
  827. bufput(content, data + 1, txt_e - 1);
  828. } else {
  829. /* disable autolinking when parsing inline the
  830. * content of a link */
  831. rndr->in_link_body = 1;
  832. parse_inline(content, rndr, data + 1, txt_e - 1);
  833. rndr->in_link_body = 0;
  834. }
  835. }
  836. if (link) {
  837. u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  838. unscape_text(u_link, link);
  839. }
  840. /* calling the relevant rendering function */
  841. if (is_img) {
  842. if (ob->size && ob->data[ob->size - 1] == '!')
  843. ob->size -= 1;
  844. ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
  845. } else {
  846. ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
  847. }
  848. /* cleanup */
  849. cleanup:
  850. rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
  851. return ret ? i : 0;
  852. }
  853. static size_t
  854. char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  855. {
  856. size_t sup_start, sup_len;
  857. struct buf *sup;
  858. if (!rndr->cb.superscript)
  859. return 0;
  860. if (size < 2)
  861. return 0;
  862. if (data[1] == '(') {
  863. sup_start = sup_len = 2;
  864. while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
  865. sup_len++;
  866. if (sup_len == size)
  867. return 0;
  868. } else {
  869. sup_start = sup_len = 1;
  870. while (sup_len < size && !_isspace(data[sup_len]))
  871. sup_len++;
  872. }
  873. if (sup_len - sup_start == 0)
  874. return (sup_start == 2) ? 3 : 0;
  875. sup = rndr_newbuf(rndr, BUFFER_SPAN);
  876. parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
  877. rndr->cb.superscript(ob, sup, rndr->opaque);
  878. rndr_popbuf(rndr, BUFFER_SPAN);
  879. return (sup_start == 2) ? sup_len + 1 : sup_len;
  880. }
  881. /*********************************
  882. * BLOCK-LEVEL PARSING FUNCTIONS *
  883. *********************************/
  884. /* is_empty • returns the line length when it is empty, 0 otherwise */
  885. static size_t
  886. is_empty(uint8_t *data, size_t size)
  887. {
  888. size_t i;
  889. for (i = 0; i < size && data[i] != '\n'; i++)
  890. if (data[i] != ' ')
  891. return 0;
  892. return i + 1;
  893. }
  894. /* is_hrule • returns whether a line is a horizontal rule */
  895. static int
  896. is_hrule(uint8_t *data, size_t size)
  897. {
  898. size_t i = 0, n = 0;
  899. uint8_t c;
  900. /* skipping initial spaces */
  901. if (size < 3) return 0;
  902. if (data[0] == ' ') { i++;
  903. if (data[1] == ' ') { i++;
  904. if (data[2] == ' ') { i++; } } }
  905. /* looking at the hrule uint8_t */
  906. if (i + 2 >= size
  907. || (data[i] != '*' && data[i] != '-' && data[i] != '_'))
  908. return 0;
  909. c = data[i];
  910. /* the whole line must be the char or whitespace */
  911. while (i < size && data[i] != '\n') {
  912. if (data[i] == c) n++;
  913. else if (data[i] != ' ')
  914. return 0;
  915. i++;
  916. }
  917. return n >= 3;
  918. }
  /* prefix_codefence • checks whether a line begins with a code fence, i.e.
   * up to three spaces followed by a run of at least three '~' or '`'.
   * Returns the offset just past the fence run, or 0 when there is no fence. */
  static size_t
  prefix_codefence(uint8_t *data, size_t size)
  {
      size_t pos = 0, fence_start;
      uint8_t mark;

      if (size < 3)
          return 0;

      /* tolerate up to three spaces of indentation */
      while (pos < 3 && data[pos] == ' ')
          pos++;

      /* the first significant character selects the fence marker */
      if (pos + 2 >= size)
          return 0;

      mark = data[pos];
      if (mark != '~' && mark != '`')
          return 0;

      /* measure the run of fence characters */
      fence_start = pos;
      while (pos < size && data[pos] == mark)
          pos++;

      if (pos - fence_start < 3)
          return 0;

      return pos;
  }
  943. /* check if a line is a code fence; return its size if it is */
  944. static size_t
  945. is_codefence(uint8_t *data, size_t size, struct buf *syntax)
  946. {
  947. size_t i = 0, syn_len = 0;
  948. uint8_t *syn_start;
  949. i = prefix_codefence(data, size);
  950. if (i == 0)
  951. return 0;
  952. while (i < size && data[i] == ' ')
  953. i++;
  954. syn_start = data + i;
  955. if (i < size && data[i] == '{') {
  956. i++; syn_start++;
  957. while (i < size && data[i] != '}' && data[i] != '\n') {
  958. syn_len++; i++;
  959. }
  960. if (i == size || data[i] != '}')
  961. return 0;
  962. /* strip all whitespace at the beginning and the end
  963. * of the {} block */
  964. while (syn_len > 0 && _isspace(syn_start[0])) {
  965. syn_start++; syn_len--;
  966. }
  967. while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
  968. syn_len--;
  969. i++;
  970. } else {
  971. while (i < size && !_isspace(data[i])) {
  972. syn_len++; i++;
  973. }
  974. }
  975. if (syntax) {
  976. syntax->data = syn_start;
  977. syntax->size = syn_len;
  978. }
  979. while (i < size && data[i] != '\n') {
  980. if (!_isspace(data[i]))
  981. return 0;
  982. i++;
  983. }
  984. return i + 1;
  985. }
  986. /* is_atxheader • returns whether the line is a hash-prefixed header */
  987. static int
  988. is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
  989. {
  990. if (data[0] != '#')
  991. return 0;
  992. if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
  993. size_t level = 0;
  994. while (level < size && level < 6 && data[level] == '#')
  995. level++;
  996. if (level < size && data[level] != ' ')
  997. return 0;
  998. }
  999. return 1;
  1000. }
  /* is_headerline • tells whether the line is a setext-style header underline.
   * Returns 1 for a run of '=', 2 for a run of '-', in both cases optionally
   * followed by spaces up to the end of the line; returns 0 otherwise.
   * The caller must pass at least one readable byte in data. */
  static int
  is_headerline(uint8_t *data, size_t size)
  {
      uint8_t mark = data[0];
      size_t pos = 1;
      int level;

      if (mark == '=')
          level = 1;
      else if (mark == '-')
          level = 2;
      else
          return 0;

      /* the underline itself */
      while (pos < size && data[pos] == mark)
          pos++;

      /* trailing spaces */
      while (pos < size && data[pos] == ' ')
          pos++;

      /* anything else before the end of the line disqualifies it */
      if (pos < size && data[pos] != '\n')
          return 0;

      return level;
  }
  /* is_next_headerline • tells whether the line following the first line of
   * data is a setext header underline; returns the is_headerline() result
   * for that line, or 0 when there is no following line. */
  static int
  is_next_headerline(uint8_t *data, size_t size)
  {
      size_t eol, next;

      /* find the newline that ends the first line */
      for (eol = 0; eol < size && data[eol] != '\n'; eol++)
          ;

      next = eol + 1;
      if (next >= size)
          return 0;

      return is_headerline(data + next, size - next);
  }
  /* prefix_quote • returns the length of the blockquote prefix: up to three
   * spaces, a '>' and one optional following space. Returns 0 when the line
   * does not start with such a prefix. */
  static size_t
  prefix_quote(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      /* tolerate up to three spaces of indentation */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      if (pos >= size || data[pos] != '>')
          return 0;

      /* swallow a single space after the marker, if present */
      if (pos + 1 < size && data[pos + 1] == ' ')
          return pos + 2;

      return pos + 1;
  }
  /* prefix_code • returns the prefix length for block code: 4 when the line
   * starts with four spaces, 0 otherwise */
  static size_t
  prefix_code(uint8_t *data, size_t size)
  {
      size_t k;

      if (size <= 3)
          return 0;

      for (k = 0; k < 4; k++) {
          if (data[k] != ' ')
              return 0;
      }

      return 4;
  }
  /* prefix_oli • returns the length of an ordered list item prefix: up to
   * three spaces, one or more digits, a '.' and a space. Returns 0 when the
   * line has no such prefix, or when the following line is a setext header
   * underline. */
  static size_t
  prefix_oli(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      /* tolerate up to three spaces of indentation */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      /* at least one digit is required */
      if (pos >= size || data[pos] < '0' || data[pos] > '9')
          return 0;

      do {
          pos++;
      } while (pos < size && data[pos] >= '0' && data[pos] <= '9');

      /* the number must be followed by ". " */
      if (pos + 1 >= size)
          return 0;
      if (data[pos] != '.' || data[pos + 1] != ' ')
          return 0;

      /* a line underlined as a header is not a list item */
      if (is_next_headerline(data + pos, size - pos))
          return 0;

      return pos + 2;
  }
  /* prefix_uli • returns the length of an unordered list item prefix: up to
   * three spaces, one of '*', '+' or '-', and a space. Returns 0 when the
   * line has no such prefix, or when the following line is a setext header
   * underline. */
  static size_t
  prefix_uli(uint8_t *data, size_t size)
  {
      size_t pos = 0;
      uint8_t bullet;

      /* tolerate up to three spaces of indentation */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      /* need room for the bullet and the space after it */
      if (pos + 1 >= size)
          return 0;

      bullet = data[pos];
      if (bullet != '*' && bullet != '+' && bullet != '-')
          return 0;

      if (data[pos + 1] != ' ')
          return 0;

      /* a line underlined as a header is not a list item */
      if (is_next_headerline(data + pos, size - pos))
          return 0;

      return pos + 2;
  }
  1085. /* parse_block • parsing of a run of blocks (forward declaration; defined further down) */
  1086. static void parse_block(struct buf *ob, struct sd_markdown *rndr,
  1087. uint8_t *data, size_t size);
  1088. /* parse_blockquote • handles parsing of a blockquote fragment */
  1089. static size_t
  1090. parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1091. {
  1092. size_t beg, end = 0, pre, work_size = 0;
  1093. uint8_t *work_data = 0;
  1094. struct buf *out = 0;
  1095. out = rndr_newbuf(rndr, BUFFER_BLOCK);
  1096. beg = 0;
  1097. while (beg < size) {
  1098. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1099. pre = prefix_quote(data + beg, end - beg);
  1100. if (pre)
  1101. beg += pre; /* skipping prefix */
  1102. /* empty line followed by non-quote line */
  1103. else if (is_empty(data + beg, end - beg) &&
  1104. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  1105. !is_empty(data + end, size - end))))
  1106. break;
  1107. if (beg < end) { /* copy into the in-place working buffer */
  1108. /* bufput(work, data + beg, end - beg); */
  1109. if (!work_data)
  1110. work_data = data + beg;
  1111. else if (data + beg != work_data + work_size)
  1112. memmove(work_data + work_size, data + beg, end - beg);
  1113. work_size += end - beg;
  1114. }
  1115. beg = end;
  1116. }
  1117. parse_block(out, rndr, work_data, work_size);
  1118. if (rndr->cb.blockquote)
  1119. rndr->cb.blockquote(ob, out, rndr->opaque);
  1120. rndr_popbuf(rndr, BUFFER_BLOCK);
  1121. return end;
  1122. }
  1123. static size_t
  1124. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
  1125. /* parse_blockquote • handles parsing of a regular paragraph */
  1126. static size_t
  1127. parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1128. {
  1129. size_t i = 0, end = 0;
  1130. int level = 0;
  1131. struct buf work = { data, 0, 0, 0 };
  1132. while (i < size) {
  1133. for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
  1134. if (is_empty(data + i, size - i))
  1135. break;
  1136. if ((level = is_headerline(data + i, size - i)) != 0)
  1137. break;
  1138. if (is_atxheader(rndr, data + i, size - i) ||
  1139. is_hrule(data + i, size - i) ||
  1140. prefix_quote(data + i, size - i)) {
  1141. end = i;
  1142. break;
  1143. }
  1144. /*
  1145. * Early termination of a paragraph with the same logic
  1146. * as Markdown 1.0.0. If this logic is applied, the
  1147. * Markdown 1.0.3 test suite won't pass cleanly
  1148. *
  1149. * :: If the first character in a new line is not a letter,
  1150. * let's check to see if there's some kind of block starting
  1151. * here
  1152. */
  1153. if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
  1154. if (prefix_oli(data + i, size - i) ||
  1155. prefix_uli(data + i, size - i)) {
  1156. end = i;
  1157. break;
  1158. }
  1159. /* see if an html block starts here */
  1160. if (data[i] == '<' && rndr->cb.blockhtml &&
  1161. parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
  1162. end = i;
  1163. break;
  1164. }
  1165. /* see if a code fence starts here */
  1166. if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
  1167. is_codefence(data + i, size - i, NULL) != 0) {
  1168. end = i;
  1169. break;
  1170. }
  1171. }
  1172. i = end;
  1173. }
  1174. work.size = i;
  1175. while (work.size && data[work.size - 1] == '\n')
  1176. work.size--;
  1177. if (!level) {
  1178. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1179. parse_inline(tmp, rndr, work.data, work.size);
  1180. if (rndr->cb.paragraph)
  1181. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1182. rndr_popbuf(rndr, BUFFER_BLOCK);
  1183. } else {
  1184. struct buf *header_work;
  1185. if (work.size) {
  1186. size_t beg;
  1187. i = work.size;
  1188. work.size -= 1;
  1189. while (work.size && data[work.size] != '\n')
  1190. work.size -= 1;
  1191. beg = work.size + 1;
  1192. while (work.size && data[work.size - 1] == '\n')
  1193. work.size -= 1;
  1194. if (work.size > 0) {
  1195. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1196. parse_inline(tmp, rndr, work.data, work.size);
  1197. if (rndr->cb.paragraph)
  1198. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1199. rndr_popbuf(rndr, BUFFER_BLOCK);
  1200. work.data += beg;
  1201. work.size = i - beg;
  1202. }
  1203. else work.size = i;
  1204. }
  1205. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1206. parse_inline(header_work, rndr, work.data, work.size);
  1207. if (rndr->cb.header)
  1208. rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
  1209. rndr_popbuf(rndr, BUFFER_SPAN);
  1210. }
  1211. return end;
  1212. }
  1213. /* parse_fencedcode • handles parsing of a block-level code fragment */
  1214. static size_t
  1215. parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1216. {
  1217. size_t beg, end;
  1218. struct buf *work = 0;
  1219. struct buf lang = { 0, 0, 0, 0 };
  1220. beg = is_codefence(data, size, &lang);
  1221. if (beg == 0) return 0;
  1222. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1223. while (beg < size) {
  1224. size_t fence_end;
  1225. struct buf fence_trail = { 0, 0, 0, 0 };
  1226. fence_end = is_codefence(data + beg, size - beg, &fence_trail);
  1227. if (fence_end != 0 && fence_trail.size == 0) {
  1228. beg += fence_end;
  1229. break;
  1230. }
  1231. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1232. if (beg < end) {
  1233. /* verbatim copy to the working buffer,
  1234. escaping entities */
  1235. if (is_empty(data + beg, end - beg))
  1236. bufputc(work, '\n');
  1237. else bufput(work, data + beg, end - beg);
  1238. }
  1239. beg = end;
  1240. }
  1241. if (work->size && work->data[work->size - 1] != '\n')
  1242. bufputc(work, '\n');
  1243. if (rndr->cb.blockcode)
  1244. rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
  1245. rndr_popbuf(rndr, BUFFER_BLOCK);
  1246. return beg;
  1247. }
  1248. static size_t
  1249. parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1250. {
  1251. size_t beg, end, pre;
  1252. struct buf *work = 0;
  1253. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1254. beg = 0;
  1255. while (beg < size) {
  1256. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1257. pre = prefix_code(data + beg, end - beg);
  1258. if (pre)
  1259. beg += pre; /* skipping prefix */
  1260. else if (!is_empty(data + beg, end - beg))
  1261. /* non-empty non-prefixed line breaks the pre */
  1262. break;
  1263. if (beg < end) {
  1264. /* verbatim copy to the working buffer,
  1265. escaping entities */
  1266. if (is_empty(data + beg, end - beg))
  1267. bufputc(work, '\n');
  1268. else bufput(work, data + beg, end - beg);
  1269. }
  1270. beg = end;
  1271. }
  1272. while (work->size && work->data[work->size - 1] == '\n')
  1273. work->size -= 1;
  1274. bufputc(work, '\n');
  1275. if (rndr->cb.blockcode)
  1276. rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
  1277. rndr_popbuf(rndr, BUFFER_BLOCK);
  1278. return beg;
  1279. }
  1280. /* parse_listitem • parsing of a single list item */
  1281. /* assuming initial prefix is already removed */
  1282. static size_t
  1283. parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
  1284. {
  1285. struct buf *work = 0, *inter = 0;
  1286. size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
  1287. int in_empty = 0, has_inside_empty = 0, in_fence = 0;
  1288. /* keeping track of the first indentation prefix */
  1289. while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
  1290. orgpre++;
  1291. beg = prefix_uli(data, size);
  1292. if (!beg)
  1293. beg = prefix_oli(data, size);
  1294. if (!beg)
  1295. return 0;
  1296. /* skipping to the beginning of the following line */
  1297. end = beg;
  1298. while (end < size && data[end - 1] != '\n')
  1299. end++;
  1300. /* getting working buffers */
  1301. work = rndr_newbuf(rndr, BUFFER_SPAN);
  1302. inter = rndr_newbuf(rndr, BUFFER_SPAN);
  1303. /* putting the first line into the working buffer */
  1304. bufput(work, data + beg, end - beg);
  1305. beg = end;
  1306. /* process the following lines */
  1307. while (beg < size) {
  1308. size_t has_next_uli = 0, has_next_oli = 0;
  1309. end++;
  1310. while (end < size && data[end - 1] != '\n')
  1311. end++;
  1312. /* process an empty line */
  1313. if (is_empty(data + beg, end - beg)) {
  1314. in_empty = 1;
  1315. beg = end;
  1316. continue;
  1317. }
  1318. /* calculating the indentation */
  1319. i = 0;
  1320. while (i < 4 && beg + i < end && data[beg + i] == ' ')
  1321. i++;
  1322. pre = i;
  1323. if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
  1324. if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
  1325. in_fence = !in_fence;
  1326. }
  1327. /* Only check for new list items if we are **not** inside
  1328. * a fenced code block */
  1329. if (!in_fence) {
  1330. has_next_uli = prefix_uli(data + beg + i, end - beg - i);
  1331. has_next_oli = prefix_oli(data + beg + i, end - beg - i);
  1332. }
  1333. /* checking for ul/ol switch */
  1334. if (in_empty && (
  1335. ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
  1336. (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
  1337. *flags |= MKD_LI_END;
  1338. break; /* the following item must have same list type */
  1339. }
  1340. /* checking for a new item */
  1341. if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
  1342. if (in_empty)
  1343. has_inside_empty = 1;
  1344. if (pre == orgpre) /* the following item must have */
  1345. break; /* the same indentation */
  1346. if (!sublist)
  1347. sublist = work->size;
  1348. }
  1349. /* joining only indented stuff after empty lines;
  1350. * note that now we only require 1 space of indentation
  1351. * to continue a list */
  1352. else if (in_empty && pre == 0) {
  1353. *flags |= MKD_LI_END;
  1354. break;
  1355. }
  1356. else if (in_empty) {
  1357. bufputc(work, '\n');
  1358. has_inside_empty = 1;
  1359. }
  1360. in_empty = 0;
  1361. /* adding the line without prefix into the working buffer */
  1362. bufput(work, data + beg + i, end - beg - i);
  1363. beg = end;
  1364. }
  1365. /* render of li contents */
  1366. if (has_inside_empty)
  1367. *flags |= MKD_LI_BLOCK;
  1368. if (*flags & MKD_LI_BLOCK) {
  1369. /* intermediate render of block li */
  1370. if (sublist && sublist < work->size) {
  1371. parse_block(inter, rndr, work->data, sublist);
  1372. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1373. }
  1374. else
  1375. parse_block(inter, rndr, work->data, work->size);
  1376. } else {
  1377. /* intermediate render of inline li */
  1378. if (sublist && sublist < work->size) {
  1379. parse_inline(inter, rndr, work->data, sublist);
  1380. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1381. }
  1382. else
  1383. parse_inline(inter, rndr, work->data, work->size);
  1384. }
  1385. /* render of li itself */
  1386. if (rndr->cb.listitem)
  1387. rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
  1388. rndr_popbuf(rndr, BUFFER_SPAN);
  1389. rndr_popbuf(rndr, BUFFER_SPAN);
  1390. return beg;
  1391. }
  1392. /* parse_list • parsing ordered or unordered list block */
  1393. static size_t
  1394. parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
  1395. {
  1396. struct buf *work = 0;
  1397. size_t i = 0, j;
  1398. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1399. while (i < size) {
  1400. j = parse_listitem(work, rndr, data + i, size - i, &flags);
  1401. i += j;
  1402. if (!j || (flags & MKD_LI_END))
  1403. break;
  1404. }
  1405. if (rndr->cb.list)
  1406. rndr->cb.list(ob, work, flags, rndr->opaque);
  1407. rndr_popbuf(rndr, BUFFER_BLOCK);
  1408. return i;
  1409. }
  1410. /* parse_atxheader • parsing of atx-style headers */
  1411. static size_t
  1412. parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1413. {
  1414. size_t level = 0;
  1415. size_t i, end, skip;
  1416. while (level < size && level < 6 && data[level] == '#')
  1417. level++;
  1418. for (i = level; i < size && data[i] == ' '; i++);
  1419. for (end = i; end < size && data[end] != '\n'; end++);
  1420. skip = end;
  1421. while (end && data[end - 1] == '#')
  1422. end--;
  1423. while (end && data[end - 1] == ' ')
  1424. end--;
  1425. if (end > i) {
  1426. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  1427. parse_inline(work, rndr, data + i, end - i);
  1428. if (rndr->cb.header)
  1429. rndr->cb.header(ob, work, (int)level, rndr->opaque);
  1430. rndr_popbuf(rndr, BUFFER_SPAN);
  1431. }
  1432. return skip;
  1433. }
  /* htmlblock_end_tag • checks for the end of an HTML block at data, which
   * is expected to point at "</": the tag name (compared case-insensitively)
   * and '>' must follow, the rest of that line must be blank, and one more
   * blank line is absorbed when present.
   * Returns the matched length, 0 when there is no match. */
  static size_t
  htmlblock_end_tag(
      const char *tag,
      size_t tag_len,
      struct sd_markdown *rndr,
      uint8_t *data,
      size_t size)
  {
      size_t pos, blank;

      /* checking if tag is a match */
      if (tag_len + 3 >= size)
          return 0;
      if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
          return 0;
      if (data[tag_len + 2] != '>')
          return 0;

      /* the remainder of the closing tag's line must be blank */
      pos = tag_len + 3;
      if (pos < size) {
          blank = is_empty(data + pos, size - pos);
          if (blank == 0)
              return 0; /* non-blank after tag */
          pos += blank;
      }

      /* also absorb the following line when it is blank */
      if (pos < size)
          return pos + is_empty(data + pos, size - pos);

      return pos;
  }
  /* htmlblock_end • searches data for the closing tag of curtag that ends an
   * HTML block. With start_of_line set, closing tags found after the first
   * line only count when they directly follow a newline.
   * Returns the length of the block up to and including its end, 0 when no
   * suitable closing tag was found. */
  static size_t
  htmlblock_end(const char *curtag,
      struct sd_markdown *rndr,
      uint8_t *data,
      size_t size,
      int start_of_line)
  {
      size_t tag_len = strlen(curtag);
      size_t pos = 1, matched;
      int newlines = 0;

      while (pos < size) {
          /* advance to the next "</" pair, counting the newlines crossed */
          for (pos++; pos < size && (data[pos - 1] != '<' || data[pos] != '/'); pos++) {
              if (data[pos] == '\n')
                  newlines++;
          }

          /* If we are only looking for unindented tags, skip the tag
           * if it doesn't follow a newline.
           *
           * The only exception to this is if the tag is still on the
           * initial line; in that case it still counts as a closing
           * tag
           */
          if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
              continue;

          /* not enough room left for "</tag>" */
          if (pos + 2 + tag_len >= size)
              break;

          matched = htmlblock_end_tag(curtag, tag_len, rndr, data + pos - 1, size - pos + 1);
          if (matched)
              return pos + matched - 1;
      }

      return 0;
  }
  1495. /* parse_htmlblock • parsing of inline HTML block */
  1496. static size_t
  1497. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
  1498. {
  1499. size_t i, j = 0, tag_end;
  1500. const char *curtag = NULL;
  1501. struct buf work = { data, 0, 0, 0 };
  1502. /* identification of the opening tag */
  1503. if (size < 2 || data[0] != '<')
  1504. return 0;
  1505. i = 1;
  1506. while (i < size && data[i] != '>' && data[i] != ' ')
  1507. i++;
  1508. if (i < size)
  1509. curtag = find_block_tag((char *)data + 1, (int)i - 1);
  1510. /* handling of special cases */
  1511. if (!curtag) {
  1512. /* HTML comment, laxist form */
  1513. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  1514. i = 5;
  1515. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  1516. i++;
  1517. i++;
  1518. if (i < size)
  1519. j = is_empty(data + i, size - i);
  1520. if (j) {
  1521. work.size = i + j;
  1522. if (do_render && rndr->cb.blockhtml)
  1523. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1524. return work.size;
  1525. }
  1526. }
  1527. /* HR, which is the only self-closing block tag considered */
  1528. if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
  1529. i = 3;
  1530. while (i < size && data[i] != '>')
  1531. i++;
  1532. if (i + 1 < size) {
  1533. i++;
  1534. j = is_empty(data + i, size - i);
  1535. if (j) {
  1536. work.size = i + j;
  1537. if (do_render && rndr->cb.blockhtml)
  1538. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1539. return work.size;
  1540. }
  1541. }
  1542. }
  1543. /* no special case recognised */
  1544. return 0;
  1545. }
  1546. /* looking for an unindented matching closing tag */
  1547. /* followed by a blank line */
  1548. tag_end = htmlblock_end(curtag, rndr, data, size, 1);
  1549. /* if not found, trying a second pass looking for indented match */
  1550. /* but not if tag is "ins" or "del" (following original Markdown.pl) */
  1551. if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
  1552. tag_end = htmlblock_end(curtag, rndr, data, size, 0);
  1553. }
  1554. if (!tag_end)
  1555. return 0;
  1556. /* the end of the block has been found */
  1557. work.size = tag_end;
  1558. if (do_render && rndr->cb.blockhtml)
  1559. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1560. return tag_end;
  1561. }
  1562. static void
  1563. parse_table_row(
  1564. struct buf *ob,
  1565. struct sd_markdown *rndr,
  1566. uint8_t *data,
  1567. size_t size,
  1568. size_t columns,
  1569. int *col_data,
  1570. int header_flag)
  1571. {
  1572. size_t i = 0, col;
  1573. struct buf *row_work = 0;
  1574. if (!rndr->cb.table_cell || !rndr->cb.table_row)
  1575. return;
  1576. row_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1577. if (i < size && data[i] == '|')
  1578. i++;
  1579. for (col = 0; col < columns && i < size; ++col) {
  1580. size_t cell_start, cell_end;
  1581. struct buf *cell_work;
  1582. cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1583. while (i < size && _isspace(data[i]))
  1584. i++;
  1585. cell_start = i;
  1586. while (i < size && data[i] != '|')
  1587. i++;
  1588. cell_end = i - 1;
  1589. while (cell_end > cell_start && _isspace(data[cell_end]))
  1590. cell_end--;
  1591. parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
  1592. rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
  1593. rndr_popbuf(rndr, BUFFER_SPAN);
  1594. i++;
  1595. }
  1596. for (; col < columns; ++col) {
  1597. struct buf empty_cell = { 0, 0, 0, 0 };
  1598. rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
  1599. }
  1600. rndr->cb.table_row(ob, row_work, rndr->opaque);
  1601. rndr_popbuf(rndr, BUFFER_SPAN);
  1602. }
  1603. static size_t
  1604. parse_table_header(
  1605. struct buf *ob,
  1606. struct sd_markdown *rndr,
  1607. uint8_t *data,
  1608. size_t size,
  1609. size_t *columns,
  1610. int **column_data)
  1611. {
  1612. int pipes;
  1613. size_t i = 0, col, header_end, under_end;
  1614. pipes = 0;
  1615. while (i < size && data[i] != '\n')
  1616. if (data[i++] == '|')
  1617. pipes++;
  1618. if (i == size || pipes == 0)
  1619. return 0;
  1620. header_end = i;
  1621. while (header_end > 0 && _isspace(data[header_end - 1]))
  1622. header_end--;
  1623. if (data[0] == '|')
  1624. pipes--;
  1625. if (header_end && data[header_end - 1] == '|')
  1626. pipes--;
  1627. *columns = pipes + 1;
  1628. *column_data = calloc(*columns, sizeof(int));
  1629. /* Parse the header underline */
  1630. i++;
  1631. if (i < size && data[i] == '|')
  1632. i++;
  1633. under_end = i;
  1634. while (under_end < size && data[under_end] != '\n')
  1635. under_end++;
  1636. for (col = 0; col < *columns && i < under_end; ++col) {
  1637. size_t dashes = 0;
  1638. while (i < under_end && data[i] == ' ')
  1639. i++;
  1640. if (data[i] == ':') {
  1641. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
  1642. dashes++;
  1643. }
  1644. while (i < under_end && data[i] == '-') {
  1645. i++; dashes++;
  1646. }
  1647. if (i < under_end && data[i] == ':') {
  1648. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
  1649. dashes++;
  1650. }
  1651. while (i < under_end && data[i] == ' ')
  1652. i++;
  1653. if (i < under_end && data[i] != '|')
  1654. break;
  1655. if (dashes < 3)
  1656. break;
  1657. i++;
  1658. }
  1659. if (col < *columns)
  1660. return 0;
  1661. parse_table_row(
  1662. ob, rndr, data,
  1663. header_end,
  1664. *columns,
  1665. *column_data,
  1666. MKD_TABLE_HEADER
  1667. );
  1668. return under_end + 1;
  1669. }
  1670. static size_t
  1671. parse_table(
  1672. struct buf *ob,
  1673. struct sd_markdown *rndr,
  1674. uint8_t *data,
  1675. size_t size)
  1676. {
  1677. size_t i;
  1678. struct buf *header_work = 0;
  1679. struct buf *body_work = 0;
  1680. size_t columns;
  1681. int *col_data = NULL;
  1682. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1683. body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1684. i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
  1685. if (i > 0) {
  1686. while (i < size) {
  1687. size_t row_start;
  1688. int pipes = 0;
  1689. row_start = i;
  1690. while (i < size && data[i] != '\n')
  1691. if (data[i++] == '|')
  1692. pipes++;
  1693. if (pipes == 0 || i == size) {
  1694. i = row_start;
  1695. break;
  1696. }
  1697. parse_table_row(
  1698. body_work,
  1699. rndr,
  1700. data + row_start,
  1701. i - row_start,
  1702. columns,
  1703. col_data, 0
  1704. );
  1705. i++;
  1706. }
  1707. if (rndr->cb.table)
  1708. rndr->cb.table(ob, header_work, body_work, rndr->opaque);
  1709. }
  1710. free(col_data);
  1711. rndr_popbuf(rndr, BUFFER_SPAN);
  1712. rndr_popbuf(rndr, BUFFER_BLOCK);
  1713. return i;
  1714. }
  1715. /* parse_block • parsing of one block, returning next uint8_t to parse */
  1716. static void
  1717. parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1718. {
  1719. size_t beg, end, i;
  1720. uint8_t *txt_data;
  1721. beg = 0;
  1722. if (rndr->work_bufs[BUFFER_SPAN].size +
  1723. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  1724. return;
  1725. while (beg < size) {
  1726. txt_data = data + beg;
  1727. end = size - beg;
  1728. if (is_atxheader(rndr, txt_data, end))
  1729. beg += parse_atxheader(ob, rndr, txt_data, end);
  1730. else if (data[beg] == '<' && rndr->cb.blockhtml &&
  1731. (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
  1732. beg += i;
  1733. e

Large files are truncated, but you can click here to view the full file