PageRenderTime 77ms CodeModel.GetById 36ms RepoModel.GetById 1ms app.codeStats 0ms

/ext/redcarpet/markdown.c

https://bitbucket.org/yangkit/redcarpet
C | 2505 lines | 2279 code | 146 blank | 80 comment | 202 complexity | dcd01c5f121ed67c6ef851d1178de6ab MD5 | raw file
Possible License(s): 0BSD

Large files are truncated, but you can click here to view the full file

  1. /* markdown.c - generic markdown parser */
  2. /*
  3. * Copyright (c) 2009, Natacha Porté
  4. * Copyright (c) 2011, Vicent Marti
  5. *
  6. * Permission to use, copy, modify, and distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. #include "markdown.h"
  19. #include "stack.h"
  20. #include <assert.h>
  21. #include <string.h>
  22. #include <ctype.h>
  23. #include <stdio.h>
  24. #if defined(_WIN32)
  25. #define strncasecmp _strnicmp
  26. #endif
  27. #define REF_TABLE_SIZE 8
  28. #define BUFFER_BLOCK 0
  29. #define BUFFER_SPAN 1
  30. #define MKD_LI_END 8 /* internal list flag */
  31. #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
  32. #define GPERF_DOWNCASE 1
  33. #define GPERF_CASE_STRNCMP 1
  34. #include "html_blocks.h"
  35. /***************
  36. * LOCAL TYPES *
  37. ***************/
  38. /* link_ref: one reference-style link definition, kept in a chained hash table */
  39. struct link_ref {
  40. unsigned int id; /* hash of the reference name, as computed by hash_link_ref() */
  41. struct buf *link; /* link destination; released by free_link_refs() */
  42. struct buf *title; /* link title; released by free_link_refs() */
  43. struct link_ref *next; /* next entry in the same hash bucket */
  44. };
  45. /* char_trigger: handler invoked by parse_inline() when it meets an active char */
  46. /* returns the number of chars consumed from data; 0 means "not handled", in which case parse_inline() emits the char as plain text */
  47. /* data points at the active char that starts the span; size is the number of chars available from data */
  48. /* offset is the number of valid chars before data (handlers read data[-1] / data[-2] only after checking it) */
  49. struct sd_markdown;
  50. typedef size_t
  51. (*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  52. static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  53. static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  54. static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  55. static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  56. static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  57. static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  58. static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  59. static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  60. static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  61. static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  62. static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  63. enum markdown_char_t { /* indices into markdown_char_ptrs[]; the two lists must stay in the same order */
  64. MD_CHAR_NONE = 0, /* no handler: the table entry is NULL */
  65. MD_CHAR_EMPHASIS,
  66. MD_CHAR_CODESPAN,
  67. MD_CHAR_LINEBREAK,
  68. MD_CHAR_LINK,
  69. MD_CHAR_LANGLE,
  70. MD_CHAR_ESCAPE,
  71. MD_CHAR_ENTITITY, /* NOTE(review): misspelling of "ENTITY"; kept because the name may be used outside this view */
  72. MD_CHAR_AUTOLINK_URL,
  73. MD_CHAR_AUTOLINK_EMAIL,
  74. MD_CHAR_AUTOLINK_WWW,
  75. MD_CHAR_SUPERSCRIPT,
  76. };
  77. static char_trigger markdown_char_ptrs[] = {
  78. NULL,
  79. &char_emphasis,
  80. &char_codespan,
  81. &char_linebreak,
  82. &char_link,
  83. &char_langle_tag,
  84. &char_escape,
  85. &char_entity,
  86. &char_autolink_url,
  87. &char_autolink_email,
  88. &char_autolink_www,
  89. &char_superscript,
  90. };
  91. /* sd_markdown: state for one particular renderer instance */
  92. struct sd_markdown {
  93. struct sd_callbacks cb; /* rendering callbacks; several are NULL-checked before use */
  94. void *opaque; /* passed as the last argument to every callback */
  95. struct link_ref *refs[REF_TABLE_SIZE]; /* reference-link hash table (see add_link_ref / find_link_ref) */
  96. uint8_t active_char[256]; /* per-byte index into markdown_char_ptrs[]; 0 means the byte is plain text */
  97. struct stack work_bufs[2]; /* pools of reusable buffers, indexed by BUFFER_BLOCK / BUFFER_SPAN */
  98. unsigned int ext_flags; /* extension flags, e.g. MKDEXT_NO_INTRA_EMPHASIS */
  99. size_t max_nesting; /* parse_inline() returns early once the in-use buffer count exceeds this */
  100. int in_link_body; /* non-zero while char_link() parses link text; makes the autolink handlers return 0 */
  101. };
  102. /***************************
  103. * HELPER FUNCTIONS *
  104. ***************************/
  105. static inline struct buf *
  106. rndr_newbuf(struct sd_markdown *rndr, int type)
  107. {
  108. static const size_t buf_size[2] = {256, 64};
  109. struct buf *work = NULL;
  110. struct stack *pool = &rndr->work_bufs[type];
  111. if (pool->size < pool->asize &&
  112. pool->item[pool->size] != NULL) {
  113. work = pool->item[pool->size++];
  114. work->size = 0;
  115. } else {
  116. work = bufnew(buf_size[type]);
  117. stack_push(pool, work);
  118. }
  119. return work;
  120. }
  121. static inline void
  122. rndr_popbuf(struct sd_markdown *rndr, int type)
  123. {
  124. rndr->work_bufs[type].size--;
  125. }
  126. static void
  127. unscape_text(struct buf *ob, struct buf *src)
  128. {
  129. size_t i = 0, org;
  130. while (i < src->size) {
  131. org = i;
  132. while (i < src->size && src->data[i] != '\\')
  133. i++;
  134. if (i > org)
  135. bufput(ob, src->data + org, i - org);
  136. if (i + 1 >= src->size)
  137. break;
  138. bufputc(ob, src->data[i + 1]);
  139. i += 2;
  140. }
  141. }
  /* hash_link_ref: case-insensitive hash of a reference name.
   * Each byte is lower-cased and folded in with multiplier 65599, which is
   * the same as (hash << 6) + (hash << 16) - hash in unsigned arithmetic. */
  static unsigned int
  hash_link_ref(const uint8_t *link_ref, size_t length)
  {
      unsigned int hash = 0;
      const uint8_t *p = link_ref;
      const uint8_t *stop = link_ref + length;

      while (p != stop) {
          hash = hash * 65599u + (unsigned int)tolower(*p);
          p++;
      }

      return hash;
  }
  151. static struct link_ref *
  152. add_link_ref(
  153. struct link_ref **references,
  154. const uint8_t *name, size_t name_size)
  155. {
  156. struct link_ref *ref = calloc(1, sizeof(struct link_ref));
  157. if (!ref)
  158. return NULL;
  159. ref->id = hash_link_ref(name, name_size);
  160. ref->next = references[ref->id % REF_TABLE_SIZE];
  161. references[ref->id % REF_TABLE_SIZE] = ref;
  162. return ref;
  163. }
  164. static struct link_ref *
  165. find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
  166. {
  167. unsigned int hash = hash_link_ref(name, length);
  168. struct link_ref *ref = NULL;
  169. ref = references[hash % REF_TABLE_SIZE];
  170. while (ref != NULL) {
  171. if (ref->id == hash)
  172. return ref;
  173. ref = ref->next;
  174. }
  175. return NULL;
  176. }
  177. static void
  178. free_link_refs(struct link_ref **references)
  179. {
  180. size_t i;
  181. for (i = 0; i < REF_TABLE_SIZE; ++i) {
  182. struct link_ref *r = references[i];
  183. struct link_ref *next;
  184. while (r) {
  185. next = r->next;
  186. bufrelease(r->link);
  187. bufrelease(r->title);
  188. free(r);
  189. r = next;
  190. }
  191. }
  192. }
  /*
   * Tells whether a char counts as Markdown whitespace.
   * Only the plain space and the newline qualify: tabs and carriage
   * returns are already filtered out by the preprocessing phase.
   *
   * A truly UTF-8 aware version would decode a Unicode codepoint
   * here and test its space property instead.
   */
  static inline int
  _isspace(int c)
  {
      switch (c) {
      case ' ':
      case '\n':
          return 1;
      default:
          return 0;
      }
  }
  208. /****************************
  209. * INLINE PARSING FUNCTIONS *
  210. ****************************/
  /* is_mail_autolink: scans the address part of a mail autolink up to '>'.
   * Accepted chars are [-@._a-zA-Z0-9]; this is looser than the original
   * markdown e-mail matching. Returns the length including the closing '>'
   * when exactly one '@' was seen, and 0 in every other case (bad char,
   * wrong number of '@', or no '>' within size). */
  static size_t
  is_mail_autolink(uint8_t *data, size_t size)
  {
      size_t pos;
      size_t at_count = 0;

      for (pos = 0; pos < size; pos++) {
          uint8_t ch = data[pos];

          if (ch == '>')
              return (at_count == 1) ? pos + 1 : 0;

          if (ch == '@') {
              at_count++;
              continue;
          }

          if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
              continue;

          /* anything else cannot be part of an address */
          return 0;
      }

      /* ran out of input before the closing '>' */
      return 0;
  }
  236. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  237. static size_t
  238. tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
  239. {
  240. size_t i, j;
  241. /* a valid tag can't be shorter than 3 chars */
  242. if (size < 3) return 0;
  243. /* begins with a '<' optionally followed by '/', followed by letter or number */
  244. if (data[0] != '<') return 0;
  245. i = (data[1] == '/') ? 2 : 1;
  246. if (!isalnum(data[i]))
  247. return 0;
  248. /* scheme test */
  249. *autolink = MKDA_NOT_AUTOLINK;
  250. /* try to find the beginning of an URI */
  251. while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  252. i++;
  253. if (i > 1 && data[i] == '@') {
  254. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  255. *autolink = MKDA_EMAIL;
  256. return i + j;
  257. }
  258. }
  259. if (i > 2 && data[i] == ':') {
  260. *autolink = MKDA_NORMAL;
  261. i++;
  262. }
  263. /* completing autolink test: no whitespace or ' or " */
  264. if (i >= size)
  265. *autolink = MKDA_NOT_AUTOLINK;
  266. else if (*autolink) {
  267. j = i;
  268. while (i < size) {
  269. if (data[i] == '\\') i += 2;
  270. else if (data[i] == '>' || data[i] == '\'' ||
  271. data[i] == '"' || data[i] == ' ' || data[i] == '\n')
  272. break;
  273. else i++;
  274. }
  275. if (i >= size) return 0;
  276. if (i > j && data[i] == '>') return i + 1;
  277. /* one of the forbidden chars has been found */
  278. *autolink = MKDA_NOT_AUTOLINK;
  279. }
  280. /* looking for sometinhg looking like a tag end */
  281. while (i < size && data[i] != '>') i++;
  282. if (i >= size) return 0;
  283. return i + 1;
  284. }
  285. /* parse_inline • parses inline markdown elements */
  286. static void
  287. parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  288. {
  289. size_t i = 0, end = 0;
  290. uint8_t action = 0;
  291. struct buf work = { 0, 0, 0, 0 };
  292. if (rndr->work_bufs[BUFFER_SPAN].size +
  293. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  294. return;
  295. while (i < size) {
  296. /* copying inactive chars into the output */
  297. while (end < size && (action = rndr->active_char[data[end]]) == 0) {
  298. end++;
  299. }
  300. if (rndr->cb.normal_text) {
  301. work.data = data + i;
  302. work.size = end - i;
  303. rndr->cb.normal_text(ob, &work, rndr->opaque);
  304. }
  305. else
  306. bufput(ob, data + i, end - i);
  307. if (end >= size) break;
  308. i = end;
  309. end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
  310. if (!end) /* no action from the callback */
  311. end = i + 1;
  312. else {
  313. i += end;
  314. end = i;
  315. }
  316. }
  317. }
  /* find_emph_char: finds the next occurrence of the emphasis char c in data,
   * starting at index 1 and stepping over code spans and links. Returns the
   * index of the match, or 0 if there is none. When a code span or link is
   * left unterminated (or a bracket is not followed by a link target), the
   * first c seen inside it is returned instead, if any. */
  static size_t
  find_emph_char(uint8_t *data, size_t size, uint8_t c)
  {
      size_t pos = 1;

      while (pos < size) {
          size_t fallback = 0; /* first c seen inside the construct being skipped */

          /* run up to the next emphasis char, backtick or opening bracket */
          for (; pos < size; pos++) {
              uint8_t ch = data[pos];

              if (ch == c || ch == '`' || ch == '[')
                  break;
          }

          if (pos == size)
              return 0;

          if (data[pos] == c)
              return pos;

          /* an escaped backtick or bracket does not open anything */
          if (pos && data[pos - 1] == '\\') {
              pos++;
              continue;
          }

          if (data[pos] == '`') {
              size_t fence = 0, run = 0;

              /* length of the opening backtick sequence */
              while (pos < size && data[pos] == '`') {
                  pos++;
                  fence++;
              }

              if (pos >= size)
                  return 0;

              /* look for a closing sequence of the same length */
              while (pos < size && run < fence) {
                  if (!fallback && data[pos] == c)
                      fallback = pos;
                  run = (data[pos] == '`') ? run + 1 : 0;
                  pos++;
              }

              if (pos >= size)
                  return fallback;
          } else if (data[pos] == '[') {
              uint8_t closer;

              /* link text, up to the closing bracket */
              pos++;
              while (pos < size && data[pos] != ']') {
                  if (!fallback && data[pos] == c)
                      fallback = pos;
                  pos++;
              }
              pos++;

              while (pos < size && (data[pos] == ' ' || data[pos] == '\n'))
                  pos++;

              if (pos >= size)
                  return fallback;

              if (data[pos] == '[') {
                  closer = ']';
              } else if (data[pos] == '(') {
                  closer = ')';
              } else {
                  /* not a link after all */
                  if (fallback)
                      return fallback;
                  continue;
              }

              /* link target or reference id */
              pos++;
              while (pos < size && data[pos] != closer) {
                  if (!fallback && data[pos] == c)
                      fallback = pos;
                  pos++;
              }

              if (pos >= size)
                  return fallback;

              pos++;
          }
      }

      return 0;
  }
  389. /* parse_emph1 • parsing single emphase */
  390. /* closed by a symbol not preceded by whitespace and not followed by symbol */
  391. static size_t
  392. parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  393. {
  394. size_t i = 0, len;
  395. struct buf *work = 0;
  396. int r;
  397. if (!rndr->cb.emphasis) return 0;
  398. /* skipping one symbol if coming from emph3 */
  399. if (size > 1 && data[0] == c && data[1] == c) i = 1;
  400. while (i < size) {
  401. len = find_emph_char(data + i, size - i, c);
  402. if (!len) return 0;
  403. i += len;
  404. if (i >= size) return 0;
  405. if (data[i] == c && !_isspace(data[i - 1])) {
  406. if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
  407. if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
  408. continue;
  409. }
  410. work = rndr_newbuf(rndr, BUFFER_SPAN);
  411. parse_inline(work, rndr, data, i);
  412. r = rndr->cb.emphasis(ob, work, rndr->opaque);
  413. rndr_popbuf(rndr, BUFFER_SPAN);
  414. return r ? i + 1 : 0;
  415. }
  416. }
  417. return 0;
  418. }
  419. /* parse_emph2 • parsing single emphase */
  420. static size_t
  421. parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  422. {
  423. int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
  424. size_t i = 0, len;
  425. struct buf *work = 0;
  426. int r;
  427. render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
  428. if (!render_method)
  429. return 0;
  430. while (i < size) {
  431. len = find_emph_char(data + i, size - i, c);
  432. if (!len) return 0;
  433. i += len;
  434. if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
  435. work = rndr_newbuf(rndr, BUFFER_SPAN);
  436. parse_inline(work, rndr, data, i);
  437. r = render_method(ob, work, rndr->opaque);
  438. rndr_popbuf(rndr, BUFFER_SPAN);
  439. return r ? i + 2 : 0;
  440. }
  441. i++;
  442. }
  443. return 0;
  444. }
  445. /* parse_emph3 • parsing single emphase */
  446. /* finds the first closing tag, and delegates to the other emph */
  447. static size_t
  448. parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  449. {
  450. size_t i = 0, len;
  451. int r;
  452. while (i < size) {
  453. len = find_emph_char(data + i, size - i, c);
  454. if (!len) return 0;
  455. i += len;
  456. /* skip whitespace preceded symbols */
  457. if (data[i] != c || _isspace(data[i - 1]))
  458. continue;
  459. if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
  460. /* triple symbol found */
  461. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  462. parse_inline(work, rndr, data, i);
  463. r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
  464. rndr_popbuf(rndr, BUFFER_SPAN);
  465. return r ? i + 3 : 0;
  466. } else if (i + 1 < size && data[i + 1] == c) {
  467. /* double symbol found, handing over to emph1 */
  468. len = parse_emph1(ob, rndr, data - 2, size + 2, c);
  469. if (!len) return 0;
  470. else return len - 2;
  471. } else {
  472. /* single symbol found, handing over to emph2 */
  473. len = parse_emph2(ob, rndr, data - 1, size + 1, c);
  474. if (!len) return 0;
  475. else return len - 1;
  476. }
  477. }
  478. return 0;
  479. }
  /* char_emphasis: handler for emphasis chars. It counts the opening symbols
   * (one, two or three) and dispatches to parse_emph1/2/3. Whitespace may
   * not follow the opener, and '~' is only accepted doubled (strikethrough).
   * Returns the chars consumed including the opener, or 0. */
  static size_t
  char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  {
      uint8_t c = data[0];
      size_t used;

      /* single opener */
      if (size > 2 && data[1] != c) {
          if (c == '~' || _isspace(data[1]))
              return 0;
          used = parse_emph1(ob, rndr, data + 1, size - 1, c);
          return used ? used + 1 : 0;
      }

      /* double opener */
      if (size > 3 && data[1] == c && data[2] != c) {
          if (_isspace(data[2]))
              return 0;
          used = parse_emph2(ob, rndr, data + 2, size - 2, c);
          return used ? used + 2 : 0;
      }

      /* triple opener */
      if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
          if (c == '~' || _isspace(data[3]))
              return 0;
          used = parse_emph3(ob, rndr, data + 3, size - 3, c);
          return used ? used + 3 : 0;
      }

      return 0;
  }
  505. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  506. static size_t
  507. char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  508. {
  509. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  510. return 0;
  511. /* removing the last space from ob and rendering */
  512. while (ob->size && ob->data[ob->size - 1] == ' ')
  513. ob->size--;
  514. return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
  515. }
  516. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  517. static size_t
  518. char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  519. {
  520. size_t end, nb = 0, i, f_begin, f_end;
  521. /* counting the number of backticks in the delimiter */
  522. while (nb < size && data[nb] == '`')
  523. nb++;
  524. /* finding the next delimiter */
  525. i = 0;
  526. for (end = nb; end < size && i < nb; end++) {
  527. if (data[end] == '`') i++;
  528. else i = 0;
  529. }
  530. if (i < nb && end >= size)
  531. return 0; /* no matching delimiter */
  532. /* trimming outside whitespaces */
  533. f_begin = nb;
  534. while (f_begin < end && data[f_begin] == ' ')
  535. f_begin++;
  536. f_end = end - nb;
  537. while (f_end > nb && data[f_end-1] == ' ')
  538. f_end--;
  539. /* real code span */
  540. if (f_begin < f_end) {
  541. struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
  542. if (!rndr->cb.codespan(ob, &work, rndr->opaque))
  543. end = 0;
  544. } else {
  545. if (!rndr->cb.codespan(ob, 0, rndr->opaque))
  546. end = 0;
  547. }
  548. return end;
  549. }
  550. /* char_escape • '\\' backslash escape */
  551. static size_t
  552. char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  553. {
  554. static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
  555. struct buf work = { 0, 0, 0, 0 };
  556. if (size > 1) {
  557. if (strchr(escape_chars, data[1]) == NULL)
  558. return 0;
  559. if (rndr->cb.normal_text) {
  560. work.data = data + 1;
  561. work.size = 1;
  562. rndr->cb.normal_text(ob, &work, rndr->opaque);
  563. }
  564. else bufputc(ob, data[1]);
  565. } else if (size == 1) {
  566. bufputc(ob, data[0]);
  567. }
  568. return 2;
  569. }
  570. /* char_entity • '&' escaped when it doesn't belong to an entity */
  571. /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
  572. static size_t
  573. char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  574. {
  575. size_t end = 1;
  576. struct buf work = { 0, 0, 0, 0 };
  577. if (end < size && data[end] == '#')
  578. end++;
  579. while (end < size && isalnum(data[end]))
  580. end++;
  581. if (end < size && data[end] == ';')
  582. end++; /* real entity */
  583. else
  584. return 0; /* lone '&' */
  585. if (rndr->cb.entity) {
  586. work.data = data;
  587. work.size = end;
  588. rndr->cb.entity(ob, &work, rndr->opaque);
  589. }
  590. else bufput(ob, data, end);
  591. return end;
  592. }
  593. /* char_langle_tag • '<' when tags or autolinks are allowed */
  594. static size_t
  595. char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  596. {
  597. enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
  598. size_t end = tag_length(data, size, &altype);
  599. struct buf work = { data, end, 0, 0 };
  600. int ret = 0;
  601. if (end > 2) {
  602. if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
  603. struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  604. work.data = data + 1;
  605. work.size = end - 2;
  606. unscape_text(u_link, &work);
  607. ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
  608. rndr_popbuf(rndr, BUFFER_SPAN);
  609. }
  610. else if (rndr->cb.raw_html_tag)
  611. ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
  612. }
  613. if (!ret) return 0;
  614. else return end;
  615. }
  616. static size_t
  617. char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  618. {
  619. struct buf *link, *link_url, *link_text;
  620. size_t link_len, rewind;
  621. if (!rndr->cb.link || rndr->in_link_body)
  622. return 0;
  623. link = rndr_newbuf(rndr, BUFFER_SPAN);
  624. if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
  625. link_url = rndr_newbuf(rndr, BUFFER_SPAN);
  626. BUFPUTSL(link_url, "http://");
  627. bufput(link_url, link->data, link->size);
  628. ob->size -= rewind;
  629. if (rndr->cb.normal_text) {
  630. link_text = rndr_newbuf(rndr, BUFFER_SPAN);
  631. rndr->cb.normal_text(link_text, link, rndr->opaque);
  632. rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
  633. rndr_popbuf(rndr, BUFFER_SPAN);
  634. } else {
  635. rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
  636. }
  637. rndr_popbuf(rndr, BUFFER_SPAN);
  638. }
  639. rndr_popbuf(rndr, BUFFER_SPAN);
  640. return link_len;
  641. }
  642. static size_t
  643. char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  644. {
  645. struct buf *link;
  646. size_t link_len, rewind;
  647. if (!rndr->cb.autolink || rndr->in_link_body)
  648. return 0;
  649. link = rndr_newbuf(rndr, BUFFER_SPAN);
  650. if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
  651. ob->size -= rewind;
  652. rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
  653. }
  654. rndr_popbuf(rndr, BUFFER_SPAN);
  655. return link_len;
  656. }
  657. static size_t
  658. char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  659. {
  660. struct buf *link;
  661. size_t link_len, rewind;
  662. if (!rndr->cb.autolink || rndr->in_link_body)
  663. return 0;
  664. link = rndr_newbuf(rndr, BUFFER_SPAN);
  665. if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
  666. ob->size -= rewind;
  667. rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
  668. }
  669. rndr_popbuf(rndr, BUFFER_SPAN);
  670. return link_len;
  671. }
  672. /* char_link • '[': parses a link or an image in inline, reference or
   * shortcut-reference style. It is an image when the char before data is '!'.
   * Returns the number of chars consumed from data, or 0 when nothing was
   * rendered: missing renderer, no closing bracket or parenthesis, unknown
   * reference, or the callback returned 0. Span buffers taken here are given
   * back at the cleanup label. */
  673. static size_t
  674. char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  675. {
  676. int is_img = (offset && data[-1] == '!'), level;
  677. size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
  678. struct buf *content = 0;
  679. struct buf *link = 0;
  680. struct buf *title = 0;
  681. struct buf *u_link = 0;
  682. size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size; /* span pool depth on entry, restored at cleanup */
  683. int text_has_nl = 0, ret = 0;
  684. int in_title = 0, qtype = 0;
  685. /* checking whether the correct renderer exists */
  686. if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
  687. goto cleanup;
  688. /* looking for the matching closing bracket; brackets preceded by a backslash do not count */
  689. for (level = 1; i < size; i++) {
  690. if (data[i] == '\n')
  691. text_has_nl = 1;
  692. else if (data[i - 1] == '\\')
  693. continue;
  694. else if (data[i] == '[')
  695. level++;
  696. else if (data[i] == ']') {
  697. level--;
  698. if (level <= 0)
  699. break;
  700. }
  701. }
  702. if (i >= size)
  703. goto cleanup;
  704. txt_e = i; /* index of the ']' closing the link text */
  705. i++;
  706. /* skip any amount of whitespace or newline */
  707. /* (this is much more lax than the original markdown syntax) */
  708. while (i < size && _isspace(data[i]))
  709. i++;
  710. /* inline style link */
  711. if (i < size && data[i] == '(') {
  712. /* skipping initial whitespace */
  713. i++;
  714. while (i < size && _isspace(data[i]))
  715. i++;
  716. link_b = i;
  717. /* looking for link end: ')' or a quote preceded by whitespace (start of a title) */
  718. while (i < size) {
  719. if (data[i] == '\\') i += 2;
  720. else if (data[i] == ')') break;
  721. else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
  722. else i++;
  723. }
  724. if (i >= size) goto cleanup;
  725. link_e = i;
  726. /* looking for title end if present */
  727. if (data[i] == '\'' || data[i] == '"') {
  728. qtype = data[i];
  729. in_title = 1;
  730. i++;
  731. title_b = i;
  732. while (i < size) {
  733. if (data[i] == '\\') i += 2;
  734. else if (data[i] == qtype) {in_title = 0; i++;}
  735. else if ((data[i] == ')') && !in_title) break;
  736. else i++;
  737. }
  738. if (i >= size) goto cleanup;
  739. /* skipping whitespaces after title */
  740. title_e = i - 1;
  741. while (title_e > title_b && _isspace(data[title_e]))
  742. title_e--;
  743. /* checking for closing quote presence; without one the whole span up to ')' is the link */
  744. if (data[title_e] != '\'' && data[title_e] != '"') {
  745. title_b = title_e = 0;
  746. link_e = i;
  747. }
  748. }
  749. /* remove whitespace at the end of the link */
  750. while (link_e > link_b && _isspace(data[link_e - 1]))
  751. link_e--;
  752. /* remove optional angle brackets around the link */
  753. if (data[link_b] == '<') link_b++;
  754. if (data[link_e - 1] == '>') link_e--;
  755. /* building escaped link and title */
  756. if (link_e > link_b) {
  757. link = rndr_newbuf(rndr, BUFFER_SPAN);
  758. bufput(link, data + link_b, link_e - link_b);
  759. }
  760. if (title_e > title_b) {
  761. title = rndr_newbuf(rndr, BUFFER_SPAN);
  762. bufput(title, data + title_b, title_e - title_b);
  763. }
  764. i++; /* step past the closing ')' */
  765. }
  766. /* reference style link */
  767. else if (i < size && data[i] == '[') {
  768. struct buf id = { 0, 0, 0, 0 };
  769. struct link_ref *lr;
  770. /* looking for the id */
  771. i++;
  772. link_b = i;
  773. while (i < size && data[i] != ']') i++;
  774. if (i >= size) goto cleanup;
  775. link_e = i;
  776. /* finding the link_ref; an empty id "[]" means the link text is the id */
  777. if (link_b == link_e) {
  778. if (text_has_nl) {
  779. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  780. size_t j;
  781. for (j = 1; j < txt_e; j++) { /* copy the text, turning each newline into one space unless a space precedes it */
  782. if (data[j] != '\n')
  783. bufputc(b, data[j]);
  784. else if (data[j - 1] != ' ')
  785. bufputc(b, ' ');
  786. }
  787. id.data = b->data;
  788. id.size = b->size;
  789. } else {
  790. id.data = data + 1;
  791. id.size = txt_e - 1;
  792. }
  793. } else {
  794. id.data = data + link_b;
  795. id.size = link_e - link_b;
  796. }
  797. lr = find_link_ref(rndr->refs, id.data, id.size);
  798. if (!lr)
  799. goto cleanup;
  800. /* keeping link and title from link_ref */
  801. link = lr->link;
  802. title = lr->title;
  803. i++; /* step past the closing ']' of the id */
  804. }
  805. /* shortcut reference style link */
  806. else {
  807. struct buf id = { 0, 0, 0, 0 };
  808. struct link_ref *lr;
  809. /* crafting the id */
  810. if (text_has_nl) {
  811. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  812. size_t j;
  813. for (j = 1; j < txt_e; j++) { /* same newline folding as in the reference-style branch */
  814. if (data[j] != '\n')
  815. bufputc(b, data[j]);
  816. else if (data[j - 1] != ' ')
  817. bufputc(b, ' ');
  818. }
  819. id.data = b->data;
  820. id.size = b->size;
  821. } else {
  822. id.data = data + 1;
  823. id.size = txt_e - 1;
  824. }
  825. /* finding the link_ref */
  826. lr = find_link_ref(rndr->refs, id.data, id.size);
  827. if (!lr)
  828. goto cleanup;
  829. /* keeping link and title from link_ref */
  830. link = lr->link;
  831. title = lr->title;
  832. /* rewinding the whitespace skipped after the closing bracket */
  833. i = txt_e + 1;
  834. }
  835. /* building content: img alt is copied verbatim, link content is parsed */
  836. if (txt_e > 1) {
  837. content = rndr_newbuf(rndr, BUFFER_SPAN);
  838. if (is_img) {
  839. bufput(content, data + 1, txt_e - 1);
  840. } else {
  841. /* disable autolinking when parsing inline the
  842. * content of a link */
  843. rndr->in_link_body = 1;
  844. parse_inline(content, rndr, data + 1, txt_e - 1);
  845. rndr->in_link_body = 0;
  846. }
  847. }
  848. if (link) { /* the renderer receives the link with backslash escapes removed */
  849. u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  850. unscape_text(u_link, link);
  851. }
  852. /* calling the relevant rendering function */
  853. if (is_img) {
  854. if (ob->size && ob->data[ob->size - 1] == '!') /* drop the '!' at the end of the output before rendering the image */
  855. ob->size -= 1;
  856. ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
  857. } else {
  858. ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
  859. }
  860. /* cleanup: give back every span buffer taken since entry */
  861. cleanup:
  862. rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
  863. return ret ? i : 0;
  864. }
  865. static size_t
  866. char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  867. {
  868. size_t sup_start, sup_len;
  869. struct buf *sup;
  870. if (!rndr->cb.superscript)
  871. return 0;
  872. if (size < 2)
  873. return 0;
  874. if (data[1] == '(') {
  875. sup_start = sup_len = 2;
  876. while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
  877. sup_len++;
  878. if (sup_len == size)
  879. return 0;
  880. } else {
  881. sup_start = sup_len = 1;
  882. while (sup_len < size && !_isspace(data[sup_len]))
  883. sup_len++;
  884. }
  885. if (sup_len - sup_start == 0)
  886. return (sup_start == 2) ? 3 : 0;
  887. sup = rndr_newbuf(rndr, BUFFER_SPAN);
  888. parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
  889. rndr->cb.superscript(ob, sup, rndr->opaque);
  890. rndr_popbuf(rndr, BUFFER_SPAN);
  891. return (sup_start == 2) ? sup_len + 1 : sup_len;
  892. }
  893. /*********************************
  894. * BLOCK-LEVEL PARSING FUNCTIONS *
  895. *********************************/
  /* is_empty: returns the line length, counting the newline, when the line
   * holds nothing but spaces, and 0 otherwise. A blank line that hits the
   * end of the input without a newline still reports one extra char. */
  static size_t
  is_empty(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      while (pos < size && data[pos] != '\n') {
          if (data[pos] != ' ')
              return 0;
          pos++;
      }

      return pos + 1;
  }
  /* is_hrule: returns whether a line is a horizontal rule. After up to
   * three leading spaces, the line must hold at least three of the same
   * rule char ('*', '-' or '_') and nothing else but spaces. */
  static int
  is_hrule(uint8_t *data, size_t size)
  {
      size_t pos = 0, count = 0;
      uint8_t rule;

      if (size < 3)
          return 0;

      /* skipping up to three initial spaces (size >= 3 keeps this in bounds) */
      while (pos < 3 && data[pos] == ' ')
          pos++;

      /* at least three chars must remain, starting with a rule char */
      if (pos + 2 >= size)
          return 0;

      rule = data[pos];
      if (rule != '*' && rule != '-' && rule != '_')
          return 0;

      /* the whole line must be the rule char or spaces */
      for (; pos < size && data[pos] != '\n'; pos++) {
          if (data[pos] == rule)
              count++;
          else if (data[pos] != ' ')
              return 0;
      }

      return count >= 3;
  }
  931. /* check if a line is a code fence; return its size if it is */
  932. static size_t
  933. is_codefence(uint8_t *data, size_t size, struct buf *syntax)
  934. {
  935. size_t i = 0, n = 0;
  936. uint8_t c;
  937. /* skipping initial spaces */
  938. if (size < 3) return 0;
  939. if (data[0] == ' ') { i++;
  940. if (data[1] == ' ') { i++;
  941. if (data[2] == ' ') { i++; } } }
  942. /* looking at the hrule uint8_t */
  943. if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
  944. return 0;
  945. c = data[i];
  946. /* the whole line must be the uint8_t or whitespace */
  947. while (i < size && data[i] == c) {
  948. n++; i++;
  949. }
  950. if (n < 3)
  951. return 0;
  952. if (syntax != NULL) {
  953. size_t syn = 0;
  954. while (i < size && data[i] == ' ')
  955. i++;
  956. syntax->data = data + i;
  957. if (i < size && data[i] == '{') {
  958. i++; syntax->data++;
  959. while (i < size && data[i] != '}' && data[i] != '\n') {
  960. syn++; i++;
  961. }
  962. if (i == size || data[i] != '}')
  963. return 0;
  964. /* strip all whitespace at the beginning and the end
  965. * of the {} block */
  966. while (syn > 0 && _isspace(syntax->data[0])) {
  967. syntax->data++; syn--;
  968. }
  969. while (syn > 0 && _isspace(syntax->data[syn - 1]))
  970. syn--;
  971. i++;
  972. } else {
  973. while (i < size && !_isspace(data[i])) {
  974. syn++; i++;
  975. }
  976. }
  977. syntax->size = syn;
  978. }
  979. while (i < size && data[i] != '\n') {
  980. if (!_isspace(data[i]))
  981. return 0;
  982. i++;
  983. }
  984. return i + 1;
  985. }
  986. /* is_atxheader • returns whether the line is a hash-prefixed header */
  987. static int
  988. is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
  989. {
  990. if (data[0] != '#')
  991. return 0;
  992. if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
  993. size_t level = 0;
  994. while (level < size && level < 6 && data[level] == '#')
  995. level++;
  996. if (level < size && data[level] != ' ')
  997. return 0;
  998. }
  999. return 1;
  1000. }
  /* is_headerline • tells whether the line is a setext-style header underline
   *
   * An underline is a run of '=' (level 1) or '-' (level 2), optionally
   * followed by spaces, and nothing else before the end of the line.
   *
   * `data[0]` is read unconditionally.
   * Returns the header level (1 or 2), or 0 when the line is no underline. */
  static int
  is_headerline(uint8_t *data, size_t size)
  {
      uint8_t mark = data[0];
      size_t pos = 1;
      int level;

      if (mark == '=')
          level = 1;
      else if (mark == '-')
          level = 2;
      else
          return 0;

      /* the run of underline characters, then optional trailing spaces */
      while (pos < size && data[pos] == mark)
          pos++;

      while (pos < size && data[pos] == ' ')
          pos++;

      /* anything left on the line disqualifies it */
      if (pos < size && data[pos] != '\n')
          return 0;

      return level;
  }
  /* is_next_headerline • tells whether the line *after* the current one is
   * a setext header underline
   *
   * Returns the level reported by is_headerline() for the following line,
   * or 0 when there is no following line. */
  static int
  is_next_headerline(uint8_t *data, size_t size)
  {
      size_t next = 0;

      /* find the end of the current line */
      while (next < size && data[next] != '\n')
          next++;

      /* step over the newline; nothing to inspect if the buffer ends here */
      next++;
      if (next >= size)
          return 0;

      return is_headerline(data + next, size - next);
  }
  /* prefix_quote • measures the blockquote prefix of a line
   *
   * The prefix is up to three spaces, a '>' and one optional space.
   * Returns the prefix length, or 0 when the line is not a quote line. */
  static size_t
  prefix_quote(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      /* optional indentation: at most three spaces */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      if (pos >= size || data[pos] != '>')
          return 0;

      /* a single space after the marker belongs to the prefix */
      if (pos + 1 < size && data[pos + 1] == ' ')
          return pos + 2;

      return pos + 1;
  }
  /* prefix_code • measures the indented-code prefix of a line
   *
   * Returns 4 when the line starts with four spaces, 0 otherwise. */
  static size_t
  prefix_code(uint8_t *data, size_t size)
  {
      size_t k;

      if (size <= 3)
          return 0;

      for (k = 0; k < 4; k++) {
          if (data[k] != ' ')
              return 0;
      }

      return 4;
  }
  /* prefix_oli • measures the ordered-list-item prefix of a line
   *
   * The prefix is up to three spaces, one or more decimal digits, a '.'
   * and a space. A line whose following line is a setext underline is
   * treated as a header, not a list item.
   *
   * Returns the prefix length, or 0 when the line is not an ordered item. */
  static size_t
  prefix_oli(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      /* optional indentation: at most three spaces */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      /* at least one digit is required */
      if (pos >= size || data[pos] < '0' || data[pos] > '9')
          return 0;

      do {
          pos++;
      } while (pos < size && data[pos] >= '0' && data[pos] <= '9');

      /* the number must be followed by ". " */
      if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
          return 0;

      if (is_next_headerline(data + pos, size - pos))
          return 0;

      return pos + 2;
  }
  /* prefix_uli • measures the unordered-list-item prefix of a line
   *
   * The prefix is up to three spaces, a bullet ('*', '+' or '-') and a
   * space. A line whose following line is a setext underline is treated as
   * a header, not a list item.
   *
   * Returns the prefix length, or 0 when the line is not an unordered item. */
  static size_t
  prefix_uli(uint8_t *data, size_t size)
  {
      size_t pos = 0;
      uint8_t bullet;

      /* optional indentation: at most three spaces */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      /* need room for the bullet and the space after it */
      if (pos + 1 >= size)
          return 0;

      bullet = data[pos];
      if (bullet != '*' && bullet != '+' && bullet != '-')
          return 0;

      if (data[pos + 1] != ' ')
          return 0;

      if (is_next_headerline(data + pos, size - pos))
          return 0;

      return pos + 2;
  }
  /* parse_block • parses every block found in data and renders it into ob (forward declaration) */
  1086. static void parse_block(struct buf *ob, struct sd_markdown *rndr,
  1087. uint8_t *data, size_t size);
  1088. /* parse_blockquote • handles parsing of a blockquote fragment */
  1089. static size_t
  1090. parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1091. {
  1092. size_t beg, end = 0, pre, work_size = 0;
  1093. uint8_t *work_data = 0;
  1094. struct buf *out = 0;
  1095. out = rndr_newbuf(rndr, BUFFER_BLOCK);
  1096. beg = 0;
  1097. while (beg < size) {
  1098. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1099. pre = prefix_quote(data + beg, end - beg);
  1100. if (pre)
  1101. beg += pre; /* skipping prefix */
  1102. /* empty line followed by non-quote line */
  1103. else if (is_empty(data + beg, end - beg) &&
  1104. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  1105. !is_empty(data + end, size - end))))
  1106. break;
  1107. if (beg < end) { /* copy into the in-place working buffer */
  1108. /* bufput(work, data + beg, end - beg); */
  1109. if (!work_data)
  1110. work_data = data + beg;
  1111. else if (data + beg != work_data + work_size)
  1112. memmove(work_data + work_size, data + beg, end - beg);
  1113. work_size += end - beg;
  1114. }
  1115. beg = end;
  1116. }
  1117. parse_block(out, rndr, work_data, work_size);
  1118. if (rndr->cb.blockquote)
  1119. rndr->cb.blockquote(ob, out, rndr->opaque);
  1120. rndr_popbuf(rndr, BUFFER_BLOCK);
  1121. return end;
  1122. }
  1123. static size_t
  1124. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
  1125. /* parse_blockquote • handles parsing of a regular paragraph */
  1126. static size_t
  1127. parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1128. {
  1129. size_t i = 0, end = 0;
  1130. int level = 0;
  1131. struct buf work = { data, 0, 0, 0 };
  1132. while (i < size) {
  1133. for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
  1134. if (is_empty(data + i, size - i))
  1135. break;
  1136. if ((level = is_headerline(data + i, size - i)) != 0)
  1137. break;
  1138. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  1139. if (data[i] == '<' && rndr->cb.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
  1140. end = i;
  1141. break;
  1142. }
  1143. }
  1144. if (is_atxheader(rndr, data + i, size - i) ||
  1145. is_hrule(data + i, size - i) ||
  1146. prefix_quote(data + i, size - i)) {
  1147. end = i;
  1148. break;
  1149. }
  1150. i = end;
  1151. }
  1152. work.size = i;
  1153. while (work.size && data[work.size - 1] == '\n')
  1154. work.size--;
  1155. if (!level) {
  1156. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1157. parse_inline(tmp, rndr, work.data, work.size);
  1158. if (rndr->cb.paragraph)
  1159. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1160. rndr_popbuf(rndr, BUFFER_BLOCK);
  1161. } else {
  1162. struct buf *header_work;
  1163. if (work.size) {
  1164. size_t beg;
  1165. i = work.size;
  1166. work.size -= 1;
  1167. while (work.size && data[work.size] != '\n')
  1168. work.size -= 1;
  1169. beg = work.size + 1;
  1170. while (work.size && data[work.size - 1] == '\n')
  1171. work.size -= 1;
  1172. if (work.size > 0) {
  1173. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1174. parse_inline(tmp, rndr, work.data, work.size);
  1175. if (rndr->cb.paragraph)
  1176. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1177. rndr_popbuf(rndr, BUFFER_BLOCK);
  1178. work.data += beg;
  1179. work.size = i - beg;
  1180. }
  1181. else work.size = i;
  1182. }
  1183. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1184. parse_inline(header_work, rndr, work.data, work.size);
  1185. if (rndr->cb.header)
  1186. rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
  1187. rndr_popbuf(rndr, BUFFER_SPAN);
  1188. }
  1189. return end;
  1190. }
  1191. /* parse_fencedcode • handles parsing of a block-level code fragment */
  1192. static size_t
  1193. parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1194. {
  1195. size_t beg, end;
  1196. struct buf *work = 0;
  1197. struct buf lang = { 0, 0, 0, 0 };
  1198. beg = is_codefence(data, size, &lang);
  1199. if (beg == 0) return 0;
  1200. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1201. while (beg < size) {
  1202. size_t fence_end;
  1203. fence_end = is_codefence(data + beg, size - beg, NULL);
  1204. if (fence_end != 0) {
  1205. beg += fence_end;
  1206. break;
  1207. }
  1208. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1209. if (beg < end) {
  1210. /* verbatim copy to the working buffer,
  1211. escaping entities */
  1212. if (is_empty(data + beg, end - beg))
  1213. bufputc(work, '\n');
  1214. else bufput(work, data + beg, end - beg);
  1215. }
  1216. beg = end;
  1217. }
  1218. if (work->size && work->data[work->size - 1] != '\n')
  1219. bufputc(work, '\n');
  1220. if (rndr->cb.blockcode)
  1221. rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
  1222. rndr_popbuf(rndr, BUFFER_BLOCK);
  1223. return beg;
  1224. }
  1225. static size_t
  1226. parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1227. {
  1228. size_t beg, end, pre;
  1229. struct buf *work = 0;
  1230. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1231. beg = 0;
  1232. while (beg < size) {
  1233. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1234. pre = prefix_code(data + beg, end - beg);
  1235. if (pre)
  1236. beg += pre; /* skipping prefix */
  1237. else if (!is_empty(data + beg, end - beg))
  1238. /* non-empty non-prefixed line breaks the pre */
  1239. break;
  1240. if (beg < end) {
  1241. /* verbatim copy to the working buffer,
  1242. escaping entities */
  1243. if (is_empty(data + beg, end - beg))
  1244. bufputc(work, '\n');
  1245. else bufput(work, data + beg, end - beg);
  1246. }
  1247. beg = end;
  1248. }
  1249. while (work->size && work->data[work->size - 1] == '\n')
  1250. work->size -= 1;
  1251. bufputc(work, '\n');
  1252. if (rndr->cb.blockcode)
  1253. rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
  1254. rndr_popbuf(rndr, BUFFER_BLOCK);
  1255. return beg;
  1256. }
  1257. /* parse_listitem • parsing of a single list item */
  1258. /* assuming initial prefix is already removed */
  1259. static size_t
  1260. parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
  1261. {
  1262. struct buf *work = 0, *inter = 0;
  1263. size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
  1264. int in_empty = 0, has_inside_empty = 0;
  1265. size_t has_next_uli, has_next_oli;
  1266. /* keeping track of the first indentation prefix */
  1267. while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
  1268. orgpre++;
  1269. beg = prefix_uli(data, size);
  1270. if (!beg)
  1271. beg = prefix_oli(data, size);
  1272. if (!beg)
  1273. return 0;
  1274. /* skipping to the beginning of the following line */
  1275. end = beg;
  1276. while (end < size && data[end - 1] != '\n')
  1277. end++;
  1278. /* getting working buffers */
  1279. work = rndr_newbuf(rndr, BUFFER_SPAN);
  1280. inter = rndr_newbuf(rndr, BUFFER_SPAN);
  1281. /* putting the first line into the working buffer */
  1282. bufput(work, data + beg, end - beg);
  1283. beg = end;
  1284. /* process the following lines */
  1285. while (beg < size) {
  1286. end++;
  1287. while (end < size && data[end - 1] != '\n')
  1288. end++;
  1289. /* process an empty line */
  1290. if (is_empty(data + beg, end - beg)) {
  1291. in_empty = 1;
  1292. beg = end;
  1293. continue;
  1294. }
  1295. /* calculating the indentation */
  1296. i = 0;
  1297. while (i < 4 && beg + i < end && data[beg + i] == ' ')
  1298. i++;
  1299. pre = i;
  1300. has_next_uli = prefix_uli(data + beg + i, end - beg - i);
  1301. has_next_oli = prefix_oli(data + beg + i, end - beg - i);
  1302. /* checking for ul/ol switch */
  1303. if (in_empty && (
  1304. ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
  1305. (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
  1306. *flags |= MKD_LI_END;
  1307. break; /* the following item must have same list type */
  1308. }
  1309. /* checking for a new item */
  1310. if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
  1311. if (in_empty)
  1312. has_inside_empty = 1;
  1313. if (pre == orgpre) /* the following item must have */
  1314. break; /* the same indentation */
  1315. if (!sublist)
  1316. sublist = work->size;
  1317. }
  1318. /* joining only indented stuff after empty lines */
  1319. else if (in_empty && i < 4) {
  1320. *flags |= MKD_LI_END;
  1321. break;
  1322. }
  1323. else if (in_empty) {
  1324. bufputc(work, '\n');
  1325. has_inside_empty = 1;
  1326. }
  1327. in_empty = 0;
  1328. /* adding the line without prefix into the working buffer */
  1329. bufput(work, data + beg + i, end - beg - i);
  1330. beg = end;
  1331. }
  1332. /* render of li contents */
  1333. if (has_inside_empty)
  1334. *flags |= MKD_LI_BLOCK;
  1335. if (*flags & MKD_LI_BLOCK) {
  1336. /* intermediate render of block li */
  1337. if (sublist && sublist < work->size) {
  1338. parse_block(inter, rndr, work->data, sublist);
  1339. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1340. }
  1341. else
  1342. parse_block(inter, rndr, work->data, work->size);
  1343. } else {
  1344. /* intermediate render of inline li */
  1345. if (sublist && sublist < work->size) {
  1346. parse_inline(inter, rndr, work->data, sublist);
  1347. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1348. }
  1349. else
  1350. parse_inline(inter, rndr, work->data, work->size);
  1351. }
  1352. /* render of li itself */
  1353. if (rndr->cb.listitem)
  1354. rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
  1355. rndr_popbuf(rndr, BUFFER_SPAN);
  1356. rndr_popbuf(rndr, BUFFER_SPAN);
  1357. return beg;
  1358. }
  1359. /* parse_list • parsing ordered or unordered list block */
  1360. static size_t
  1361. parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
  1362. {
  1363. struct buf *work = 0;
  1364. size_t i = 0, j;
  1365. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1366. while (i < size) {
  1367. j = parse_listitem(work, rndr, data + i, size - i, &flags);
  1368. i += j;
  1369. if (!j || (flags & MKD_LI_END))
  1370. break;
  1371. }
  1372. if (rndr->cb.list)
  1373. rndr->cb.list(ob, work, flags, rndr->opaque);
  1374. rndr_popbuf(rndr, BUFFER_BLOCK);
  1375. return i;
  1376. }
  1377. /* parse_atxheader • parsing of atx-style headers */
  1378. static size_t
  1379. parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1380. {
  1381. size_t level = 0;
  1382. size_t i, end, skip;
  1383. while (level < size && level < 6 && data[level] == '#')
  1384. level++;
  1385. for (i = level; i < size && data[i] == ' '; i++);
  1386. for (end = i; end < size && data[end] != '\n'; end++);
  1387. skip = end;
  1388. while (end && data[end - 1] == '#')
  1389. end--;
  1390. while (end && data[end - 1] == ' ')
  1391. end--;
  1392. if (end > i) {
  1393. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  1394. parse_inline(work, rndr, data + i, end - i);
  1395. if (rndr->cb.header)
  1396. rndr->cb.header(ob, work, (int)level, rndr->opaque);
  1397. rndr_popbuf(rndr, BUFFER_SPAN);
  1398. }
  1399. return skip;
  1400. }
  1401. /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
  1402. /* returns the length on match, 0 otherwise */
  1403. static size_t
  1404. htmlblock_end_tag(
  1405. const char *tag,
  1406. size_t tag_len,
  1407. struct sd_markdown *rndr,
  1408. uint8_t *data,
  1409. size_t size)
  1410. {
  1411. size_t i, w;
  1412. /* checking if tag is a match */
  1413. if (tag_len + 3 >= size ||
  1414. strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
  1415. data[tag_len + 2] != '>')
  1416. return 0;
  1417. /* checking white lines */
  1418. i = tag_len + 3;
  1419. w = 0;
  1420. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1421. return 0; /* non-blank after tag */
  1422. i += w;
  1423. w = 0;
  1424. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  1425. if (i < size)
  1426. w = is_empty(data + i, size - i);
  1427. } else {
  1428. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1429. return 0; /* non-blank line after tag line */
  1430. }
  1431. return i + w;
  1432. }
  /* htmlblock_end • searches for the end of an HTML block
   *
   * Scans `data` for "</" sequences and tests each one with
   * htmlblock_end_tag() against `curtag`. When `start_of_line` is set,
   * candidates past the first line only count if they directly follow a
   * newline; a closing tag still on the initial line is always accepted.
   *
   * Returns the offset just past the block's end, or 0 when no matching
   * closing tag is found. */
  static size_t
  htmlblock_end(const char *curtag,
      struct sd_markdown *rndr,
      uint8_t *data,
      size_t size,
      int start_of_line)
  {
      size_t tag_size = strlen(curtag);
      size_t pos = 1, match;
      int newlines = 0;

      while (pos < size) {
          /* advance to the next "</", counting the newlines crossed */
          for (pos++; pos < size && (data[pos - 1] != '<' || data[pos] != '/'); pos++) {
              if (data[pos] == '\n')
                  newlines++;
          }

          /* unindented search: ignore tags that do not start their line,
           * except while still on the opening line */
          if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
              continue;

          /* not enough room left for a closing tag */
          if (pos + 2 + tag_size >= size)
              break;

          match = htmlblock_end_tag(curtag, tag_size, rndr, data + pos - 1, size - pos + 1);
          if (match)
              return pos + match - 1;
      }

      return 0;
  }
  1467. /* parse_htmlblock • parsing of inline HTML block */
  1468. static size_t
  1469. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
  1470. {
  1471. size_t i, j = 0, tag_end;
  1472. const char *curtag = NULL;
  1473. struct buf work = { data, 0, 0, 0 };
  1474. /* identification of the opening tag */
  1475. if (size < 2 || data[0] != '<')
  1476. return 0;
  1477. i = 1;
  1478. while (i < size && data[i] != '>' && data[i] != ' ')
  1479. i++;
  1480. if (i < size)
  1481. curtag = find_block_tag((char *)data + 1, (int)i - 1);
  1482. /* handling of special cases */
  1483. if (!curtag) {
  1484. /* HTML comment, laxist form */
  1485. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  1486. i = 5;
  1487. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  1488. i++;
  1489. i++;
  1490. if (i < size)
  1491. j = is_empty(data + i, size - i);
  1492. if (j) {
  1493. work.size = i + j;
  1494. if (do_render && rndr->cb.blockhtml)
  1495. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1496. return work.size;
  1497. }
  1498. }
  1499. /* HR, which is the only self-closing block tag considered */
  1500. if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
  1501. i = 3;
  1502. while (i < size && data[i] != '>')
  1503. i++;
  1504. if (i + 1 < size) {
  1505. i++;
  1506. j = is_empty(data + i, size - i);
  1507. if (j) {
  1508. work.size = i + j;
  1509. if (do_render && rndr->cb.blockhtml)
  1510. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1511. return work.size;
  1512. }
  1513. }
  1514. }
  1515. /* no special case recognised */
  1516. return 0;
  1517. }
  1518. /* looking for an unindented matching closing tag */
  1519. /* followed by a blank line */
  1520. tag_end = htmlblock_end(curtag, rndr, data, size, 1);
  1521. /* if not found, trying a second pass looking for indented match */
  1522. /* but not if tag is "ins" or "del" (following original Markdown.pl) */
  1523. if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
  1524. tag_end = htmlblock_end(curtag, rndr, data, size, 0);
  1525. }
  1526. if (!tag_end)
  1527. return 0;
  1528. /* the end of the block has been found */
  1529. work.size = tag_end;
  1530. if (do_render && rndr->cb.blockhtml)
  1531. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1532. return tag_end;
  1533. }
  1534. static void
  1535. parse_table_row(
  1536. struct buf *ob,
  1537. struct sd_markdown *rndr,
  1538. uint8_t *data,
  1539. size_t size,
  1540. size_t columns,
  1541. int *col_data,
  1542. int header_flag)
  1543. {
  1544. size_t i = 0, col;
  1545. struct buf *row_work = 0;
  1546. if (!rndr->cb.table_cell || !rndr->cb.table_row)
  1547. return;
  1548. row_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1549. if (i < size && data[i] == '|')
  1550. i++;
  1551. for (col = 0; col < columns && i < size; ++col) {
  1552. size_t cell_start, cell_end;
  1553. struct buf *cell_work;
  1554. cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1555. while (i < size && _isspace(data[i]))
  1556. i++;
  1557. cell_start = i;
  1558. while (i < size && data[i] != '|')
  1559. i++;
  1560. cell_end = i - 1;
  1561. while (cell_end > cell_start && _isspace(data[cell_end]))
  1562. cell_end--;
  1563. parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
  1564. rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
  1565. rndr_popbuf(rndr, BUFFER_SPAN);
  1566. i++;
  1567. }
  1568. for (; col < columns; ++col) {
  1569. struct buf empty_cell = { 0, 0, 0, 0 };
  1570. rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
  1571. }
  1572. rndr->cb.table_row(ob, row_work, rndr->opaque);
  1573. rndr_popbuf(rndr, BUFFER_SPAN);
  1574. }
  1575. static size_t
  1576. parse_table_header(
  1577. struct buf *ob,
  1578. struct sd_markdown *rndr,
  1579. uint8_t *data,
  1580. size_t size,
  1581. size_t *columns,
  1582. int **column_data)
  1583. {
  1584. int pipes;
  1585. size_t i = 0, col, header_end, under_end;
  1586. pipes = 0;
  1587. while (i < size && data[i] != '\n')
  1588. if (data[i++] == '|')
  1589. pipes++;
  1590. if (i == size || pipes == 0)
  1591. return 0;
  1592. header_end = i;
  1593. while (header_end > 0 && _isspace(data[header_end - 1]))
  1594. header_end--;
  1595. if (data[0] == '|')
  1596. pipes--;
  1597. if (header_end && data[header_end - 1] == '|')
  1598. pipes--;
  1599. *columns = pipes + 1;
  1600. *column_data = calloc(*columns, sizeof(int));
  1601. /* Parse the header underline */
  1602. i++;
  1603. if (i < size && data[i] == '|')
  1604. i++;
  1605. under_end = i;
  1606. while (under_end < size && data[under_end] != '\n')
  1607. under_end++;
  1608. for (col = 0; col < *columns && i < under_end; ++col) {
  1609. size_t dashes = 0;
  1610. while (i < under_end && data[i] == ' ')
  1611. i++;
  1612. if (data[i] == ':') {
  1613. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
  1614. dashes++;
  1615. }
  1616. while (i < under_end && data[i] == '-') {
  1617. i++; dashes++;
  1618. }
  1619. if (i < under_end && data[i] == ':') {
  1620. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
  1621. dashes++;
  1622. }
  1623. while (i < under_end && data[i] == ' ')
  1624. i++;
  1625. if (i < under_end && data[i] != '|')
  1626. break;
  1627. if (dashes < 3)
  1628. break;
  1629. i++;
  1630. }
  1631. if (col < *columns)
  1632. return 0;
  1633. parse_table_row(
  1634. ob, rndr, data,
  1635. header_end,
  1636. *columns,
  1637. *column_data,
  1638. MKD_TABLE_HEADER
  1639. );
  1640. return under_end + 1;
  1641. }
  1642. static size_t
  1643. parse_table(
  1644. struct buf *ob,
  1645. struct sd_markdown *rndr,
  1646. uint8_t *data,
  1647. size_t size)
  1648. {
  1649. size_t i;
  1650. struct buf *header_work = 0;
  1651. struct buf *body_work = 0;
  1652. size_t columns;
  1653. int *col_data = NULL;
  1654. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1655. body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1656. i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
  1657. if (i > 0) {
  1658. while (i < size) {
  1659. size_t row_start;
  1660. int pipes = 0;
  1661. row_start = i;
  1662. while (i < size && data[i] != '\n')
  1663. if (data[i++] == '|')
  1664. pipes++;
  1665. if (pipes == 0 || i == size) {
  1666. i = row_start;
  1667. break;
  1668. }
  1669. parse_table_row(
  1670. body_work,
  1671. rndr,
  1672. data + row_start,
  1673. i - row_start,
  1674. columns,
  1675. col_data, 0
  1676. );
  1677. i++;
  1678. }
  1679. if (rndr->cb.table)
  1680. rndr->cb.table(ob, header_work, body_work, rndr->opaque);
  1681. }
  1682. free(col_data);
  1683. rndr_popbuf(rndr, BUFFER_SPAN);
  1684. rndr_popbuf(rndr, BUFFER_BLOCK);
  1685. return i;
  1686. }
  1687. /* parse_block • parsing of one block, returning next uint8_t to parse */
  1688. static void
  1689. parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1690. {
  1691. size_t beg, end, i;
  1692. uint8_t *txt_data;
  1693. beg = 0;
  1694. if (rndr->work_bufs[BUFFER_SPAN].size +
  1695. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  1696. return;
  1697. while (beg < size) {
  1698. txt_data = data + beg;
  1699. end = size - beg;
  1700. if (is_atxheader(rndr, txt_data, end))
  1701. beg += parse_atxheader(ob, rndr, txt_data, end);
  1702. else if (data[beg] == '<' && rndr->cb.blockhtml &&
  1703. (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
  1704. beg += i;
  1705. else if ((i = is_empty(txt_data, end)) != 0)
  1706. beg += i;
  1707. else if (is_hrule(txt_data, end)) {
  1708. if (rndr->cb.hrule)
  1709. rndr->cb.hrule(ob, rndr->opaque);
  1710. while (beg < size && data[beg] != '\n')
  1711. beg++;
  1712. beg++;
  1713. }
  1714. else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
  1715. (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
  1716. beg += i;
  1717. else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
  1718. (i = parse_table(ob, rndr, txt_data, end)) != 0)
  1719. beg += i;
  1720. else if (prefix_quote(txt_data, end))
  1721. beg += parse_blockquote(ob, rndr, txt_data, end);
  1722. else if (prefix_code(txt_data, end))
  1723. beg += parse_blockcode(ob, rndr, txt_data, end);
  1724. else if (prefix_uli(txt_data, end))
  1725. beg += parse_list(ob, rndr, txt_data, end, 0);
  1726. else if (prefix_oli(txt_data, end))
  1727. beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
  1728. else
  1729. beg += parse_para

Large files files are truncated, but you can click here to view the full file