PageRenderTime 33ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 1ms

/req/redcarpet2/ext/redcarpet/markdown.c

http://github.com/shoes/shoes
C | 2425 lines | 2205 code | 143 blank | 77 comment | 185 complexity | 90cc2e89f815e51af62b8a7bd9866a8d MD5 | raw file
Possible License(s): GPL-3.0, GPL-2.0, 0BSD, BSD-3-Clause
  1. /* markdown.c - generic markdown parser */
  2. /*
  3. * Copyright (c) 2009, Natacha Porté
  4. * Copyright (c) 2011, Vicent Marti
  5. *
  6. * Permission to use, copy, modify, and distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. #include "markdown.h"
  19. #include "stack.h"
  20. #include <assert.h>
  21. #include <string.h>
  22. #include <ctype.h>
  23. #include <stdio.h>
  24. #define REF_TABLE_SIZE 8
  25. #define BUFFER_BLOCK 0
  26. #define BUFFER_SPAN 1
  27. #define MKD_LI_END 8 /* internal list flag */
  28. #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
  29. #define GPERF_DOWNCASE 1
  30. #define GPERF_CASE_STRNCMP 1
  31. #include "html_blocks.h"
  32. /***************
  33. * LOCAL TYPES *
  34. ***************/
/* link_ref: one reference-style link definition, stored in a chained hash table */
struct link_ref {
	unsigned int id;	/* hash of the reference name, as produced by hash_link_ref() */
	struct buf *link;	/* link destination; released by free_link_refs() */
	struct buf *title;	/* link title; released by free_link_refs() */
	struct link_ref *next;	/* next entry chained in the same table bucket */
};
/* char_trigger: function pointer to render active chars */
/* returns the number of chars taken care of; parse_inline treats 0 as */
/* "not handled" and emits the active char as ordinary text */
/* ob is the output buffer, rndr the renderer state */
/* data is the pointer of the beginning of the span */
/* offset is the number of valid chars before data */
/* size is the number of chars available starting at data */
struct sd_markdown;
typedef size_t
(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
/* forward declarations of the triggers referenced by markdown_char_ptrs */
static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
/*
 * markdown_char_t: action codes stored in sd_markdown.active_char[].
 * Each value is used by parse_inline as an index into markdown_char_ptrs,
 * so the order here must stay in step with that table.
 */
enum markdown_char_t {
	MD_CHAR_NONE = 0,	/* inactive char: copied through as normal text */
	MD_CHAR_EMPHASIS,
	MD_CHAR_CODESPAN,
	MD_CHAR_LINEBREAK,
	MD_CHAR_LINK,
	MD_CHAR_LANGLE,
	MD_CHAR_ESCAPE,
	MD_CHAR_ENTITITY,	/* (sic) name kept as spelled; slot holds char_entity */
	MD_CHAR_AUTOLINK_URL,
	MD_CHAR_AUTOLINK_EMAIL,
	MD_CHAR_AUTOLINK_WWW,
	MD_CHAR_SUPERSCRIPT,
};
  74. static char_trigger markdown_char_ptrs[] = {
  75. NULL,
  76. &char_emphasis,
  77. &char_codespan,
  78. &char_linebreak,
  79. &char_link,
  80. &char_langle_tag,
  81. &char_escape,
  82. &char_entity,
  83. &char_autolink_url,
  84. &char_autolink_email,
  85. &char_autolink_www,
  86. &char_superscript,
  87. };
/* sd_markdown: state of one particular renderer instance */
struct sd_markdown {
	struct sd_callbacks cb;			/* renderer callbacks; a NULL entry disables that construct */
	void *opaque;				/* user pointer handed to the callbacks */
	struct link_ref *refs[REF_TABLE_SIZE];	/* hash table of reference-style link definitions */
	uint8_t active_char[256];		/* per-byte action code (markdown_char_t); 0 = plain text */
	struct stack work_bufs[2];		/* scratch buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
	unsigned int ext_flags;			/* MKDEXT_* extension bits */
	size_t max_nesting;			/* parse_inline stops once more work buffers than this are in use */
};
  98. /***************************
  99. * HELPER FUNCTIONS *
  100. ***************************/
  101. static inline struct buf *
  102. rndr_newbuf(struct sd_markdown *rndr, int type)
  103. {
  104. static const size_t buf_size[2] = {256, 64};
  105. struct buf *work = NULL;
  106. struct stack *pool = &rndr->work_bufs[type];
  107. if (pool->size < pool->asize &&
  108. pool->item[pool->size] != NULL) {
  109. work = pool->item[pool->size++];
  110. work->size = 0;
  111. } else {
  112. work = bufnew(buf_size[type]);
  113. stack_push(pool, work);
  114. }
  115. return work;
  116. }
  117. static inline void
  118. rndr_popbuf(struct sd_markdown *rndr, int type)
  119. {
  120. rndr->work_bufs[type].size--;
  121. }
  122. static void
  123. unscape_text(struct buf *ob, struct buf *src)
  124. {
  125. size_t i = 0, org;
  126. while (i < src->size) {
  127. org = i;
  128. while (i < src->size && src->data[i] != '\\')
  129. i++;
  130. if (i > org)
  131. bufput(ob, src->data + org, i - org);
  132. if (i + 1 >= src->size)
  133. break;
  134. bufputc(ob, src->data[i + 1]);
  135. i += 2;
  136. }
  137. }
  138. static unsigned int
  139. hash_link_ref(const uint8_t *link_ref, size_t length)
  140. {
  141. size_t i;
  142. unsigned int hash = 0;
  143. for (i = 0; i < length; ++i)
  144. hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
  145. return hash;
  146. }
  147. static struct link_ref *
  148. add_link_ref(
  149. struct link_ref **references,
  150. const uint8_t *name, size_t name_size)
  151. {
  152. struct link_ref *ref = calloc(1, sizeof(struct link_ref));
  153. if (!ref)
  154. return NULL;
  155. ref->id = hash_link_ref(name, name_size);
  156. ref->next = references[ref->id % REF_TABLE_SIZE];
  157. references[ref->id % REF_TABLE_SIZE] = ref;
  158. return ref;
  159. }
  160. static struct link_ref *
  161. find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
  162. {
  163. unsigned int hash = hash_link_ref(name, length);
  164. struct link_ref *ref = NULL;
  165. ref = references[hash % REF_TABLE_SIZE];
  166. while (ref != NULL) {
  167. if (ref->id == hash)
  168. return ref;
  169. ref = ref->next;
  170. }
  171. return NULL;
  172. }
  173. static void
  174. free_link_refs(struct link_ref **references)
  175. {
  176. size_t i;
  177. for (i = 0; i < REF_TABLE_SIZE; ++i) {
  178. struct link_ref *r = references[i];
  179. struct link_ref *next;
  180. while (r) {
  181. next = r->next;
  182. bufrelease(r->link);
  183. bufrelease(r->title);
  184. free(r);
  185. r = next;
  186. }
  187. }
  188. }
  189. /*
  190. * Check whether a char is a Markdown space.
  191. * Right now we only consider spaces the actual
  192. * space and a newline: tabs and carriage returns
  193. * are filtered out during the preprocessing phase.
  194. *
  195. * If we wanted to actually be UTF-8 compliant, we
  196. * should instead extract an Unicode codepoint from
  197. * this character and check for space properties.
  198. */
  199. static inline int
  200. _isspace(int c)
  201. {
  202. return c == ' ' || c == '\n';
  203. }
  204. /****************************
  205. * INLINE PARSING FUNCTIONS *
  206. ****************************/
  207. /* is_mail_autolink • looks for the address part of a mail autolink and '>' */
  208. /* this is less strict than the original markdown e-mail address matching */
  209. static size_t
  210. is_mail_autolink(uint8_t *data, size_t size)
  211. {
  212. size_t i = 0, nb = 0;
  213. /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
  214. for (i = 0; i < size; ++i) {
  215. if (isalnum(data[i]))
  216. continue;
  217. switch (data[i]) {
  218. case '@':
  219. nb++;
  220. case '-':
  221. case '.':
  222. case '_':
  223. break;
  224. case '>':
  225. return (nb == 1) ? i + 1 : 0;
  226. default:
  227. return 0;
  228. }
  229. }
  230. return 0;
  231. }
  232. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  233. static size_t
  234. tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
  235. {
  236. size_t i, j;
  237. /* a valid tag can't be shorter than 3 chars */
  238. if (size < 3) return 0;
  239. /* begins with a '<' optionally followed by '/', followed by letter or number */
  240. if (data[0] != '<') return 0;
  241. i = (data[1] == '/') ? 2 : 1;
  242. if (!isalnum(data[i]))
  243. return 0;
  244. /* scheme test */
  245. *autolink = MKDA_NOT_AUTOLINK;
  246. /* try to find the beginning of an URI */
  247. while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  248. i++;
  249. if (i > 1 && data[i] == '@') {
  250. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  251. *autolink = MKDA_EMAIL;
  252. return i + j;
  253. }
  254. }
  255. if (i > 2 && data[i] == ':') {
  256. *autolink = MKDA_NORMAL;
  257. i++;
  258. }
  259. /* completing autolink test: no whitespace or ' or " */
  260. if (i >= size)
  261. *autolink = MKDA_NOT_AUTOLINK;
  262. else if (*autolink) {
  263. j = i;
  264. while (i < size) {
  265. if (data[i] == '\\') i += 2;
  266. else if (data[i] == '>' || data[i] == '\'' ||
  267. data[i] == '"' || data[i] == ' ' || data[i] == '\n')
  268. break;
  269. else i++;
  270. }
  271. if (i >= size) return 0;
  272. if (i > j && data[i] == '>') return i + 1;
  273. /* one of the forbidden chars has been found */
  274. *autolink = MKDA_NOT_AUTOLINK;
  275. }
  276. /* looking for sometinhg looking like a tag end */
  277. while (i < size && data[i] != '>') i++;
  278. if (i >= size) return 0;
  279. return i + 1;
  280. }
/*
 * parse_inline: parses inline markdown elements
 *
 * Renders data[0..size) into ob. Runs of inactive characters are passed to
 * the normal_text callback (or copied verbatim when it is not set); every
 * active character is dispatched to its trigger in markdown_char_ptrs.
 * Nothing is emitted once the number of work buffers in use exceeds
 * rndr->max_nesting.
 */
static void
parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
{
	size_t i = 0, end = 0;
	uint8_t action = 0;
	struct buf work = { 0, 0, 0, 0 };

	/* nesting guard: each level of recursion holds at least one work buffer */
	if (rndr->work_bufs[BUFFER_SPAN].size +
		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
		return;

	while (i < size) {
		/* copying inactive chars into the output */
		while (end < size && (action = rndr->active_char[data[end]]) == 0) {
			end++;
		}

		/* emit the plain run data[i..end) */
		if (rndr->cb.normal_text) {
			work.data = data + i;
			work.size = end - i;
			rndr->cb.normal_text(ob, &work, rndr->opaque);
		}
		else
			bufput(ob, data + i, end - i);

		if (end >= size) break;
		i = end;

		/* calling the trigger */
		end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
		if (!end) /* no action from the callback */
			/* i stays put, so the active char becomes the start of the next plain run */
			end = i + 1;
		else {
			/* skip everything the trigger consumed */
			i += end;
			end = i;
		}
	}
}
/*
 * find_emph_char: looks for the next emphasis char, skipping other constructs
 *
 * Searches data[1..size) for the byte c and returns its index, or 0 when
 * none is found. Code spans and links are skipped as a whole. If such a
 * construct turns out to be unterminated, the first c seen inside it (if any)
 * is returned instead.
 */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t i = 1;

	while (i < size) {
		/* run up to the next byte of interest: c, a backtick or '[' */
		while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
			i++;

		if (i == size)
			return 0;

		if (data[i] == c)
			return i;

		/* not counting escaped chars */
		if (i && data[i - 1] == '\\') {
			i++; continue;
		}

		if (data[i] == '`') {
			size_t span_nb = 0, bt;
			size_t tmp_i = 0;	/* first c seen inside the span, 0 if none */

			/* counting the number of opening backticks */
			while (i < size && data[i] == '`') {
				i++; span_nb++;
			}

			if (i >= size) return 0;

			/* finding the matching closing sequence */
			bt = 0;
			while (i < size && bt < span_nb) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				if (data[i] == '`') bt++;
				else bt = 0;
				i++;
			}

			/* input exhausted while scanning the span: fall back to the c found inside */
			if (i >= size) return tmp_i;
			i++;
		}
		/* skipping a link */
		else if (data[i] == '[') {
			size_t tmp_i = 0;	/* first c seen inside the link, 0 if none */
			uint8_t cc;		/* closing delimiter of the link's second part */

			/* skip the bracketed text */
			i++;
			while (i < size && data[i] != ']') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			i++;
			/* whitespace is allowed between the two parts of a link */
			while (i < size && (data[i] == ' ' || data[i] == '\n'))
				i++;

			if (i >= size)
				return tmp_i;

			switch (data[i]) {
			case '[':
				cc = ']'; break;

			case '(':
				cc = ')'; break;

			default:
				/* no second part follows the bracketed text */
				if (tmp_i)
					return tmp_i;
				else
					continue;
			}

			/* skip the second part, (...) or [...] */
			i++;
			while (i < size && data[i] != cc) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			if (i >= size)
				return tmp_i;

			i++;
		}
	}

	return 0;
}
/*
 * parse_emph1: parsing single emphasis
 * closed by a symbol not preceded by whitespace and not followed by symbol
 *
 * data points just past the opening symbol c. Returns the number of bytes
 * consumed including the closing symbol, or 0 when no emphasis was rendered
 * (no emphasis callback, no closer found, or the callback returned 0).
 */
static size_t
parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
{
	size_t i = 0, len;
	struct buf *work = 0;
	int r;

	if (!rndr->cb.emphasis) return 0;

	/* skipping one symbol if coming from emph3 */
	if (size > 1 && data[0] == c && data[1] == c) i = 1;

	while (i < size) {
		len = find_emph_char(data + i, size - i, c);
		if (!len) return 0;
		i += len;
		if (i >= size) return 0;

		if (data[i] == c && !_isspace(data[i - 1])) {

			if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
				/* with this extension the closer must end the input or be followed by space or punctuation */
				if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
					continue;
			}

			/* render the enclosed text into a scratch buffer, then wrap it */
			work = rndr_newbuf(rndr, BUFFER_SPAN);
			parse_inline(work, rndr, data, i);
			r = rndr->cb.emphasis(ob, work, rndr->opaque);
			rndr_popbuf(rndr, BUFFER_SPAN);
			return r ? i + 1 : 0;
		}
	}

	return 0;
}
/*
 * parse_emph2: parsing double emphasis, or strikethrough when c is '~'
 *
 * data points just past the two opening symbols. The span is closed by two
 * consecutive c symbols not preceded by whitespace. Returns the number of
 * bytes consumed including the closing pair, or 0 when nothing was rendered.
 */
static size_t
parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
{
	int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
	size_t i = 0, len;
	struct buf *work = 0;
	int r;

	/* '~~' maps to the strikethrough callback, every other symbol to double emphasis */
	render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;

	if (!render_method)
		return 0;

	while (i < size) {
		len = find_emph_char(data + i, size - i, c);
		if (!len) return 0;
		i += len;

		if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
			work = rndr_newbuf(rndr, BUFFER_SPAN);
			parse_inline(work, rndr, data, i);
			r = render_method(ob, work, rndr->opaque);
			rndr_popbuf(rndr, BUFFER_SPAN);
			return r ? i + 2 : 0;
		}
		/* a lone symbol: step over it and keep looking */
		i++;
	}
	return 0;
}
/*
 * parse_emph3: parsing triple emphasis
 * finds the first closing tag, and delegates to the other emph
 *
 * data points just past the three opening symbols. When the first closer is
 * shorter than three symbols, parsing is re-run by parse_emph1 or parse_emph2
 * from a start pointer moved back over some of the opening symbols; this
 * relies on those symbols sitting immediately before data, as they do when
 * called from char_emphasis. Returns the number of bytes consumed relative to
 * data, or 0 when nothing was rendered.
 */
static size_t
parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
{
	size_t i = 0, len;
	int r;

	while (i < size) {
		len = find_emph_char(data + i, size - i, c);
		if (!len) return 0;
		i += len;

		/* skip whitespace preceded symbols */
		if (data[i] != c || _isspace(data[i - 1]))
			continue;

		if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
			/* triple symbol found */
			struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);

			parse_inline(work, rndr, data, i);
			r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
			rndr_popbuf(rndr, BUFFER_SPAN);
			return r ? i + 3 : 0;

		} else if (i + 1 < size && data[i + 1] == c) {
			/* double symbol found, handing over to emph1 */
			/* step back over two openers; the result is rebased to data */
			len = parse_emph1(ob, rndr, data - 2, size + 2, c);
			if (!len) return 0;
			else return len - 2;

		} else {
			/* single symbol found, handing over to emph2 */
			/* step back over one opener; the result is rebased to data */
			len = parse_emph2(ob, rndr, data - 1, size + 1, c);
			if (!len) return 0;
			else return len - 1;
		}
	}
	return 0;
}
  478. /* char_emphasis • single and double emphasis parsing */
  479. static size_t
  480. char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  481. {
  482. uint8_t c = data[0];
  483. size_t ret;
  484. if (size > 2 && data[1] != c) {
  485. /* whitespace cannot follow an opening emphasis;
  486. * strikethrough only takes two characters '~~' */
  487. if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
  488. return 0;
  489. return ret + 1;
  490. }
  491. if (size > 3 && data[1] == c && data[2] != c) {
  492. if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
  493. return 0;
  494. return ret + 2;
  495. }
  496. if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
  497. if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
  498. return 0;
  499. return ret + 3;
  500. }
  501. return 0;
  502. }
  503. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  504. static size_t
  505. char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  506. {
  507. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  508. return 0;
  509. /* removing the last space from ob and rendering */
  510. while (ob->size && ob->data[ob->size - 1] == ' ')
  511. ob->size--;
  512. return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
  513. }
  514. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  515. static size_t
  516. char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  517. {
  518. size_t end, nb = 0, i, f_begin, f_end;
  519. /* counting the number of backticks in the delimiter */
  520. while (nb < size && data[nb] == '`')
  521. nb++;
  522. /* finding the next delimiter */
  523. i = 0;
  524. for (end = nb; end < size && i < nb; end++) {
  525. if (data[end] == '`') i++;
  526. else i = 0;
  527. }
  528. if (i < nb && end >= size)
  529. return 0; /* no matching delimiter */
  530. /* trimming outside whitespaces */
  531. f_begin = nb;
  532. while (f_begin < end && data[f_begin] == ' ')
  533. f_begin++;
  534. f_end = end - nb;
  535. while (f_end > nb && data[f_end-1] == ' ')
  536. f_end--;
  537. /* real code span */
  538. if (f_begin < f_end) {
  539. struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
  540. if (!rndr->cb.codespan(ob, &work, rndr->opaque))
  541. end = 0;
  542. } else {
  543. if (!rndr->cb.codespan(ob, 0, rndr->opaque))
  544. end = 0;
  545. }
  546. return end;
  547. }
  548. /* char_escape • '\\' backslash escape */
  549. static size_t
  550. char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  551. {
  552. static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>";
  553. struct buf work = { 0, 0, 0, 0 };
  554. if (size > 1) {
  555. if (strchr(escape_chars, data[1]) == NULL)
  556. return 0;
  557. if (rndr->cb.normal_text) {
  558. work.data = data + 1;
  559. work.size = 1;
  560. rndr->cb.normal_text(ob, &work, rndr->opaque);
  561. }
  562. else bufputc(ob, data[1]);
  563. }
  564. return 2;
  565. }
  566. /* char_entity • '&' escaped when it doesn't belong to an entity */
  567. /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
  568. static size_t
  569. char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  570. {
  571. size_t end = 1;
  572. struct buf work = { 0, 0, 0, 0 };
  573. if (end < size && data[end] == '#')
  574. end++;
  575. while (end < size && isalnum(data[end]))
  576. end++;
  577. if (end < size && data[end] == ';')
  578. end++; /* real entity */
  579. else
  580. return 0; /* lone '&' */
  581. if (rndr->cb.entity) {
  582. work.data = data;
  583. work.size = end;
  584. rndr->cb.entity(ob, &work, rndr->opaque);
  585. }
  586. else bufput(ob, data, end);
  587. return end;
  588. }
  589. /* char_langle_tag • '<' when tags or autolinks are allowed */
  590. static size_t
  591. char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  592. {
  593. enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
  594. size_t end = tag_length(data, size, &altype);
  595. struct buf work = { data, end, 0, 0 };
  596. int ret = 0;
  597. if (end > 2) {
  598. if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
  599. struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  600. work.data = data + 1;
  601. work.size = end - 2;
  602. unscape_text(u_link, &work);
  603. ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
  604. rndr_popbuf(rndr, BUFFER_SPAN);
  605. }
  606. else if (rndr->cb.raw_html_tag)
  607. ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
  608. }
  609. if (!ret) return 0;
  610. else return end;
  611. }
  612. static size_t
  613. char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  614. {
  615. struct buf *link, *link_url;
  616. size_t link_len, rewind;
  617. if (!rndr->cb.link)
  618. return 0;
  619. link = rndr_newbuf(rndr, BUFFER_SPAN);
  620. if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
  621. link_url = rndr_newbuf(rndr, BUFFER_SPAN);
  622. BUFPUTSL(link_url, "http://");
  623. bufput(link_url, link->data, link->size);
  624. ob->size -= rewind;
  625. rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
  626. rndr_popbuf(rndr, BUFFER_SPAN);
  627. }
  628. rndr_popbuf(rndr, BUFFER_SPAN);
  629. return link_len;
  630. }
  631. static size_t
  632. char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  633. {
  634. struct buf *link;
  635. size_t link_len, rewind;
  636. if (!rndr->cb.autolink)
  637. return 0;
  638. link = rndr_newbuf(rndr, BUFFER_SPAN);
  639. if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
  640. ob->size -= rewind;
  641. rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
  642. }
  643. rndr_popbuf(rndr, BUFFER_SPAN);
  644. return link_len;
  645. }
  646. static size_t
  647. char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  648. {
  649. struct buf *link;
  650. size_t link_len, rewind;
  651. if (!rndr->cb.autolink)
  652. return 0;
  653. link = rndr_newbuf(rndr, BUFFER_SPAN);
  654. if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
  655. ob->size -= rewind;
  656. rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
  657. }
  658. rndr_popbuf(rndr, BUFFER_SPAN);
  659. return link_len;
  660. }
/*
 * char_link: trigger for '['; parses a link or an image
 *
 * Handles the three link forms: inline "[text](url "title")", reference
 * "[text][id]" (an empty id reuses the text) and shortcut "[text]".
 * The construct is an image when the byte before '[' is '!'.
 * Returns the number of bytes consumed starting at '[', or 0 when nothing
 * was rendered. Every scratch buffer taken here is handed back at `cleanup`.
 */
static size_t
char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
{
	int is_img = (offset && data[-1] == '!'), level;
	size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
	struct buf *content = 0;
	struct buf *link = 0;
	struct buf *title = 0;
	struct buf *u_link = 0;
	/* remembered so that cleanup can drop every span buffer taken below */
	size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
	int text_has_nl = 0, ret = 0;

	/* checking whether the correct renderer exists */
	if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
		goto cleanup;

	/* looking for the matching closing bracket */
	for (level = 1; i < size; i++) {
		if (data[i] == '\n')
			text_has_nl = 1;

		else if (data[i - 1] == '\\')
			continue;

		else if (data[i] == '[')
			level++;

		else if (data[i] == ']') {
			level--;
			if (level <= 0)
				break;
		}
	}

	if (i >= size)
		goto cleanup;

	/* txt_e: index of the ']' that closes the link text */
	txt_e = i;
	i++;

	/* skip any amount of whitespace or newline */
	/* (this is much more laxist than original markdown syntax) */
	while (i < size && _isspace(data[i]))
		i++;

	/* inline style link */
	if (i < size && data[i] == '(') {
		/* skipping initial whitespace */
		i++;

		while (i < size && _isspace(data[i]))
			i++;

		link_b = i;

		/* looking for link end: ' " ) */
		while (i < size) {
			if (data[i] == '\\') i += 2;
			else if (data[i] == ')' || data[i] == '\'' || data[i] == '"') break;
			else i++;
		}

		if (i >= size) goto cleanup;
		link_e = i;

		/* looking for title end if present */
		if (data[i] == '\'' || data[i] == '"') {
			i++;
			title_b = i;

			while (i < size) {
				if (data[i] == '\\') i += 2;
				else if (data[i] == ')') break;
				else i++;
			}

			if (i >= size) goto cleanup;

			/* skipping whitespaces after title */
			title_e = i - 1;
			while (title_e > title_b && _isspace(data[title_e]))
				title_e--;

			/* checking for closing quote presence */
			if (data[title_e] != '\'' && data[title_e] != '"') {
				/* no closing quote: the quoted part belongs to the link itself */
				title_b = title_e = 0;
				link_e = i;
			}
		}

		/* remove whitespace at the end of the link */
		while (link_e > link_b && _isspace(data[link_e - 1]))
			link_e--;

		/* remove optional angle brackets around the link */
		if (data[link_b] == '<') link_b++;
		if (data[link_e - 1] == '>') link_e--;

		/* building escaped link and title */
		if (link_e > link_b) {
			link = rndr_newbuf(rndr, BUFFER_SPAN);
			bufput(link, data + link_b, link_e - link_b);
		}

		if (title_e > title_b) {
			title = rndr_newbuf(rndr, BUFFER_SPAN);
			bufput(title, data + title_b, title_e - title_b);
		}

		/* step over the closing ')' */
		i++;
	}

	/* reference style link */
	else if (i < size && data[i] == '[') {
		struct buf id = { 0, 0, 0, 0 };
		struct link_ref *lr;

		/* looking for the id */
		i++;
		link_b = i;
		while (i < size && data[i] != ']') i++;
		if (i >= size) goto cleanup;
		link_e = i;

		/* finding the link_ref */
		if (link_b == link_e) {
			/* empty id: the link text itself is the reference name */
			if (text_has_nl) {
				struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
				size_t j;

				/* newlines become a single space unless a space already precedes them */
				for (j = 1; j < txt_e; j++) {
					if (data[j] != '\n')
						bufputc(b, data[j]);
					else if (data[j - 1] != ' ')
						bufputc(b, ' ');
				}

				id.data = b->data;
				id.size = b->size;
			} else {
				id.data = data + 1;
				id.size = txt_e - 1;
			}
		} else {
			id.data = data + link_b;
			id.size = link_e - link_b;
		}

		lr = find_link_ref(rndr->refs, id.data, id.size);
		if (!lr)
			goto cleanup;

		/* keeping link and title from link_ref */
		link = lr->link;
		title = lr->title;
		i++;
	}

	/* shortcut reference style link */
	else {
		struct buf id = { 0, 0, 0, 0 };
		struct link_ref *lr;

		/* crafting the id */
		if (text_has_nl) {
			struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
			size_t j;

			/* newlines become a single space unless a space already precedes them */
			for (j = 1; j < txt_e; j++) {
				if (data[j] != '\n')
					bufputc(b, data[j]);
				else if (data[j - 1] != ' ')
					bufputc(b, ' ');
			}

			id.data = b->data;
			id.size = b->size;
		} else {
			id.data = data + 1;
			id.size = txt_e - 1;
		}

		/* finding the link_ref */
		lr = find_link_ref(rndr->refs, id.data, id.size);
		if (!lr)
			goto cleanup;

		/* keeping link and title from link_ref */
		link = lr->link;
		title = lr->title;

		/* rewinding the whitespace */
		i = txt_e + 1;
	}

	/* building content: img alt is escaped, link content is parsed */
	if (txt_e > 1) {
		content = rndr_newbuf(rndr, BUFFER_SPAN);
		if (is_img) bufput(content, data + 1, txt_e - 1);
		else parse_inline(content, rndr, data + 1, txt_e - 1);
	}

	/* the destination is passed to the renderer with backslash escapes removed */
	if (link) {
		u_link = rndr_newbuf(rndr, BUFFER_SPAN);
		unscape_text(u_link, link);
	}

	/* calling the relevant rendering function */
	if (is_img) {
		/* drop the '!' that was already emitted as normal text */
		if (ob->size && ob->data[ob->size - 1] == '!')
			ob->size -= 1;

		ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
	} else {
		ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
	}

	/* cleanup */
cleanup:
	/* restore the span pool to its size on entry */
	rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
	return ret ? i : 0;
}
  842. static size_t
  843. char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  844. {
  845. size_t sup_start, sup_len;
  846. struct buf *sup;
  847. if (!rndr->cb.superscript)
  848. return 0;
  849. if (size < 2)
  850. return 0;
  851. if (data[1] == '(') {
  852. sup_start = sup_len = 2;
  853. while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
  854. sup_len++;
  855. if (sup_len == size)
  856. return 0;
  857. } else {
  858. sup_start = sup_len = 1;
  859. while (sup_len < size && !_isspace(data[sup_len]))
  860. sup_len++;
  861. }
  862. if (sup_len - sup_start == 0)
  863. return (sup_start == 2) ? 3 : 0;
  864. sup = rndr_newbuf(rndr, BUFFER_SPAN);
  865. parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
  866. rndr->cb.superscript(ob, sup, rndr->opaque);
  867. rndr_popbuf(rndr, BUFFER_SPAN);
  868. return (sup_start == 2) ? sup_len + 1 : sup_len;
  869. }
  870. /*********************************
  871. * BLOCK-LEVEL PARSING FUNCTIONS *
  872. *********************************/
  873. /* is_empty • returns the line length when it is empty, 0 otherwise */
  874. static size_t
  875. is_empty(uint8_t *data, size_t size)
  876. {
  877. size_t i;
  878. for (i = 0; i < size && data[i] != '\n'; i++)
  879. if (data[i] != ' ')
  880. return 0;
  881. return i + 1;
  882. }
  883. /* is_hrule • returns whether a line is a horizontal rule */
  884. static int
  885. is_hrule(uint8_t *data, size_t size)
  886. {
  887. size_t i = 0, n = 0;
  888. uint8_t c;
  889. /* skipping initial spaces */
  890. if (size < 3) return 0;
  891. if (data[0] == ' ') { i++;
  892. if (data[1] == ' ') { i++;
  893. if (data[2] == ' ') { i++; } } }
  894. /* looking at the hrule uint8_t */
  895. if (i + 2 >= size
  896. || (data[i] != '*' && data[i] != '-' && data[i] != '_'))
  897. return 0;
  898. c = data[i];
  899. /* the whole line must be the char or whitespace */
  900. while (i < size && data[i] != '\n') {
  901. if (data[i] == c) n++;
  902. else if (data[i] != ' ')
  903. return 0;
  904. i++;
  905. }
  906. return n >= 3;
  907. }
/*
 * is_codefence: check if a line is a code fence; return its size if it is
 *
 * A fence is up to three leading spaces followed by at least three '~' or
 * three '`' characters. When syntax is not NULL, the text after the fence
 * (either a bare word or a {...} block with surrounding whitespace trimmed)
 * is reported through syntax->data / syntax->size, pointing into data.
 * Returns the line length plus one (covering the newline), or 0 when the
 * line is not a fence.
 */
static size_t
is_codefence(uint8_t *data, size_t size, struct buf *syntax)
{
	size_t i = 0, n = 0;
	uint8_t c;

	/* skipping initial spaces */
	if (size < 3) return 0;
	if (data[0] == ' ') { i++;
	if (data[1] == ' ') { i++;
	if (data[2] == ' ') { i++; } } }

	/* looking at the fence character */
	if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
		return 0;

	c = data[i];

	/* counting the run of fence characters */
	while (i < size && data[i] == c) {
		n++; i++;
	}

	if (n < 3)
		return 0;

	if (syntax != NULL) {
		size_t syn = 0;	/* length of the syntax text */

		while (i < size && data[i] == ' ')
			i++;

		syntax->data = data + i;

		if (i < size && data[i] == '{') {
			/* braced form: take everything up to the closing '}' */
			i++; syntax->data++;

			while (i < size && data[i] != '}' && data[i] != '\n') {
				syn++; i++;
			}

			/* an unterminated brace means this is not a fence */
			if (i == size || data[i] != '}')
				return 0;

			/* strip all whitespace at the beginning and the end
			 * of the {} block */
			while (syn > 0 && _isspace(syntax->data[0])) {
				syntax->data++; syn--;
			}

			while (syn > 0 && _isspace(syntax->data[syn - 1]))
				syn--;

			i++;
		} else {
			/* bare form: the syntax text runs up to the next whitespace */
			while (i < size && !_isspace(data[i])) {
				syn++; i++;
			}
		}

		syntax->size = syn;
	}

	/* only whitespace may follow on the rest of the line */
	while (i < size && data[i] != '\n') {
		if (!_isspace(data[i]))
			return 0;

		i++;
	}

	return i + 1;
}
  963. /* is_atxheader • returns whether the line is a hash-prefixed header */
  964. static int
  965. is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
  966. {
  967. if (data[0] != '#')
  968. return 0;
  969. if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
  970. size_t level = 0;
  971. while (level < size && level < 6 && data[level] == '#')
  972. level++;
  973. if (level < size && data[level] != ' ')
  974. return 0;
  975. }
  976. return 1;
  977. }
  /* is_headerline • returns whether the line is a setext-style hdr underline
   *
   * Returns 1 for a line made of '=' and 2 for a line made of '-', in both
   * cases optionally followed by spaces up to the end of the line; returns
   * 0 for anything else. Reads data[0] unconditionally, so size must be at
   * least 1. */
  static int
  is_headerline(uint8_t *data, size_t size)
  {
      uint8_t marker = data[0];
      size_t pos = 1;
      int level;

      if (marker == '=')
          level = 1;
      else if (marker == '-')
          level = 2;
      else
          return 0;

      /* run of marker characters, then optional trailing spaces */
      while (pos < size && data[pos] == marker)
          pos++;
      while (pos < size && data[pos] == ' ')
          pos++;

      if (pos < size && data[pos] != '\n')
          return 0;

      return level;
  }
  /* is_next_headerline • returns whether the line after the current one is
   * a setext-style header underline (same result as is_headerline, or 0
   * when there is no following line) */
  static int
  is_next_headerline(uint8_t *data, size_t size)
  {
      size_t next;

      /* find the end of the current line */
      for (next = 0; next < size && data[next] != '\n'; next++)
          ;

      /* step over the newline; give up if nothing follows */
      next++;
      if (next >= size)
          return 0;

      return is_headerline(data + next, size - next);
  }
  /* prefix_quote • returns blockquote prefix length
   *
   * The prefix is up to three spaces, a '>' and one optional space.
   * Returns 0 when the line does not start with such a prefix. */
  static size_t
  prefix_quote(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      /* up to three leading spaces */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      if (pos >= size || data[pos] != '>')
          return 0;

      /* one space after the '>' belongs to the prefix */
      if (pos + 1 < size && data[pos + 1] == ' ')
          return pos + 2;

      return pos + 1;
  }
  /* prefix_code • returns prefix length for block code
   *
   * The prefix is exactly four leading spaces; returns 4 when present and
   * 0 otherwise (including when fewer than four characters are given). */
  static size_t
  prefix_code(uint8_t *data, size_t size)
  {
      size_t k;

      if (size <= 3)
          return 0;

      for (k = 0; k < 4; k++) {
          if (data[k] != ' ')
              return 0;
      }

      return 4;
  }
  /* prefix_oli • returns ordered list item prefix
   *
   * The prefix is up to three spaces, one or more digits, a '.' and a
   * space. Returns the prefix length, or 0 when the line is not an ordered
   * list item or when the following line is a setext header underline. */
  static size_t
  prefix_oli(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      /* up to three leading spaces */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      /* at least one digit is required */
      if (pos >= size || data[pos] < '0' || data[pos] > '9')
          return 0;

      for (; pos < size && data[pos] >= '0' && data[pos] <= '9'; pos++)
          ;

      /* the number must be followed by ". " */
      if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
          return 0;

      /* an underlined line is a header, not a list item */
      if (is_next_headerline(data + pos, size - pos))
          return 0;

      return pos + 2;
  }
  /* prefix_uli • returns unordered list item prefix
   *
   * The prefix is up to three spaces, one of '*', '+' or '-', and a space.
   * Returns the prefix length, or 0 when the line is not an unordered list
   * item or when the following line is a setext header underline. */
  static size_t
  prefix_uli(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      /* up to three leading spaces */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      /* a bullet character followed by a space */
      if (pos + 1 >= size)
          return 0;
      if (data[pos] != '*' && data[pos] != '+' && data[pos] != '-')
          return 0;
      if (data[pos + 1] != ' ')
          return 0;

      /* an underlined line is a header, not a list item */
      if (is_next_headerline(data + pos, size - pos))
          return 0;

      return pos + 2;
  }
  1062. /* parse_block • parsing of one block, returning next uint8_t to parse */
  1063. static void parse_block(struct buf *ob, struct sd_markdown *rndr,
  1064. uint8_t *data, size_t size);
  1065. /* parse_blockquote • handles parsing of a blockquote fragment */
  1066. static size_t
  1067. parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1068. {
  1069. size_t beg, end = 0, pre, work_size = 0;
  1070. uint8_t *work_data = 0;
  1071. struct buf *out = 0;
  1072. out = rndr_newbuf(rndr, BUFFER_BLOCK);
  1073. beg = 0;
  1074. while (beg < size) {
  1075. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1076. pre = prefix_quote(data + beg, end - beg);
  1077. if (pre)
  1078. beg += pre; /* skipping prefix */
  1079. /* empty line followed by non-quote line */
  1080. else if (is_empty(data + beg, end - beg) &&
  1081. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  1082. !is_empty(data + end, size - end))))
  1083. break;
  1084. if (beg < end) { /* copy into the in-place working buffer */
  1085. /* bufput(work, data + beg, end - beg); */
  1086. if (!work_data)
  1087. work_data = data + beg;
  1088. else if (data + beg != work_data + work_size)
  1089. memmove(work_data + work_size, data + beg, end - beg);
  1090. work_size += end - beg;
  1091. }
  1092. beg = end;
  1093. }
  1094. parse_block(out, rndr, work_data, work_size);
  1095. if (rndr->cb.blockquote)
  1096. rndr->cb.blockquote(ob, out, rndr->opaque);
  1097. rndr_popbuf(rndr, BUFFER_BLOCK);
  1098. return end;
  1099. }
  1100. static size_t
  1101. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
  1102. /* parse_blockquote • handles parsing of a regular paragraph */
  1103. static size_t
  1104. parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1105. {
  1106. size_t i = 0, end = 0;
  1107. int level = 0;
  1108. struct buf work = { data, 0, 0, 0 };
  1109. while (i < size) {
  1110. for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
  1111. if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0)
  1112. break;
  1113. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  1114. if (data[i] == '<' && rndr->cb.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
  1115. end = i;
  1116. break;
  1117. }
  1118. }
  1119. if (is_atxheader(rndr, data + i, size - i) || is_hrule(data + i, size - i)) {
  1120. end = i;
  1121. break;
  1122. }
  1123. i = end;
  1124. }
  1125. work.size = i;
  1126. while (work.size && data[work.size - 1] == '\n')
  1127. work.size--;
  1128. if (!level) {
  1129. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1130. parse_inline(tmp, rndr, work.data, work.size);
  1131. if (rndr->cb.paragraph)
  1132. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1133. rndr_popbuf(rndr, BUFFER_BLOCK);
  1134. } else {
  1135. struct buf *header_work;
  1136. if (work.size) {
  1137. size_t beg;
  1138. i = work.size;
  1139. work.size -= 1;
  1140. while (work.size && data[work.size] != '\n')
  1141. work.size -= 1;
  1142. beg = work.size + 1;
  1143. while (work.size && data[work.size - 1] == '\n')
  1144. work.size -= 1;
  1145. if (work.size > 0) {
  1146. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1147. parse_inline(tmp, rndr, work.data, work.size);
  1148. if (rndr->cb.paragraph)
  1149. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1150. rndr_popbuf(rndr, BUFFER_BLOCK);
  1151. work.data += beg;
  1152. work.size = i - beg;
  1153. }
  1154. else work.size = i;
  1155. }
  1156. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1157. parse_inline(header_work, rndr, work.data, work.size);
  1158. if (rndr->cb.header)
  1159. rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
  1160. rndr_popbuf(rndr, BUFFER_SPAN);
  1161. }
  1162. return end;
  1163. }
  1164. /* parse_fencedcode • handles parsing of a block-level code fragment */
  1165. static size_t
  1166. parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1167. {
  1168. size_t beg, end;
  1169. struct buf *work = 0;
  1170. struct buf lang = { 0, 0, 0, 0 };
  1171. beg = is_codefence(data, size, &lang);
  1172. if (beg == 0) return 0;
  1173. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1174. while (beg < size) {
  1175. size_t fence_end;
  1176. fence_end = is_codefence(data + beg, size - beg, NULL);
  1177. if (fence_end != 0) {
  1178. beg += fence_end;
  1179. break;
  1180. }
  1181. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1182. if (beg < end) {
  1183. /* verbatim copy to the working buffer,
  1184. escaping entities */
  1185. if (is_empty(data + beg, end - beg))
  1186. bufputc(work, '\n');
  1187. else bufput(work, data + beg, end - beg);
  1188. }
  1189. beg = end;
  1190. }
  1191. if (work->size && work->data[work->size - 1] != '\n')
  1192. bufputc(work, '\n');
  1193. if (rndr->cb.blockcode)
  1194. rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
  1195. rndr_popbuf(rndr, BUFFER_BLOCK);
  1196. return beg;
  1197. }
  1198. static size_t
  1199. parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1200. {
  1201. size_t beg, end, pre;
  1202. struct buf *work = 0;
  1203. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1204. beg = 0;
  1205. while (beg < size) {
  1206. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1207. pre = prefix_code(data + beg, end - beg);
  1208. if (pre)
  1209. beg += pre; /* skipping prefix */
  1210. else if (!is_empty(data + beg, end - beg))
  1211. /* non-empty non-prefixed line breaks the pre */
  1212. break;
  1213. if (beg < end) {
  1214. /* verbatim copy to the working buffer,
  1215. escaping entities */
  1216. if (is_empty(data + beg, end - beg))
  1217. bufputc(work, '\n');
  1218. else bufput(work, data + beg, end - beg);
  1219. }
  1220. beg = end;
  1221. }
  1222. while (work->size && work->data[work->size - 1] == '\n')
  1223. work->size -= 1;
  1224. bufputc(work, '\n');
  1225. if (rndr->cb.blockcode)
  1226. rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
  1227. rndr_popbuf(rndr, BUFFER_BLOCK);
  1228. return beg;
  1229. }
  1230. /* parse_listitem • parsing of a single list item */
  1231. /* assuming initial prefix is already removed */
  1232. static size_t
  1233. parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
  1234. {
  1235. struct buf *work = 0, *inter = 0;
  1236. size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
  1237. int in_empty = 0, has_inside_empty = 0;
  1238. /* keeping track of the first indentation prefix */
  1239. while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
  1240. orgpre++;
  1241. beg = prefix_uli(data, size);
  1242. if (!beg)
  1243. beg = prefix_oli(data, size);
  1244. if (!beg)
  1245. return 0;
  1246. /* skipping to the beginning of the following line */
  1247. end = beg;
  1248. while (end < size && data[end - 1] != '\n')
  1249. end++;
  1250. /* getting working buffers */
  1251. work = rndr_newbuf(rndr, BUFFER_SPAN);
  1252. inter = rndr_newbuf(rndr, BUFFER_SPAN);
  1253. /* putting the first line into the working buffer */
  1254. bufput(work, data + beg, end - beg);
  1255. beg = end;
  1256. /* process the following lines */
  1257. while (beg < size) {
  1258. end++;
  1259. while (end < size && data[end - 1] != '\n')
  1260. end++;
  1261. /* process an empty line */
  1262. if (is_empty(data + beg, end - beg)) {
  1263. in_empty = 1;
  1264. beg = end;
  1265. continue;
  1266. }
  1267. /* calculating the indentation */
  1268. i = 0;
  1269. while (i < 4 && beg + i < end && data[beg + i] == ' ')
  1270. i++;
  1271. pre = i;
  1272. /* checking for a new item */
  1273. if ((prefix_uli(data + beg + i, end - beg - i) &&
  1274. !is_hrule(data + beg + i, end - beg - i)) ||
  1275. prefix_oli(data + beg + i, end - beg - i)) {
  1276. if (in_empty)
  1277. has_inside_empty = 1;
  1278. if (pre == orgpre) /* the following item must have */
  1279. break; /* the same indentation */
  1280. if (!sublist)
  1281. sublist = work->size;
  1282. }
  1283. /* joining only indented stuff after empty lines */
  1284. else if (in_empty && i < 4) {
  1285. *flags |= MKD_LI_END;
  1286. break;
  1287. }
  1288. else if (in_empty) {
  1289. bufputc(work, '\n');
  1290. has_inside_empty = 1;
  1291. }
  1292. in_empty = 0;
  1293. /* adding the line without prefix into the working buffer */
  1294. bufput(work, data + beg + i, end - beg - i);
  1295. beg = end;
  1296. }
  1297. /* render of li contents */
  1298. if (has_inside_empty)
  1299. *flags |= MKD_LI_BLOCK;
  1300. if (*flags & MKD_LI_BLOCK) {
  1301. /* intermediate render of block li */
  1302. if (sublist && sublist < work->size) {
  1303. parse_block(inter, rndr, work->data, sublist);
  1304. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1305. }
  1306. else
  1307. parse_block(inter, rndr, work->data, work->size);
  1308. } else {
  1309. /* intermediate render of inline li */
  1310. if (sublist && sublist < work->size) {
  1311. parse_inline(inter, rndr, work->data, sublist);
  1312. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1313. }
  1314. else
  1315. parse_inline(inter, rndr, work->data, work->size);
  1316. }
  1317. /* render of li itself */
  1318. if (rndr->cb.listitem)
  1319. rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
  1320. rndr_popbuf(rndr, BUFFER_SPAN);
  1321. rndr_popbuf(rndr, BUFFER_SPAN);
  1322. return beg;
  1323. }
  1324. /* parse_list • parsing ordered or unordered list block */
  1325. static size_t
  1326. parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
  1327. {
  1328. struct buf *work = 0;
  1329. size_t i = 0, j;
  1330. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1331. while (i < size) {
  1332. j = parse_listitem(work, rndr, data + i, size - i, &flags);
  1333. i += j;
  1334. if (!j || (flags & MKD_LI_END))
  1335. break;
  1336. }
  1337. if (rndr->cb.list)
  1338. rndr->cb.list(ob, work, flags, rndr->opaque);
  1339. rndr_popbuf(rndr, BUFFER_BLOCK);
  1340. return i;
  1341. }
  1342. /* parse_atxheader • parsing of atx-style headers */
  1343. static size_t
  1344. parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1345. {
  1346. size_t level = 0;
  1347. size_t i, end, skip;
  1348. while (level < size && level < 6 && data[level] == '#')
  1349. level++;
  1350. for (i = level; i < size && data[i] == ' '; i++);
  1351. for (end = i; end < size && data[end] != '\n'; end++);
  1352. skip = end;
  1353. while (end && data[end - 1] == '#')
  1354. end--;
  1355. while (end && data[end - 1] == ' ')
  1356. end--;
  1357. if (end > i) {
  1358. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  1359. parse_inline(work, rndr, data + i, end - i);
  1360. if (rndr->cb.header)
  1361. rndr->cb.header(ob, work, (int)level, rndr->opaque);
  1362. rndr_popbuf(rndr, BUFFER_SPAN);
  1363. }
  1364. return skip;
  1365. }
  1366. /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
  1367. /* returns the length on match, 0 otherwise */
  1368. static size_t
  1369. htmlblock_end(const char *tag, size_t tag_len, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1370. {
  1371. size_t i, w;
  1372. /* checking if tag is a match */
  1373. if (tag_len + 3 >= size ||
  1374. strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
  1375. data[tag_len + 2] != '>')
  1376. return 0;
  1377. /* checking white lines */
  1378. i = tag_len + 3;
  1379. w = 0;
  1380. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1381. return 0; /* non-blank after tag */
  1382. i += w;
  1383. w = 0;
  1384. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  1385. if (i < size)
  1386. w = is_empty(data + i, size - i);
  1387. } else {
  1388. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1389. return 0; /* non-blank line after tag line */
  1390. }
  1391. return i + w;
  1392. }
  1393. /* parse_htmlblock • parsing of inline HTML block */
  1394. static size_t
  1395. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
  1396. {
  1397. size_t i, j = 0;
  1398. const char *curtag = NULL;
  1399. int found;
  1400. struct buf work = { data, 0, 0, 0 };
  1401. /* identification of the opening tag */
  1402. if (size < 2 || data[0] != '<')
  1403. return 0;
  1404. i = 1;
  1405. while (i < size && data[i] != '>' && data[i] != ' ')
  1406. i++;
  1407. if (i < size)
  1408. curtag = find_block_tag((char *)data + 1, i - 1);
  1409. /* handling of special cases */
  1410. if (!curtag) {
  1411. /* HTML comment, laxist form */
  1412. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  1413. i = 5;
  1414. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  1415. i++;
  1416. i++;
  1417. if (i < size)
  1418. j = is_empty(data + i, size - i);
  1419. if (j) {
  1420. work.size = i + j;
  1421. if (do_render && rndr->cb.blockhtml)
  1422. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1423. return work.size;
  1424. }
  1425. }
  1426. /* HR, which is the only self-closing block tag considered */
  1427. if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
  1428. i = 3;
  1429. while (i < size && data[i] != '>')
  1430. i++;
  1431. if (i + 1 < size) {
  1432. i++;
  1433. j = is_empty(data + i, size - i);
  1434. if (j) {
  1435. work.size = i + j;
  1436. if (do_render && rndr->cb.blockhtml)
  1437. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1438. return work.size;
  1439. }
  1440. }
  1441. }
  1442. /* no special case recognised */
  1443. return 0;
  1444. }
  1445. /* looking for an unindented matching closing tag */
  1446. /* followed by a blank line */
  1447. i = 1;
  1448. found = 0;
  1449. /* if not found, trying a second pass looking for indented match */
  1450. /* but not if tag is "ins" or "del" (following original Markdown.pl) */
  1451. if (strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
  1452. size_t tag_size = strlen(curtag);
  1453. i = 1;
  1454. while (i < size) {
  1455. i++;
  1456. while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
  1457. i++;
  1458. if (i + 2 + tag_size >= size)
  1459. break;
  1460. j = htmlblock_end(curtag, tag_size, rndr, data + i - 1, size - i + 1);
  1461. if (j) {
  1462. i += j - 1;
  1463. found = 1;
  1464. break;
  1465. }
  1466. }
  1467. }
  1468. if (!found) return 0;
  1469. /* the end of the block has been found */
  1470. work.size = i;
  1471. if (do_render && rndr->cb.blockhtml)
  1472. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1473. return i;
  1474. }
  1475. static void
  1476. parse_table_row(
  1477. struct buf *ob,
  1478. struct sd_markdown *rndr,
  1479. uint8_t *data,
  1480. size_t size,
  1481. size_t columns,
  1482. int *col_data,
  1483. int header_flag)
  1484. {
  1485. size_t i = 0, col;
  1486. struct buf *row_work = 0;
  1487. if (!rndr->cb.table_cell || !rndr->cb.table_row)
  1488. return;
  1489. row_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1490. if (i < size && data[i] == '|')
  1491. i++;
  1492. for (col = 0; col < columns && i < size; ++col) {
  1493. size_t cell_start, cell_end;
  1494. struct buf *cell_work;
  1495. cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1496. while (i < size && _isspace(data[i]))
  1497. i++;
  1498. cell_start = i;
  1499. while (i < size && data[i] != '|')
  1500. i++;
  1501. cell_end = i - 1;
  1502. while (cell_end > cell_start && _isspace(data[cell_end]))
  1503. cell_end--;
  1504. parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
  1505. rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
  1506. rndr_popbuf(rndr, BUFFER_SPAN);
  1507. i++;
  1508. }
  1509. for (; col < columns; ++col) {
  1510. struct buf empty_cell = { 0, 0, 0, 0 };
  1511. rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
  1512. }
  1513. rndr->cb.table_row(ob, row_work, rndr->opaque);
  1514. rndr_popbuf(rndr, BUFFER_SPAN);
  1515. }
  1516. static size_t
  1517. parse_table_header(
  1518. struct buf *ob,
  1519. struct sd_markdown *rndr,
  1520. uint8_t *data,
  1521. size_t size,
  1522. size_t *columns,
  1523. int **column_data)
  1524. {
  1525. int pipes;
  1526. size_t i = 0, col, header_end, under_end;
  1527. pipes = 0;
  1528. while (i < size && data[i] != '\n')
  1529. if (data[i++] == '|')
  1530. pipes++;
  1531. if (i == size || pipes == 0)
  1532. return 0;
  1533. header_end = i;
  1534. if (data[0] == '|')
  1535. pipes--;
  1536. if (i > 2 && data[i - 1] == '|')
  1537. pipes--;
  1538. *columns = pipes + 1;
  1539. *column_data = calloc(*columns, sizeof(int));
  1540. /* Parse the header underline */
  1541. i++;
  1542. if (i < size && data[i] == '|')
  1543. i++;
  1544. under_end = i;
  1545. while (under_end < size && data[under_end] != '\n')
  1546. under_end++;
  1547. for (col = 0; col < *columns && i < under_end; ++col) {
  1548. size_t dashes = 0;
  1549. while (i < under_end && data[i] == ' ')
  1550. i++;
  1551. if (data[i] == ':') {
  1552. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
  1553. dashes++;
  1554. }
  1555. while (i < under_end && data[i] == '-') {
  1556. i++; dashes++;
  1557. }
  1558. if (i < under_end && data[i] == ':') {
  1559. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
  1560. dashes++;
  1561. }
  1562. while (i < under_end && data[i] == ' ')
  1563. i++;
  1564. if (i < under_end && data[i] != '|')
  1565. break;
  1566. if (dashes < 3)
  1567. break;
  1568. i++;
  1569. }
  1570. if (col < *columns)
  1571. return 0;
  1572. parse_table_row(
  1573. ob, rndr, data,
  1574. header_end,
  1575. *columns,
  1576. *column_data,
  1577. MKD_TABLE_HEADER
  1578. );
  1579. return under_end + 1;
  1580. }
  1581. static size_t
  1582. parse_table(
  1583. struct buf *ob,
  1584. struct sd_markdown *rndr,
  1585. uint8_t *data,
  1586. size_t size)
  1587. {
  1588. size_t i;
  1589. struct buf *header_work = 0;
  1590. struct buf *body_work = 0;
  1591. size_t columns;
  1592. int *col_data = NULL;
  1593. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1594. body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1595. i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
  1596. if (i > 0) {
  1597. while (i < size) {
  1598. size_t row_start;
  1599. int pipes = 0;
  1600. row_start = i;
  1601. while (i < size && data[i] != '\n')
  1602. if (data[i++] == '|')
  1603. pipes++;
  1604. if (pipes == 0 || i == size) {
  1605. i = row_start;
  1606. break;
  1607. }
  1608. parse_table_row(
  1609. body_work,
  1610. rndr,
  1611. data + row_start,
  1612. i - row_start,
  1613. columns,
  1614. col_data, 0
  1615. );
  1616. i++;
  1617. }
  1618. if (rndr->cb.table)
  1619. rndr->cb.table(ob, header_work, body_work, rndr->opaque);
  1620. }
  1621. free(col_data);
  1622. rndr_popbuf(rndr, BUFFER_SPAN);
  1623. rndr_popbuf(rndr, BUFFER_BLOCK);
  1624. return i;
  1625. }
  1626. /* parse_block • parsing of one block, returning next uint8_t to parse */
  1627. static void
  1628. parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1629. {
  1630. size_t beg, end, i;
  1631. uint8_t *txt_data;
  1632. beg = 0;
  1633. if (rndr->work_bufs[BUFFER_SPAN].size +
  1634. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  1635. return;
  1636. while (beg < size) {
  1637. txt_data = data + beg;
  1638. end = size - beg;
  1639. if (is_atxheader(rndr, txt_data, end))
  1640. beg += parse_atxheader(ob, rndr, txt_data, end);
  1641. else if (data[beg] == '<' && rndr->cb.blockhtml &&
  1642. (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
  1643. beg += i;
  1644. else if ((i = is_empty(txt_data, end)) != 0)
  1645. beg += i;
  1646. else if (is_hrule(txt_data, end)) {
  1647. if (rndr->cb.hrule)
  1648. rndr->cb.hrule(ob, rndr->opaque);
  1649. while (beg < size && data[beg] != '\n')
  1650. beg++;
  1651. beg++;
  1652. }
  1653. else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
  1654. (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
  1655. beg += i;
  1656. else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
  1657. (i = parse_table(ob, rndr, txt_data, end)) != 0)
  1658. beg += i;
  1659. else if (prefix_quote(txt_data, end))
  1660. beg += parse_blockquote(ob, rndr, txt_data, end);
  1661. else if (prefix_code(txt_data, end))
  1662. beg += parse_blockcode(ob, rndr, txt_data, end);
  1663. else if (prefix_uli(txt_data, end))
  1664. beg += parse_list(ob, rndr, txt_data, end, 0);
  1665. else if (prefix_oli(txt_data, end))
  1666. beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
  1667. else
  1668. beg += parse_paragraph(ob, rndr, txt_data, end);
  1669. }
  1670. }
  1671. /*********************
  1672. * REFERENCE PARSING *
  1673. *********************/
  1674. /* is_ref • returns whether a line is a reference or not */
  1675. static int
  1676. is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
  1677. {
  1678. /* int n; */
  1679. size_t i = 0;
  1680. size_t id_offset, id_end;
  1681. size_t link_offset, link_end;
  1682. size_t title_offset, title_end;
  1683. size_t line_end;
  1684. /* up to 3 optional leading spaces */
  1685. if (beg + 3 >= end) return 0;
  1686. if (data[beg] == ' ') { i = 1;
  1687. if (data[beg + 1] == ' ') { i = 2;
  1688. if (data[beg + 2] == ' ') { i = 3;
  1689. if (data[beg + 3] == ' ') return 0; } } }
  1690. i += beg;
  1691. /* id part: anything but a newline between brackets */
  1692. if (data[i] != '[') return 0;
  1693. i++;
  1694. id_offset = i;
  1695. while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
  1696. i++;
  1697. if (i >= end || data[i] != ']') return 0;
  1698. id_end = i;
  1699. /* spacer: colon (space | tab)* newline? (space | tab)* */
  1700. i++;
  1701. if (i >= end || data[i] != ':') return 0;
  1702. i++;
  1703. while (i < end && data[i] == ' ') i++;
  1704. if (i < end && (data[i] == '\n' || data[i] == '\r')) {
  1705. i++;
  1706. if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
  1707. while (i < end && data[i] == ' ') i++;
  1708. if (i >= end) return 0;
  1709. /* link: whitespace-free sequence, optionally between angle brackets */
  1710. if (data[i] == '<')
  1711. i++;
  1712. link_offset = i;
  1713. while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
  1714. i++;
  1715. if (data[i - 1] == '>') link_end = i - 1;
  1716. else link_end = i;
  1717. /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
  1718. while (i < end && data[i] == ' ') i++;
  1719. if (i < end && data[i] != '\n' && data[i] != '\r'
  1720. && data[i] != '\'' && data[i] != '"' && data[i] != '(')
  1721. return 0;
  1722. line_end = 0;
  1723. /* computing end-of-line */
  1724. if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
  1725. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  1726. line_end = i + 1;
  1727. /* optional (space|tab)* spacer after a newline */
  1728. if (line_end) {
  1729. i = line_end + 1;
  1730. while (i < end && data[i] == ' ') i++; }
  1731. /* optional title: any non-newline sequence enclosed in '"()
  1732. alone on its line */
  1733. title_offset = title_end = 0;
  1734. if (i + 1 < end
  1735. && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
  1736. i++;
  1737. title_offset = i;
  1738. /* looking for EOL */
  1739. while (i < end && data[i] != '\n' && data[i] != '\r') i++;
  1740. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  1741. title_end = i + 1;
  1742. else title_end = i;
  1743. /* stepping back */
  1744. i -= 1;
  1745. while (i > title_offset && data[i] == ' ')
  1746. i -= 1;
  1747. if (i > title_offset
  1748. && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
  1749. line_end = title_end;
  1750. title_end = i; } }
  1751. if (!line_end)
  1752. return 0; /* garbage after the link */
  1753. /* a valid ref has been found, filling-in return structures */
  1754. if (last)
  1755. *last = line_end;
  1756. if (refs) {
  1757. struct link_ref *ref;
  1758. ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
  1759. ref->link = bufnew(link_end - link_offset);
  1760. bufput(ref->link, data + link_offset, link_end - link_offset);
  1761. if (title_end > title_offset) {
  1762. ref->title = bufnew(title_end - title_offset);
  1763. bufput(ref->title, data + title_offset, title_end - title_offset);
  1764. }
  1765. }
  1766. return 1;
  1767. }
  /* expand_tabs • copies a line into ob, replacing every tab by one to
   * four spaces so that the following text starts on a multiple of four.
   *
   * NOTE(review): the column is advanced once per byte of input, so a
   * multi-byte character counts as several columns. */
  static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
  {
      size_t pos = 0, column = 0;

      while (pos < size) {
          size_t run_start = pos;

          /* copy the run of ordinary characters up to the next tab */
          for (; pos < size && line[pos] != '\t'; pos++)
              column++;

          if (pos > run_start)
              bufput(ob, line + run_start, pos - run_start);

          if (pos >= size)
              break;

          /* pad with at least one space up to the next tab stop */
          do {
              bufputc(ob, ' ');
              column++;
          } while (column % 4);

          pos++; /* step over the tab itself */
      }
  }
  1786. /**********************
  1787. * EXPORTED FUNCTIONS *
  1788. **********************/
  1789. struct sd_markdown *
  1790. sd_markdown_new(
  1791. unsigned int extensions,
  1792. size_t max_nesting,
  1793. const struct sd_callbacks *callbacks,
  1794. void *opaque)
  1795. {
  1796. struct sd_markdown *md = NULL;
  1797. assert(max_nesting > 0 && callbacks);
  1798. md = malloc(sizeof(struct sd_markdown));
  1799. if (!md)
  1800. return NULL;
  1801. memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
  1802. stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
  1803. stack_init(&md->work_bufs[BUFFER_SPAN], 8);
  1804. memset(md->active_char, 0x0, 256);
  1805. if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
  1806. md->active_char['*'] = MD_CHAR_EMPHASIS;
  1807. md->active_char['_'] = MD_CHAR_EMPHASIS;
  1808. if (extensions & MKDEXT_STRIKETHROUGH)
  1809. md->active_char['~'] = MD_CHAR_EMPHASIS;
  1810. }
  1811. if (md->cb.codespan)
  1812. md->active_char['`'] = MD_CHAR_CODESPAN;
  1813. if (md->cb.linebreak)
  1814. md->active_char['\n'] = MD_CHAR_LINEBREAK;
  1815. if (md->cb.image || md->cb.link)
  1816. md->active_char['['] = MD_CHAR_LINK;
  1817. md->active_char['<'] = MD_CHAR_LANGLE;
  1818. md->active_char['\\'] = MD_CHAR_ESCAPE;
  1819. md->active_char['&'] = MD_CHAR_ENTITITY;
  1820. if (extensions & MKDEXT_AUTOLINK) {
  1821. md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
  1822. md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
  1823. md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
  1824. }
  1825. if (extensions & MKDEXT_SUPERSCRIPT)
  1826. md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
  1827. /* Extension data */
  1828. md->ext_flags = extensions;
  1829. md->opaque = opaque;
  1830. md->max_nesting = max_nesting;
  1831. return md;
  1832. }
  1833. void
  1834. sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
  1835. {
  1836. static const float MARKDOWN_GROW_FACTOR = 1.4f;
  1837. struct buf *text;
  1838. size_t beg, end;
  1839. text = bufnew(64);
  1840. if (!text)
  1841. return;
  1842. /* Preallocate enough space for our buffer to avoid expanding while copying */
  1843. bufgrow(text, doc_size);
  1844. /* reset the references table */
  1845. memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
  1846. /* first pass: looking for references, copying everything else */
  1847. beg = 0;
  1848. while (beg < doc_size) /* iterating over lines */
  1849. if (is_ref(document, beg, doc_size, &end, md->refs))
  1850. beg = end;
  1851. else { /* skipping to the next line */
  1852. end = beg;
  1853. while (end < doc_size && document[end] != '\n' && document[end] != '\r')
  1854. end++;
  1855. /* adding the line body if present */
  1856. if (end > beg)
  1857. expand_tabs(text, document + beg, end - beg);
  1858. while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
  1859. /* add one \n per newline */
  1860. if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
  1861. bufputc(text, '\n');
  1862. end++;
  1863. }
  1864. beg = end;
  1865. }
  1866. /* pre-grow the output buffer to minimize allocations */
  1867. bufgrow(ob, text->size * MARKDOWN_GROW_FACTOR);
  1868. /* second pass: actual rendering */
  1869. if (md->cb.doc_header)
  1870. md->cb.doc_header(ob, md->opaque);
  1871. if (text->size) {
  1872. /* adding a final newline if not already present */
  1873. if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
  1874. bufputc(text, '\n');
  1875. parse_block(ob, md, text->data, text->size);
  1876. }
  1877. if (md->cb.doc_footer)
  1878. md->cb.doc_footer(ob, md->opaque);
  1879. /* clean-up */
  1880. bufrelease(text);
  1881. free_link_refs(md->refs);
  1882. assert(md->work_bufs[BUFFER_SPAN].size == 0);
  1883. assert(md->work_bufs[BUFFER_BLOCK].size == 0);
  1884. }
  1885. void
  1886. sd_markdown_free(struct sd_markdown *md)
  1887. {
  1888. size_t i;
  1889. for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
  1890. bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
  1891. for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
  1892. bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
  1893. stack_free(&md->work_bufs[BUFFER_SPAN]);
  1894. stack_free(&md->work_bufs[BUFFER_BLOCK]);
  1895. free(md);
  1896. }
  1897. void
  1898. sd_version(int *ver_major, int *ver_minor, int *ver_revision)
  1899. {
  1900. *ver_major = UPSKIRT_VER_MAJOR;
  1901. *ver_minor = UPSKIRT_VER_MINOR;
  1902. *ver_revision = UPSKIRT_VER_REVISION;
  1903. }
  1904. /* vim: set filetype=c: */