PageRenderTime 230ms CodeModel.GetById 26ms RepoModel.GetById 6ms app.codeStats 1ms

/ext/redcarpet/markdown.c

http://github.com/tanoku/redcarpet
C | 2460 lines | 2236 code | 145 blank | 79 comment | 196 complexity | 0c478537a5049dd6655e38ae4f31ac2c MD5 | raw file
Possible License(s): 0BSD
  1. /* markdown.c - generic markdown parser */
  2. /*
  3. * Copyright (c) 2009, Natacha Porté
  4. * Copyright (c) 2011, Vicent Marti
  5. *
  6. * Permission to use, copy, modify, and distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. #include "markdown.h"
  19. #include "stack.h"
  20. #include <assert.h>
  21. #include <string.h>
  22. #include <ctype.h>
  23. #include <stdio.h>
  24. #define REF_TABLE_SIZE 8
  25. #define BUFFER_BLOCK 0
  26. #define BUFFER_SPAN 1
  27. #define MKD_LI_END 8 /* internal list flag */
  28. #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
  29. #define GPERF_DOWNCASE 1
  30. #define GPERF_CASE_STRNCMP 1
  31. #include "html_blocks.h"
  32. /***************
  33. * LOCAL TYPES *
  34. ***************/
  /*
   * link_ref: one entry of the link reference table.
   * Entries that fall in the same bucket are chained through `next`.
   */
  struct link_ref {
      /* hash of the reference name, as computed by hash_link_ref() */
      unsigned int id;

      /* link target; released with bufrelease() by free_link_refs() */
      struct buf *link;

      /* link title; released with bufrelease() by free_link_refs() */
      struct buf *title;

      /* next entry of the same bucket, or NULL at the end of the chain */
      struct link_ref *next;
  };
  /*
   * char_trigger: function pointer used to render an active char.
   *
   *   ob     - output buffer
   *   rndr   - parser state
   *   data   - pointer to the beginning of the span
   *   offset - number of valid chars before data
   *   size   - number of chars available starting at data
   *
   * Returns the number of chars taken care of; parse_inline treats a
   * return value of 0 as "no action".
   */
  struct sd_markdown;

  typedef size_t (*char_trigger)(
      struct buf *ob, struct sd_markdown *rndr,
      uint8_t *data, size_t offset, size_t size);

  /* forward declarations of the handlers stored in markdown_char_ptrs */
  static size_t char_emphasis(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_linebreak(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_codespan(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_escape(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_entity(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_langle_tag(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_autolink_url(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_autolink_email(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_autolink_www(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_link(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  static size_t char_superscript(
      struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  /*
   * markdown_char_t: identifiers of the inline handlers.
   * The order mirrors the entries of markdown_char_ptrs, so each value is
   * the index of its handler in that table; MD_CHAR_NONE has no handler.
   * NOTE(review): presumably these are the values stored in
   * sd_markdown.active_char[] -- confirm where that table is filled.
   */
  enum markdown_char_t {
      MD_CHAR_NONE           = 0,
      MD_CHAR_EMPHASIS       = 1,
      MD_CHAR_CODESPAN       = 2,
      MD_CHAR_LINEBREAK      = 3,
      MD_CHAR_LINK           = 4,
      MD_CHAR_LANGLE         = 5,
      MD_CHAR_ESCAPE         = 6,
      MD_CHAR_ENTITITY       = 7,
      MD_CHAR_AUTOLINK_URL   = 8,
      MD_CHAR_AUTOLINK_EMAIL = 9,
      MD_CHAR_AUTOLINK_WWW   = 10,
      MD_CHAR_SUPERSCRIPT    = 11,
  };
  74. static char_trigger markdown_char_ptrs[] = {
  75. NULL,
  76. &char_emphasis,
  77. &char_codespan,
  78. &char_linebreak,
  79. &char_link,
  80. &char_langle_tag,
  81. &char_escape,
  82. &char_entity,
  83. &char_autolink_url,
  84. &char_autolink_email,
  85. &char_autolink_www,
  86. &char_superscript,
  87. };
  88. /* render • structure containing one particular render */
  89. struct sd_markdown {
  90. struct sd_callbacks cb;
  91. void *opaque;
  92. struct link_ref *refs[REF_TABLE_SIZE];
  93. uint8_t active_char[256];
  94. struct stack work_bufs[2];
  95. unsigned int ext_flags;
  96. size_t max_nesting;
  97. int in_link_body;
  98. };
  99. /***************************
  100. * HELPER FUNCTIONS *
  101. ***************************/
  102. static inline struct buf *
  103. rndr_newbuf(struct sd_markdown *rndr, int type)
  104. {
  105. static const size_t buf_size[2] = {256, 64};
  106. struct buf *work = NULL;
  107. struct stack *pool = &rndr->work_bufs[type];
  108. if (pool->size < pool->asize &&
  109. pool->item[pool->size] != NULL) {
  110. work = pool->item[pool->size++];
  111. work->size = 0;
  112. } else {
  113. work = bufnew(buf_size[type]);
  114. stack_push(pool, work);
  115. }
  116. return work;
  117. }
  118. static inline void
  119. rndr_popbuf(struct sd_markdown *rndr, int type)
  120. {
  121. rndr->work_bufs[type].size--;
  122. }
  123. static void
  124. unscape_text(struct buf *ob, struct buf *src)
  125. {
  126. size_t i = 0, org;
  127. while (i < src->size) {
  128. org = i;
  129. while (i < src->size && src->data[i] != '\\')
  130. i++;
  131. if (i > org)
  132. bufput(ob, src->data + org, i - org);
  133. if (i + 1 >= src->size)
  134. break;
  135. bufputc(ob, src->data[i + 1]);
  136. i += 2;
  137. }
  138. }
  /*
   * hash_link_ref: case-insensitive hash of a link reference name.
   * Each byte is lowercased with tolower() and folded into the hash as
   * hash = byte + (hash << 6) + (hash << 16) - hash (unsigned, wrapping).
   * Returns 0 for an empty name.
   */
  static unsigned int
  hash_link_ref(const uint8_t *link_ref, size_t length)
  {
      unsigned int hash = 0;

      while (length-- > 0) {
          const unsigned int lowered = (unsigned int)tolower(*link_ref++);

          hash = lowered + (hash << 6) + (hash << 16) - hash;
      }

      return hash;
  }
  148. static struct link_ref *
  149. add_link_ref(
  150. struct link_ref **references,
  151. const uint8_t *name, size_t name_size)
  152. {
  153. struct link_ref *ref = calloc(1, sizeof(struct link_ref));
  154. if (!ref)
  155. return NULL;
  156. ref->id = hash_link_ref(name, name_size);
  157. ref->next = references[ref->id % REF_TABLE_SIZE];
  158. references[ref->id % REF_TABLE_SIZE] = ref;
  159. return ref;
  160. }
  161. static struct link_ref *
  162. find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
  163. {
  164. unsigned int hash = hash_link_ref(name, length);
  165. struct link_ref *ref = NULL;
  166. ref = references[hash % REF_TABLE_SIZE];
  167. while (ref != NULL) {
  168. if (ref->id == hash)
  169. return ref;
  170. ref = ref->next;
  171. }
  172. return NULL;
  173. }
  174. static void
  175. free_link_refs(struct link_ref **references)
  176. {
  177. size_t i;
  178. for (i = 0; i < REF_TABLE_SIZE; ++i) {
  179. struct link_ref *r = references[i];
  180. struct link_ref *next;
  181. while (r) {
  182. next = r->next;
  183. bufrelease(r->link);
  184. bufrelease(r->title);
  185. free(r);
  186. r = next;
  187. }
  188. }
  189. }
  /*
   * _isspace: tells whether a char is a Markdown space.
   *
   * Only the actual space and the newline count as spaces here: tabs and
   * carriage returns are filtered out during the preprocessing phase.
   *
   * To be really UTF-8 compliant, a Unicode codepoint would have to be
   * extracted from the input and checked for space properties instead.
   *
   * Returns 1 for ' ' and '\n', 0 for anything else.
   */
  static inline int
  _isspace(int c)
  {
      switch (c) {
      case ' ':
      case '\n':
          return 1;
      default:
          return 0;
      }
  }
  205. /****************************
  206. * INLINE PARSING FUNCTIONS *
  207. ****************************/
  /*
   * is_mail_autolink: looks for the address part of a mail autolink and '>'.
   * This is less strict than the original markdown e-mail address matching:
   * the address is assumed to be [-@._a-zA-Z0-9]+ with exactly one '@'.
   *
   * Returns the number of chars up to and including the closing '>', or 0
   * when the input is not such an address (forbidden char, wrong number of
   * '@', or no '>' within `size` chars).
   */
  static size_t
  is_mail_autolink(uint8_t *data, size_t size)
  {
      size_t pos, at_signs = 0;

      for (pos = 0; pos < size; ++pos) {
          const uint8_t ch = data[pos];

          /* end of the address: valid only with exactly one '@' before it */
          if (ch == '>')
              return (at_signs == 1) ? pos + 1 : 0;

          if (ch == '@') {
              at_signs++;
              continue;
          }

          if (!isalnum(ch) && ch != '-' && ch != '.' && ch != '_')
              return 0;
      }

      /* ran out of input without meeting '>' */
      return 0;
  }
  233. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  234. static size_t
  235. tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
  236. {
  237. size_t i, j;
  238. /* a valid tag can't be shorter than 3 chars */
  239. if (size < 3) return 0;
  240. /* begins with a '<' optionally followed by '/', followed by letter or number */
  241. if (data[0] != '<') return 0;
  242. i = (data[1] == '/') ? 2 : 1;
  243. if (!isalnum(data[i]))
  244. return 0;
  245. /* scheme test */
  246. *autolink = MKDA_NOT_AUTOLINK;
  247. /* try to find the beginning of an URI */
  248. while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  249. i++;
  250. if (i > 1 && data[i] == '@') {
  251. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  252. *autolink = MKDA_EMAIL;
  253. return i + j;
  254. }
  255. }
  256. if (i > 2 && data[i] == ':') {
  257. *autolink = MKDA_NORMAL;
  258. i++;
  259. }
  260. /* completing autolink test: no whitespace or ' or " */
  261. if (i >= size)
  262. *autolink = MKDA_NOT_AUTOLINK;
  263. else if (*autolink) {
  264. j = i;
  265. while (i < size) {
  266. if (data[i] == '\\') i += 2;
  267. else if (data[i] == '>' || data[i] == '\'' ||
  268. data[i] == '"' || data[i] == ' ' || data[i] == '\n')
  269. break;
  270. else i++;
  271. }
  272. if (i >= size) return 0;
  273. if (i > j && data[i] == '>') return i + 1;
  274. /* one of the forbidden chars has been found */
  275. *autolink = MKDA_NOT_AUTOLINK;
  276. }
  277. /* looking for sometinhg looking like a tag end */
  278. while (i < size && data[i] != '>') i++;
  279. if (i >= size) return 0;
  280. return i + 1;
  281. }
  282. /* parse_inline • parses inline markdown elements */
  283. static void
  284. parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  285. {
  286. size_t i = 0, end = 0;
  287. uint8_t action = 0;
  288. struct buf work = { 0, 0, 0, 0 };
  289. if (rndr->work_bufs[BUFFER_SPAN].size +
  290. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  291. return;
  292. while (i < size) {
  293. /* copying inactive chars into the output */
  294. while (end < size && (action = rndr->active_char[data[end]]) == 0) {
  295. end++;
  296. }
  297. if (rndr->cb.normal_text) {
  298. work.data = data + i;
  299. work.size = end - i;
  300. rndr->cb.normal_text(ob, &work, rndr->opaque);
  301. }
  302. else
  303. bufput(ob, data + i, end - i);
  304. if (end >= size) break;
  305. i = end;
  306. end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
  307. if (!end) /* no action from the callback */
  308. end = i + 1;
  309. else {
  310. i += end;
  311. end = i;
  312. }
  313. }
  314. }
  /*
   * find_emph_char: looks for the next emphasis char `c`, skipping other
   * constructs (code spans and links).
   *
   * The search starts at index 1. A candidate preceded by an odd number of
   * backslashes is escaped and is skipped; this applies to `c` itself as
   * well as to the '`' and '[' that open the skipped constructs.
   *
   * Returns the index of the emphasis char, or 0 when none is found. When
   * a code span or a link runs up to the end of the input, the first `c`
   * seen inside it (if any) is returned as a fallback.
   */
  static size_t
  find_emph_char(uint8_t *data, size_t size, uint8_t c)
  {
      size_t i = 1;

      while (i < size) {
          size_t backslashes = 0;

          while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
              i++;

          if (i == size)
              return 0;

          /* not counting escaped chars: an odd run of backslashes right
           * before data[i] escapes it, an even run escapes itself */
          while (backslashes < i && data[i - 1 - backslashes] == '\\')
              backslashes++;

          if (backslashes & 1) {
              i++; continue;
          }

          if (data[i] == c)
              return i;

          if (data[i] == '`') {
              size_t span_nb = 0, bt;
              size_t tmp_i = 0;

              /* counting the number of opening backticks */
              while (i < size && data[i] == '`') {
                  i++; span_nb++;
              }

              if (i >= size) return 0;

              /* finding the matching closing sequence */
              bt = 0;
              while (i < size && bt < span_nb) {
                  if (!tmp_i && data[i] == c) tmp_i = i;
                  if (data[i] == '`') bt++;
                  else bt = 0;
                  i++;
              }

              if (i >= size) return tmp_i;
          }
          /* skipping a link */
          else if (data[i] == '[') {
              size_t tmp_i = 0;
              uint8_t cc;

              i++;
              while (i < size && data[i] != ']') {
                  if (!tmp_i && data[i] == c) tmp_i = i;
                  i++;
              }

              i++;
              while (i < size && (data[i] == ' ' || data[i] == '\n'))
                  i++;

              if (i >= size)
                  return tmp_i;

              switch (data[i]) {
              case '[':
                  cc = ']'; break;

              case '(':
                  cc = ')'; break;

              default:
                  /* plain bracketed text, not a link */
                  if (tmp_i)
                      return tmp_i;
                  else
                      continue;
              }

              i++;
              while (i < size && data[i] != cc) {
                  if (!tmp_i && data[i] == c) tmp_i = i;
                  i++;
              }

              if (i >= size)
                  return tmp_i;

              i++;
          }
      }

      return 0;
  }
  386. /* parse_emph1 • parsing single emphase */
  387. /* closed by a symbol not preceded by whitespace and not followed by symbol */
  388. static size_t
  389. parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  390. {
  391. size_t i = 0, len;
  392. struct buf *work = 0;
  393. int r;
  394. if (!rndr->cb.emphasis) return 0;
  395. /* skipping one symbol if coming from emph3 */
  396. if (size > 1 && data[0] == c && data[1] == c) i = 1;
  397. while (i < size) {
  398. len = find_emph_char(data + i, size - i, c);
  399. if (!len) return 0;
  400. i += len;
  401. if (i >= size) return 0;
  402. if (data[i] == c && !_isspace(data[i - 1])) {
  403. if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
  404. if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
  405. continue;
  406. }
  407. work = rndr_newbuf(rndr, BUFFER_SPAN);
  408. parse_inline(work, rndr, data, i);
  409. r = rndr->cb.emphasis(ob, work, rndr->opaque);
  410. rndr_popbuf(rndr, BUFFER_SPAN);
  411. return r ? i + 1 : 0;
  412. }
  413. }
  414. return 0;
  415. }
  416. /* parse_emph2 • parsing single emphase */
  417. static size_t
  418. parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  419. {
  420. int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
  421. size_t i = 0, len;
  422. struct buf *work = 0;
  423. int r;
  424. render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
  425. if (!render_method)
  426. return 0;
  427. while (i < size) {
  428. len = find_emph_char(data + i, size - i, c);
  429. if (!len) return 0;
  430. i += len;
  431. if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
  432. work = rndr_newbuf(rndr, BUFFER_SPAN);
  433. parse_inline(work, rndr, data, i);
  434. r = render_method(ob, work, rndr->opaque);
  435. rndr_popbuf(rndr, BUFFER_SPAN);
  436. return r ? i + 2 : 0;
  437. }
  438. i++;
  439. }
  440. return 0;
  441. }
  442. /* parse_emph3 • parsing single emphase */
  443. /* finds the first closing tag, and delegates to the other emph */
  444. static size_t
  445. parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  446. {
  447. size_t i = 0, len;
  448. int r;
  449. while (i < size) {
  450. len = find_emph_char(data + i, size - i, c);
  451. if (!len) return 0;
  452. i += len;
  453. /* skip whitespace preceded symbols */
  454. if (data[i] != c || _isspace(data[i - 1]))
  455. continue;
  456. if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
  457. /* triple symbol found */
  458. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  459. parse_inline(work, rndr, data, i);
  460. r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
  461. rndr_popbuf(rndr, BUFFER_SPAN);
  462. return r ? i + 3 : 0;
  463. } else if (i + 1 < size && data[i + 1] == c) {
  464. /* double symbol found, handing over to emph1 */
  465. len = parse_emph1(ob, rndr, data - 2, size + 2, c);
  466. if (!len) return 0;
  467. else return len - 2;
  468. } else {
  469. /* single symbol found, handing over to emph2 */
  470. len = parse_emph2(ob, rndr, data - 1, size + 1, c);
  471. if (!len) return 0;
  472. else return len - 1;
  473. }
  474. }
  475. return 0;
  476. }
  /*
   * char_emphasis: single, double and triple emphasis parsing.
   * The number of leading symbols selects parse_emph1/2/3. Whitespace
   * cannot follow an opening emphasis, and strikethrough ('~') only exists
   * in its two-symbol form.
   *
   * Returns the number of chars consumed, opening symbols included, or 0.
   */
  static size_t
  char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  {
      const uint8_t c = data[0];
      size_t consumed;

      /* one opening symbol */
      if (size > 2 && data[1] != c) {
          if (c == '~' || _isspace(data[1]))
              return 0;

          consumed = parse_emph1(ob, rndr, data + 1, size - 1, c);
          return consumed ? consumed + 1 : 0;
      }

      /* two opening symbols */
      if (size > 3 && data[1] == c && data[2] != c) {
          if (_isspace(data[2]))
              return 0;

          consumed = parse_emph2(ob, rndr, data + 2, size - 2, c);
          return consumed ? consumed + 2 : 0;
      }

      /* three opening symbols */
      if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
          if (c == '~' || _isspace(data[3]))
              return 0;

          consumed = parse_emph3(ob, rndr, data + 3, size - 3, c);
          return consumed ? consumed + 3 : 0;
      }

      return 0;
  }
  502. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  503. static size_t
  504. char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  505. {
  506. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  507. return 0;
  508. /* removing the last space from ob and rendering */
  509. while (ob->size && ob->data[ob->size - 1] == ' ')
  510. ob->size--;
  511. return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
  512. }
  513. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  514. static size_t
  515. char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  516. {
  517. size_t end, nb = 0, i, f_begin, f_end;
  518. /* counting the number of backticks in the delimiter */
  519. while (nb < size && data[nb] == '`')
  520. nb++;
  521. /* finding the next delimiter */
  522. i = 0;
  523. for (end = nb; end < size && i < nb; end++) {
  524. if (data[end] == '`') i++;
  525. else i = 0;
  526. }
  527. if (i < nb && end >= size)
  528. return 0; /* no matching delimiter */
  529. /* trimming outside whitespaces */
  530. f_begin = nb;
  531. while (f_begin < end && data[f_begin] == ' ')
  532. f_begin++;
  533. f_end = end - nb;
  534. while (f_end > nb && data[f_end-1] == ' ')
  535. f_end--;
  536. /* real code span */
  537. if (f_begin < f_end) {
  538. struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
  539. if (!rndr->cb.codespan(ob, &work, rndr->opaque))
  540. end = 0;
  541. } else {
  542. if (!rndr->cb.codespan(ob, 0, rndr->opaque))
  543. end = 0;
  544. }
  545. return end;
  546. }
  547. /* char_escape • '\\' backslash escape */
  548. static size_t
  549. char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  550. {
  551. static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
  552. struct buf work = { 0, 0, 0, 0 };
  553. if (size > 1) {
  554. if (strchr(escape_chars, data[1]) == NULL)
  555. return 0;
  556. if (rndr->cb.normal_text) {
  557. work.data = data + 1;
  558. work.size = 1;
  559. rndr->cb.normal_text(ob, &work, rndr->opaque);
  560. }
  561. else bufputc(ob, data[1]);
  562. }
  563. return 2;
  564. }
  565. /* char_entity • '&' escaped when it doesn't belong to an entity */
  566. /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
  567. static size_t
  568. char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  569. {
  570. size_t end = 1;
  571. struct buf work = { 0, 0, 0, 0 };
  572. if (end < size && data[end] == '#')
  573. end++;
  574. while (end < size && isalnum(data[end]))
  575. end++;
  576. if (end < size && data[end] == ';')
  577. end++; /* real entity */
  578. else
  579. return 0; /* lone '&' */
  580. if (rndr->cb.entity) {
  581. work.data = data;
  582. work.size = end;
  583. rndr->cb.entity(ob, &work, rndr->opaque);
  584. }
  585. else bufput(ob, data, end);
  586. return end;
  587. }
  588. /* char_langle_tag • '<' when tags or autolinks are allowed */
  589. static size_t
  590. char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  591. {
  592. enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
  593. size_t end = tag_length(data, size, &altype);
  594. struct buf work = { data, end, 0, 0 };
  595. int ret = 0;
  596. if (end > 2) {
  597. if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
  598. struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  599. work.data = data + 1;
  600. work.size = end - 2;
  601. unscape_text(u_link, &work);
  602. ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
  603. rndr_popbuf(rndr, BUFFER_SPAN);
  604. }
  605. else if (rndr->cb.raw_html_tag)
  606. ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
  607. }
  608. if (!ret) return 0;
  609. else return end;
  610. }
  611. static size_t
  612. char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  613. {
  614. struct buf *link, *link_url, *link_text;
  615. size_t link_len, rewind;
  616. if (!rndr->cb.link || rndr->in_link_body)
  617. return 0;
  618. link = rndr_newbuf(rndr, BUFFER_SPAN);
  619. if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
  620. link_url = rndr_newbuf(rndr, BUFFER_SPAN);
  621. BUFPUTSL(link_url, "http://");
  622. bufput(link_url, link->data, link->size);
  623. ob->size -= rewind;
  624. if (rndr->cb.normal_text) {
  625. link_text = rndr_newbuf(rndr, BUFFER_SPAN);
  626. rndr->cb.normal_text(link_text, link, rndr->opaque);
  627. rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
  628. rndr_popbuf(rndr, BUFFER_SPAN);
  629. } else {
  630. rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
  631. }
  632. rndr_popbuf(rndr, BUFFER_SPAN);
  633. }
  634. rndr_popbuf(rndr, BUFFER_SPAN);
  635. return link_len;
  636. }
  637. static size_t
  638. char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  639. {
  640. struct buf *link;
  641. size_t link_len, rewind;
  642. if (!rndr->cb.autolink || rndr->in_link_body)
  643. return 0;
  644. link = rndr_newbuf(rndr, BUFFER_SPAN);
  645. if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
  646. ob->size -= rewind;
  647. rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
  648. }
  649. rndr_popbuf(rndr, BUFFER_SPAN);
  650. return link_len;
  651. }
  652. static size_t
  653. char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  654. {
  655. struct buf *link;
  656. size_t link_len, rewind;
  657. if (!rndr->cb.autolink || rndr->in_link_body)
  658. return 0;
  659. link = rndr_newbuf(rndr, BUFFER_SPAN);
  660. if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
  661. ob->size -= rewind;
  662. rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
  663. }
  664. rndr_popbuf(rndr, BUFFER_SPAN);
  665. return link_len;
  666. }
  667. /* char_link • '[': parsing a link or an image */
  668. static size_t
  669. char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  670. {
  671. int is_img = (offset && data[-1] == '!'), level;
  672. size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
  673. struct buf *content = 0;
  674. struct buf *link = 0;
  675. struct buf *title = 0;
  676. struct buf *u_link = 0;
  677. size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
  678. int text_has_nl = 0, ret = 0;
  679. int in_title = 0, qtype = 0;
  680. /* checking whether the correct renderer exists */
  681. if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
  682. goto cleanup;
  683. /* looking for the matching closing bracket */
  684. for (level = 1; i < size; i++) {
  685. if (data[i] == '\n')
  686. text_has_nl = 1;
  687. else if (data[i - 1] == '\\')
  688. continue;
  689. else if (data[i] == '[')
  690. level++;
  691. else if (data[i] == ']') {
  692. level--;
  693. if (level <= 0)
  694. break;
  695. }
  696. }
  697. if (i >= size)
  698. goto cleanup;
  699. txt_e = i;
  700. i++;
  701. /* skip any amount of whitespace or newline */
  702. /* (this is much more laxist than original markdown syntax) */
  703. while (i < size && _isspace(data[i]))
  704. i++;
  705. /* inline style link */
  706. if (i < size && data[i] == '(') {
  707. /* skipping initial whitespace */
  708. i++;
  709. while (i < size && _isspace(data[i]))
  710. i++;
  711. link_b = i;
  712. /* looking for link end: ' " ) */
  713. while (i < size) {
  714. if (data[i] == '\\') i += 2;
  715. else if (data[i] == ')' || data[i] == '\'' || data[i] == '"') break;
  716. else i++;
  717. }
  718. if (i >= size) goto cleanup;
  719. link_e = i;
  720. /* looking for title end if present */
  721. if (data[i] == '\'' || data[i] == '"') {
  722. qtype = data[i];
  723. in_title = 1;
  724. i++;
  725. title_b = i;
  726. while (i < size) {
  727. if (data[i] == '\\') i += 2;
  728. else if (data[i] == qtype) {in_title = 0; i++;}
  729. else if ((data[i] == ')') && !in_title) break;
  730. else i++;
  731. }
  732. if (i >= size) goto cleanup;
  733. /* skipping whitespaces after title */
  734. title_e = i - 1;
  735. while (title_e > title_b && _isspace(data[title_e]))
  736. title_e--;
  737. /* checking for closing quote presence */
  738. if (data[title_e] != '\'' && data[title_e] != '"') {
  739. title_b = title_e = 0;
  740. link_e = i;
  741. }
  742. }
  743. /* remove whitespace at the end of the link */
  744. while (link_e > link_b && _isspace(data[link_e - 1]))
  745. link_e--;
  746. /* remove optional angle brackets around the link */
  747. if (data[link_b] == '<') link_b++;
  748. if (data[link_e - 1] == '>') link_e--;
  749. /* building escaped link and title */
  750. if (link_e > link_b) {
  751. link = rndr_newbuf(rndr, BUFFER_SPAN);
  752. bufput(link, data + link_b, link_e - link_b);
  753. }
  754. if (title_e > title_b) {
  755. title = rndr_newbuf(rndr, BUFFER_SPAN);
  756. bufput(title, data + title_b, title_e - title_b);
  757. }
  758. i++;
  759. }
  760. /* reference style link */
  761. else if (i < size && data[i] == '[') {
  762. struct buf id = { 0, 0, 0, 0 };
  763. struct link_ref *lr;
  764. /* looking for the id */
  765. i++;
  766. link_b = i;
  767. while (i < size && data[i] != ']') i++;
  768. if (i >= size) goto cleanup;
  769. link_e = i;
  770. /* finding the link_ref */
  771. if (link_b == link_e) {
  772. if (text_has_nl) {
  773. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  774. size_t j;
  775. for (j = 1; j < txt_e; j++) {
  776. if (data[j] != '\n')
  777. bufputc(b, data[j]);
  778. else if (data[j - 1] != ' ')
  779. bufputc(b, ' ');
  780. }
  781. id.data = b->data;
  782. id.size = b->size;
  783. } else {
  784. id.data = data + 1;
  785. id.size = txt_e - 1;
  786. }
  787. } else {
  788. id.data = data + link_b;
  789. id.size = link_e - link_b;
  790. }
  791. lr = find_link_ref(rndr->refs, id.data, id.size);
  792. if (!lr)
  793. goto cleanup;
  794. /* keeping link and title from link_ref */
  795. link = lr->link;
  796. title = lr->title;
  797. i++;
  798. }
  799. /* shortcut reference style link */
  800. else {
  801. struct buf id = { 0, 0, 0, 0 };
  802. struct link_ref *lr;
  803. /* crafting the id */
  804. if (text_has_nl) {
  805. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  806. size_t j;
  807. for (j = 1; j < txt_e; j++) {
  808. if (data[j] != '\n')
  809. bufputc(b, data[j]);
  810. else if (data[j - 1] != ' ')
  811. bufputc(b, ' ');
  812. }
  813. id.data = b->data;
  814. id.size = b->size;
  815. } else {
  816. id.data = data + 1;
  817. id.size = txt_e - 1;
  818. }
  819. /* finding the link_ref */
  820. lr = find_link_ref(rndr->refs, id.data, id.size);
  821. if (!lr)
  822. goto cleanup;
  823. /* keeping link and title from link_ref */
  824. link = lr->link;
  825. title = lr->title;
  826. /* rewinding the whitespace */
  827. i = txt_e + 1;
  828. }
  829. /* building content: img alt is escaped, link content is parsed */
  830. if (txt_e > 1) {
  831. content = rndr_newbuf(rndr, BUFFER_SPAN);
  832. if (is_img) {
  833. bufput(content, data + 1, txt_e - 1);
  834. } else {
  835. /* disable autolinking when parsing inline the
  836. * content of a link */
  837. rndr->in_link_body = 1;
  838. parse_inline(content, rndr, data + 1, txt_e - 1);
  839. rndr->in_link_body = 0;
  840. }
  841. }
  842. if (link) {
  843. u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  844. unscape_text(u_link, link);
  845. }
  846. /* calling the relevant rendering function */
  847. if (is_img) {
  848. if (ob->size && ob->data[ob->size - 1] == '!')
  849. ob->size -= 1;
  850. ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
  851. } else {
  852. ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
  853. }
  854. /* cleanup */
  855. cleanup:
  856. rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
  857. return ret ? i : 0;
  858. }
  859. static size_t
  860. char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  861. {
  862. size_t sup_start, sup_len;
  863. struct buf *sup;
  864. if (!rndr->cb.superscript)
  865. return 0;
  866. if (size < 2)
  867. return 0;
  868. if (data[1] == '(') {
  869. sup_start = sup_len = 2;
  870. while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
  871. sup_len++;
  872. if (sup_len == size)
  873. return 0;
  874. } else {
  875. sup_start = sup_len = 1;
  876. while (sup_len < size && !_isspace(data[sup_len]))
  877. sup_len++;
  878. }
  879. if (sup_len - sup_start == 0)
  880. return (sup_start == 2) ? 3 : 0;
  881. sup = rndr_newbuf(rndr, BUFFER_SPAN);
  882. parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
  883. rndr->cb.superscript(ob, sup, rndr->opaque);
  884. rndr_popbuf(rndr, BUFFER_SPAN);
  885. return (sup_start == 2) ? sup_len + 1 : sup_len;
  886. }
  887. /*********************************
  888. * BLOCK-LEVEL PARSING FUNCTIONS *
  889. *********************************/
  /*
   * is_empty: returns the line length when it is empty, 0 otherwise.
   * A line is empty when it holds nothing but spaces before its newline
   * (or before the end of the input). The returned length counts the
   * newline, and is size + 1 when the input ends without one.
   */
  static size_t
  is_empty(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      while (pos < size && data[pos] != '\n') {
          if (data[pos] != ' ')
              return 0;
          pos++;
      }

      return pos + 1;
  }
  /*
   * is_hrule: returns whether a line is a horizontal rule.
   * After up to three leading spaces the line must consist of at least
   * three '*', '-' or '_' (all the same char), optionally mixed with
   * spaces, up to the newline or the end of the input.
   */
  static int
  is_hrule(uint8_t *data, size_t size)
  {
      size_t pos = 0, count = 0;
      uint8_t mark;

      if (size < 3)
          return 0;

      /* skipping initial spaces (three at most) */
      while (pos < 3 && data[pos] == ' ')
          pos++;

      /* looking at the hrule char */
      if (pos + 2 >= size)
          return 0;

      mark = data[pos];
      if (mark != '*' && mark != '-' && mark != '_')
          return 0;

      /* the whole line must be the char or whitespace */
      for (; pos < size && data[pos] != '\n'; pos++) {
          if (data[pos] == mark)
              count++;
          else if (data[pos] != ' ')
              return 0;
      }

      return count >= 3;
  }
  925. /* check if a line is a code fence; return its size if it is */
  926. static size_t
  927. is_codefence(uint8_t *data, size_t size, struct buf *syntax)
  928. {
  929. size_t i = 0, n = 0;
  930. uint8_t c;
  931. /* skipping initial spaces */
  932. if (size < 3) return 0;
  933. if (data[0] == ' ') { i++;
  934. if (data[1] == ' ') { i++;
  935. if (data[2] == ' ') { i++; } } }
  936. /* looking at the hrule uint8_t */
  937. if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
  938. return 0;
  939. c = data[i];
  940. /* the whole line must be the uint8_t or whitespace */
  941. while (i < size && data[i] == c) {
  942. n++; i++;
  943. }
  944. if (n < 3)
  945. return 0;
  946. if (syntax != NULL) {
  947. size_t syn = 0;
  948. while (i < size && data[i] == ' ')
  949. i++;
  950. syntax->data = data + i;
  951. if (i < size && data[i] == '{') {
  952. i++; syntax->data++;
  953. while (i < size && data[i] != '}' && data[i] != '\n') {
  954. syn++; i++;
  955. }
  956. if (i == size || data[i] != '}')
  957. return 0;
  958. /* strip all whitespace at the beginning and the end
  959. * of the {} block */
  960. while (syn > 0 && _isspace(syntax->data[0])) {
  961. syntax->data++; syn--;
  962. }
  963. while (syn > 0 && _isspace(syntax->data[syn - 1]))
  964. syn--;
  965. i++;
  966. } else {
  967. while (i < size && !_isspace(data[i])) {
  968. syn++; i++;
  969. }
  970. }
  971. syntax->size = syn;
  972. }
  973. while (i < size && data[i] != '\n') {
  974. if (!_isspace(data[i]))
  975. return 0;
  976. i++;
  977. }
  978. return i + 1;
  979. }
  980. /* is_atxheader • returns whether the line is a hash-prefixed header */
  981. static int
  982. is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
  983. {
  984. if (data[0] != '#')
  985. return 0;
  986. if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
  987. size_t level = 0;
  988. while (level < size && level < 6 && data[level] == '#')
  989. level++;
  990. if (level < size && data[level] != ' ')
  991. return 0;
  992. }
  993. return 1;
  994. }
  /* is_headerline • checks whether the line is a setext-style underline.
   * Returns 1 for a line of '=' characters (level 1 header), 2 for a line
   * of '-' characters (level 2 header), 0 otherwise. Trailing spaces after
   * the run are allowed; anything else before the newline disqualifies
   * the line. The first byte of `data` is read unconditionally. */
  static int
  is_headerline(uint8_t *data, size_t size)
  {
      uint8_t mark = data[0];
      size_t pos = 1;
      int level;

      if (mark == '=')
          level = 1;
      else if (mark == '-')
          level = 2;
      else
          return 0;

      /* the run of underline characters */
      while (pos < size && data[pos] == mark)
          pos++;

      /* optional trailing spaces */
      while (pos < size && data[pos] == ' ')
          pos++;

      if (pos < size && data[pos] != '\n')
          return 0;

      return level;
  }
  /* is_next_headerline • checks whether the line *after* the current one
   * is a setext-style underline. Returns the is_headerline() result for
   * that line (1 or 2), or 0 when there is no following line. */
  static int
  is_next_headerline(uint8_t *data, size_t size)
  {
      size_t next;

      /* find the newline that ends the current line */
      for (next = 0; next < size; next++) {
          if (data[next] == '\n')
              break;
      }

      /* step over it; bail out if nothing follows */
      next++;
      if (next >= size)
          return 0;

      return is_headerline(data + next, size - next);
  }
  /* prefix_quote • returns the length of the blockquote prefix.
   * The prefix is up to three spaces, a '>' and one optional space.
   * Returns 0 when the line does not start with such a prefix. */
  static size_t
  prefix_quote(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      /* up to three leading spaces */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      if (pos >= size || data[pos] != '>')
          return 0;
      pos++;

      /* a single space after '>' belongs to the prefix */
      if (pos < size && data[pos] == ' ')
          pos++;

      return pos;
  }
  /* prefix_code • returns the prefix length for an indented code line:
   * 4 when the line holds more than three bytes and starts with four
   * spaces, 0 otherwise. */
  static size_t
  prefix_code(uint8_t *data, size_t size)
  {
      size_t k;

      if (size <= 3)
          return 0;

      for (k = 0; k < 4; k++) {
          if (data[k] != ' ')
              return 0;
      }

      return 4;
  }
  /* prefix_oli • returns the length of an ordered list item prefix.
   * The prefix is up to three spaces, one or more decimal digits, a '.'
   * and a space. Returns 0 when the line has no such prefix, or when the
   * following line is a setext underline (the line is then a header, not
   * a list item). */
  static size_t
  prefix_oli(uint8_t *data, size_t size)
  {
      size_t pos = 0;

      /* up to three leading spaces */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      /* at least one digit is required */
      if (pos >= size || data[pos] < '0' || data[pos] > '9')
          return 0;

      do {
          pos++;
      } while (pos < size && data[pos] >= '0' && data[pos] <= '9');

      /* the number must be followed by ". " */
      if (pos + 1 >= size)
          return 0;
      if (data[pos] != '.' || data[pos + 1] != ' ')
          return 0;

      if (is_next_headerline(data + pos, size - pos))
          return 0;

      return pos + 2;
  }
  /* prefix_uli • returns the length of an unordered list item prefix.
   * The prefix is up to three spaces, a bullet ('*', '+' or '-') and a
   * space. Returns 0 when the line has no such prefix, or when the
   * following line is a setext underline (the line is then a header, not
   * a list item). */
  static size_t
  prefix_uli(uint8_t *data, size_t size)
  {
      size_t pos = 0;
      uint8_t bullet;

      /* up to three leading spaces */
      while (pos < 3 && pos < size && data[pos] == ' ')
          pos++;

      /* need room for the bullet and the space after it */
      if (pos + 1 >= size)
          return 0;

      bullet = data[pos];
      if (bullet != '*' && bullet != '+' && bullet != '-')
          return 0;
      if (data[pos + 1] != ' ')
          return 0;

      if (is_next_headerline(data + pos, size - pos))
          return 0;

      return pos + 2;
  }
  1079. /* parse_block • parsing of one block, returning next uint8_t to parse */
  1080. static void parse_block(struct buf *ob, struct sd_markdown *rndr,
  1081. uint8_t *data, size_t size);
  1082. /* parse_blockquote • handles parsing of a blockquote fragment */
  1083. static size_t
  1084. parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1085. {
  1086. size_t beg, end = 0, pre, work_size = 0;
  1087. uint8_t *work_data = 0;
  1088. struct buf *out = 0;
  1089. out = rndr_newbuf(rndr, BUFFER_BLOCK);
  1090. beg = 0;
  1091. while (beg < size) {
  1092. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1093. pre = prefix_quote(data + beg, end - beg);
  1094. if (pre)
  1095. beg += pre; /* skipping prefix */
  1096. /* empty line followed by non-quote line */
  1097. else if (is_empty(data + beg, end - beg) &&
  1098. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  1099. !is_empty(data + end, size - end))))
  1100. break;
  1101. if (beg < end) { /* copy into the in-place working buffer */
  1102. /* bufput(work, data + beg, end - beg); */
  1103. if (!work_data)
  1104. work_data = data + beg;
  1105. else if (data + beg != work_data + work_size)
  1106. memmove(work_data + work_size, data + beg, end - beg);
  1107. work_size += end - beg;
  1108. }
  1109. beg = end;
  1110. }
  1111. parse_block(out, rndr, work_data, work_size);
  1112. if (rndr->cb.blockquote)
  1113. rndr->cb.blockquote(ob, out, rndr->opaque);
  1114. rndr_popbuf(rndr, BUFFER_BLOCK);
  1115. return end;
  1116. }
  1117. static size_t
  1118. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
  1119. /* parse_blockquote • handles parsing of a regular paragraph */
  1120. static size_t
  1121. parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1122. {
  1123. size_t i = 0, end = 0;
  1124. int level = 0;
  1125. struct buf work = { data, 0, 0, 0 };
  1126. while (i < size) {
  1127. for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
  1128. if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0)
  1129. break;
  1130. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  1131. if (data[i] == '<' && rndr->cb.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
  1132. end = i;
  1133. break;
  1134. }
  1135. }
  1136. if (is_atxheader(rndr, data + i, size - i) || is_hrule(data + i, size - i)) {
  1137. end = i;
  1138. break;
  1139. }
  1140. i = end;
  1141. }
  1142. work.size = i;
  1143. while (work.size && data[work.size - 1] == '\n')
  1144. work.size--;
  1145. if (!level) {
  1146. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1147. parse_inline(tmp, rndr, work.data, work.size);
  1148. if (rndr->cb.paragraph)
  1149. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1150. rndr_popbuf(rndr, BUFFER_BLOCK);
  1151. } else {
  1152. struct buf *header_work;
  1153. if (work.size) {
  1154. size_t beg;
  1155. i = work.size;
  1156. work.size -= 1;
  1157. while (work.size && data[work.size] != '\n')
  1158. work.size -= 1;
  1159. beg = work.size + 1;
  1160. while (work.size && data[work.size - 1] == '\n')
  1161. work.size -= 1;
  1162. if (work.size > 0) {
  1163. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1164. parse_inline(tmp, rndr, work.data, work.size);
  1165. if (rndr->cb.paragraph)
  1166. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1167. rndr_popbuf(rndr, BUFFER_BLOCK);
  1168. work.data += beg;
  1169. work.size = i - beg;
  1170. }
  1171. else work.size = i;
  1172. }
  1173. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1174. parse_inline(header_work, rndr, work.data, work.size);
  1175. if (rndr->cb.header)
  1176. rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
  1177. rndr_popbuf(rndr, BUFFER_SPAN);
  1178. }
  1179. return end;
  1180. }
  1181. /* parse_fencedcode • handles parsing of a block-level code fragment */
  1182. static size_t
  1183. parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1184. {
  1185. size_t beg, end;
  1186. struct buf *work = 0;
  1187. struct buf lang = { 0, 0, 0, 0 };
  1188. beg = is_codefence(data, size, &lang);
  1189. if (beg == 0) return 0;
  1190. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1191. while (beg < size) {
  1192. size_t fence_end;
  1193. fence_end = is_codefence(data + beg, size - beg, NULL);
  1194. if (fence_end != 0) {
  1195. beg += fence_end;
  1196. break;
  1197. }
  1198. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1199. if (beg < end) {
  1200. /* verbatim copy to the working buffer,
  1201. escaping entities */
  1202. if (is_empty(data + beg, end - beg))
  1203. bufputc(work, '\n');
  1204. else bufput(work, data + beg, end - beg);
  1205. }
  1206. beg = end;
  1207. }
  1208. if (work->size && work->data[work->size - 1] != '\n')
  1209. bufputc(work, '\n');
  1210. if (rndr->cb.blockcode)
  1211. rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
  1212. rndr_popbuf(rndr, BUFFER_BLOCK);
  1213. return beg;
  1214. }
  1215. static size_t
  1216. parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1217. {
  1218. size_t beg, end, pre;
  1219. struct buf *work = 0;
  1220. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1221. beg = 0;
  1222. while (beg < size) {
  1223. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1224. pre = prefix_code(data + beg, end - beg);
  1225. if (pre)
  1226. beg += pre; /* skipping prefix */
  1227. else if (!is_empty(data + beg, end - beg))
  1228. /* non-empty non-prefixed line breaks the pre */
  1229. break;
  1230. if (beg < end) {
  1231. /* verbatim copy to the working buffer,
  1232. escaping entities */
  1233. if (is_empty(data + beg, end - beg))
  1234. bufputc(work, '\n');
  1235. else bufput(work, data + beg, end - beg);
  1236. }
  1237. beg = end;
  1238. }
  1239. while (work->size && work->data[work->size - 1] == '\n')
  1240. work->size -= 1;
  1241. bufputc(work, '\n');
  1242. if (rndr->cb.blockcode)
  1243. rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
  1244. rndr_popbuf(rndr, BUFFER_BLOCK);
  1245. return beg;
  1246. }
  1247. /* parse_listitem • parsing of a single list item */
  1248. /* assuming initial prefix is already removed */
  1249. static size_t
  1250. parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
  1251. {
  1252. struct buf *work = 0, *inter = 0;
  1253. size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
  1254. int in_empty = 0, has_inside_empty = 0;
  1255. size_t has_next_uli, has_next_oli;
  1256. /* keeping track of the first indentation prefix */
  1257. while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
  1258. orgpre++;
  1259. beg = prefix_uli(data, size);
  1260. if (!beg)
  1261. beg = prefix_oli(data, size);
  1262. if (!beg)
  1263. return 0;
  1264. /* skipping to the beginning of the following line */
  1265. end = beg;
  1266. while (end < size && data[end - 1] != '\n')
  1267. end++;
  1268. /* getting working buffers */
  1269. work = rndr_newbuf(rndr, BUFFER_SPAN);
  1270. inter = rndr_newbuf(rndr, BUFFER_SPAN);
  1271. /* putting the first line into the working buffer */
  1272. bufput(work, data + beg, end - beg);
  1273. beg = end;
  1274. /* process the following lines */
  1275. while (beg < size) {
  1276. end++;
  1277. while (end < size && data[end - 1] != '\n')
  1278. end++;
  1279. /* process an empty line */
  1280. if (is_empty(data + beg, end - beg)) {
  1281. in_empty = 1;
  1282. beg = end;
  1283. continue;
  1284. }
  1285. /* calculating the indentation */
  1286. i = 0;
  1287. while (i < 4 && beg + i < end && data[beg + i] == ' ')
  1288. i++;
  1289. pre = i;
  1290. has_next_uli = prefix_uli(data + beg + i, end - beg - i);
  1291. has_next_oli = prefix_oli(data + beg + i, end - beg - i);
  1292. /* checking for ul/ol switch */
  1293. if (in_empty && (
  1294. ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
  1295. (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
  1296. *flags |= MKD_LI_END;
  1297. break; /* the following item must have same list type */
  1298. }
  1299. /* checking for a new item */
  1300. if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
  1301. if (in_empty)
  1302. has_inside_empty = 1;
  1303. if (pre == orgpre) /* the following item must have */
  1304. break; /* the same indentation */
  1305. if (!sublist)
  1306. sublist = work->size;
  1307. }
  1308. /* joining only indented stuff after empty lines */
  1309. else if (in_empty && i < 4) {
  1310. *flags |= MKD_LI_END;
  1311. break;
  1312. }
  1313. else if (in_empty) {
  1314. bufputc(work, '\n');
  1315. has_inside_empty = 1;
  1316. }
  1317. in_empty = 0;
  1318. /* adding the line without prefix into the working buffer */
  1319. bufput(work, data + beg + i, end - beg - i);
  1320. beg = end;
  1321. }
  1322. /* render of li contents */
  1323. if (has_inside_empty)
  1324. *flags |= MKD_LI_BLOCK;
  1325. if (*flags & MKD_LI_BLOCK) {
  1326. /* intermediate render of block li */
  1327. if (sublist && sublist < work->size) {
  1328. parse_block(inter, rndr, work->data, sublist);
  1329. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1330. }
  1331. else
  1332. parse_block(inter, rndr, work->data, work->size);
  1333. } else {
  1334. /* intermediate render of inline li */
  1335. if (sublist && sublist < work->size) {
  1336. parse_inline(inter, rndr, work->data, sublist);
  1337. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1338. }
  1339. else
  1340. parse_inline(inter, rndr, work->data, work->size);
  1341. }
  1342. /* render of li itself */
  1343. if (rndr->cb.listitem)
  1344. rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
  1345. rndr_popbuf(rndr, BUFFER_SPAN);
  1346. rndr_popbuf(rndr, BUFFER_SPAN);
  1347. return beg;
  1348. }
  1349. /* parse_list • parsing ordered or unordered list block */
  1350. static size_t
  1351. parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
  1352. {
  1353. struct buf *work = 0;
  1354. size_t i = 0, j;
  1355. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1356. while (i < size) {
  1357. j = parse_listitem(work, rndr, data + i, size - i, &flags);
  1358. i += j;
  1359. if (!j || (flags & MKD_LI_END))
  1360. break;
  1361. }
  1362. if (rndr->cb.list)
  1363. rndr->cb.list(ob, work, flags, rndr->opaque);
  1364. rndr_popbuf(rndr, BUFFER_BLOCK);
  1365. return i;
  1366. }
  1367. /* parse_atxheader • parsing of atx-style headers */
  1368. static size_t
  1369. parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1370. {
  1371. size_t level = 0;
  1372. size_t i, end, skip;
  1373. while (level < size && level < 6 && data[level] == '#')
  1374. level++;
  1375. for (i = level; i < size && data[i] == ' '; i++);
  1376. for (end = i; end < size && data[end] != '\n'; end++);
  1377. skip = end;
  1378. while (end && data[end - 1] == '#')
  1379. end--;
  1380. while (end && data[end - 1] == ' ')
  1381. end--;
  1382. if (end > i) {
  1383. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  1384. parse_inline(work, rndr, data + i, end - i);
  1385. if (rndr->cb.header)
  1386. rndr->cb.header(ob, work, (int)level, rndr->opaque);
  1387. rndr_popbuf(rndr, BUFFER_SPAN);
  1388. }
  1389. return skip;
  1390. }
  1391. /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
  1392. /* returns the length on match, 0 otherwise */
  1393. static size_t
  1394. htmlblock_end(const char *tag, size_t tag_len, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1395. {
  1396. size_t i, w;
  1397. /* checking if tag is a match */
  1398. if (tag_len + 3 >= size ||
  1399. strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
  1400. data[tag_len + 2] != '>')
  1401. return 0;
  1402. /* checking white lines */
  1403. i = tag_len + 3;
  1404. w = 0;
  1405. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1406. return 0; /* non-blank after tag */
  1407. i += w;
  1408. w = 0;
  1409. if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
  1410. if (i < size)
  1411. w = is_empty(data + i, size - i);
  1412. } else {
  1413. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1414. return 0; /* non-blank line after tag line */
  1415. }
  1416. return i + w;
  1417. }
  1418. /* parse_htmlblock • parsing of inline HTML block */
  1419. static size_t
  1420. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
  1421. {
  1422. size_t i, j = 0;
  1423. const char *curtag = NULL;
  1424. int found;
  1425. struct buf work = { data, 0, 0, 0 };
  1426. /* identification of the opening tag */
  1427. if (size < 2 || data[0] != '<')
  1428. return 0;
  1429. i = 1;
  1430. while (i < size && data[i] != '>' && data[i] != ' ')
  1431. i++;
  1432. if (i < size)
  1433. curtag = find_block_tag((char *)data + 1, (int)i - 1);
  1434. /* handling of special cases */
  1435. if (!curtag) {
  1436. /* HTML comment, laxist form */
  1437. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  1438. i = 5;
  1439. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  1440. i++;
  1441. i++;
  1442. if (i < size)
  1443. j = is_empty(data + i, size - i);
  1444. if (j) {
  1445. work.size = i + j;
  1446. if (do_render && rndr->cb.blockhtml)
  1447. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1448. return work.size;
  1449. }
  1450. }
  1451. /* HR, which is the only self-closing block tag considered */
  1452. if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
  1453. i = 3;
  1454. while (i < size && data[i] != '>')
  1455. i++;
  1456. if (i + 1 < size) {
  1457. i++;
  1458. j = is_empty(data + i, size - i);
  1459. if (j) {
  1460. work.size = i + j;
  1461. if (do_render && rndr->cb.blockhtml)
  1462. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1463. return work.size;
  1464. }
  1465. }
  1466. }
  1467. /* no special case recognised */
  1468. return 0;
  1469. }
  1470. /* looking for an unindented matching closing tag */
  1471. /* followed by a blank line */
  1472. i = 1;
  1473. found = 0;
  1474. /* if not found, trying a second pass looking for indented match */
  1475. /* but not if tag is "ins" or "del" (following original Markdown.pl) */
  1476. if (strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
  1477. size_t tag_size = strlen(curtag);
  1478. i = 1;
  1479. while (i < size) {
  1480. i++;
  1481. while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
  1482. i++;
  1483. if (i + 2 + tag_size >= size)
  1484. break;
  1485. j = htmlblock_end(curtag, tag_size, rndr, data + i - 1, size - i + 1);
  1486. if (j) {
  1487. i += j - 1;
  1488. found = 1;
  1489. break;
  1490. }
  1491. }
  1492. }
  1493. if (!found) return 0;
  1494. /* the end of the block has been found */
  1495. work.size = i;
  1496. if (do_render && rndr->cb.blockhtml)
  1497. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1498. return i;
  1499. }
  1500. static void
  1501. parse_table_row(
  1502. struct buf *ob,
  1503. struct sd_markdown *rndr,
  1504. uint8_t *data,
  1505. size_t size,
  1506. size_t columns,
  1507. int *col_data,
  1508. int header_flag)
  1509. {
  1510. size_t i = 0, col;
  1511. struct buf *row_work = 0;
  1512. if (!rndr->cb.table_cell || !rndr->cb.table_row)
  1513. return;
  1514. row_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1515. if (i < size && data[i] == '|')
  1516. i++;
  1517. for (col = 0; col < columns && i < size; ++col) {
  1518. size_t cell_start, cell_end;
  1519. struct buf *cell_work;
  1520. cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1521. while (i < size && _isspace(data[i]))
  1522. i++;
  1523. cell_start = i;
  1524. while (i < size && data[i] != '|')
  1525. i++;
  1526. cell_end = i - 1;
  1527. while (cell_end > cell_start && _isspace(data[cell_end]))
  1528. cell_end--;
  1529. parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
  1530. rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
  1531. rndr_popbuf(rndr, BUFFER_SPAN);
  1532. i++;
  1533. }
  1534. for (; col < columns; ++col) {
  1535. struct buf empty_cell = { 0, 0, 0, 0 };
  1536. rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
  1537. }
  1538. rndr->cb.table_row(ob, row_work, rndr->opaque);
  1539. rndr_popbuf(rndr, BUFFER_SPAN);
  1540. }
  1541. static size_t
  1542. parse_table_header(
  1543. struct buf *ob,
  1544. struct sd_markdown *rndr,
  1545. uint8_t *data,
  1546. size_t size,
  1547. size_t *columns,
  1548. int **column_data)
  1549. {
  1550. int pipes;
  1551. size_t i = 0, col, header_end, under_end;
  1552. pipes = 0;
  1553. while (i < size && data[i] != '\n')
  1554. if (data[i++] == '|')
  1555. pipes++;
  1556. if (i == size || pipes == 0)
  1557. return 0;
  1558. header_end = i;
  1559. if (data[0] == '|')
  1560. pipes--;
  1561. if (i > 2 && data[i - 1] == '|')
  1562. pipes--;
  1563. *columns = pipes + 1;
  1564. *column_data = calloc(*columns, sizeof(int));
  1565. /* Parse the header underline */
  1566. i++;
  1567. if (i < size && data[i] == '|')
  1568. i++;
  1569. under_end = i;
  1570. while (under_end < size && data[under_end] != '\n')
  1571. under_end++;
  1572. for (col = 0; col < *columns && i < under_end; ++col) {
  1573. size_t dashes = 0;
  1574. while (i < under_end && data[i] == ' ')
  1575. i++;
  1576. if (data[i] == ':') {
  1577. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
  1578. dashes++;
  1579. }
  1580. while (i < under_end && data[i] == '-') {
  1581. i++; dashes++;
  1582. }
  1583. if (i < under_end && data[i] == ':') {
  1584. i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
  1585. dashes++;
  1586. }
  1587. while (i < under_end && data[i] == ' ')
  1588. i++;
  1589. if (i < under_end && data[i] != '|')
  1590. break;
  1591. if (dashes < 3)
  1592. break;
  1593. i++;
  1594. }
  1595. if (col < *columns)
  1596. return 0;
  1597. parse_table_row(
  1598. ob, rndr, data,
  1599. header_end,
  1600. *columns,
  1601. *column_data,
  1602. MKD_TABLE_HEADER
  1603. );
  1604. return under_end + 1;
  1605. }
  1606. static size_t
  1607. parse_table(
  1608. struct buf *ob,
  1609. struct sd_markdown *rndr,
  1610. uint8_t *data,
  1611. size_t size)
  1612. {
  1613. size_t i;
  1614. struct buf *header_work = 0;
  1615. struct buf *body_work = 0;
  1616. size_t columns;
  1617. int *col_data = NULL;
  1618. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1619. body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1620. i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
  1621. if (i > 0) {
  1622. while (i < size) {
  1623. size_t row_start;
  1624. int pipes = 0;
  1625. row_start = i;
  1626. while (i < size && data[i] != '\n')
  1627. if (data[i++] == '|')
  1628. pipes++;
  1629. if (pipes == 0 || i == size) {
  1630. i = row_start;
  1631. break;
  1632. }
  1633. parse_table_row(
  1634. body_work,
  1635. rndr,
  1636. data + row_start,
  1637. i - row_start,
  1638. columns,
  1639. col_data, 0
  1640. );
  1641. i++;
  1642. }
  1643. if (rndr->cb.table)
  1644. rndr->cb.table(ob, header_work, body_work, rndr->opaque);
  1645. }
  1646. free(col_data);
  1647. rndr_popbuf(rndr, BUFFER_SPAN);
  1648. rndr_popbuf(rndr, BUFFER_BLOCK);
  1649. return i;
  1650. }
  1651. /* parse_block • parsing of one block, returning next uint8_t to parse */
  1652. static void
  1653. parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1654. {
  1655. size_t beg, end, i;
  1656. uint8_t *txt_data;
  1657. beg = 0;
  1658. if (rndr->work_bufs[BUFFER_SPAN].size +
  1659. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  1660. return;
  1661. while (beg < size) {
  1662. txt_data = data + beg;
  1663. end = size - beg;
  1664. if (is_atxheader(rndr, txt_data, end))
  1665. beg += parse_atxheader(ob, rndr, txt_data, end);
  1666. else if (data[beg] == '<' && rndr->cb.blockhtml &&
  1667. (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
  1668. beg += i;
  1669. else if ((i = is_empty(txt_data, end)) != 0)
  1670. beg += i;
  1671. else if (is_hrule(txt_data, end)) {
  1672. if (rndr->cb.hrule)
  1673. rndr->cb.hrule(ob, rndr->opaque);
  1674. while (beg < size && data[beg] != '\n')
  1675. beg++;
  1676. beg++;
  1677. }
  1678. else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
  1679. (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
  1680. beg += i;
  1681. else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
  1682. (i = parse_table(ob, rndr, txt_data, end)) != 0)
  1683. beg += i;
  1684. else if (prefix_quote(txt_data, end))
  1685. beg += parse_blockquote(ob, rndr, txt_data, end);
  1686. else if (prefix_code(txt_data, end))
  1687. beg += parse_blockcode(ob, rndr, txt_data, end);
  1688. else if (prefix_uli(txt_data, end))
  1689. beg += parse_list(ob, rndr, txt_data, end, 0);
  1690. else if (prefix_oli(txt_data, end))
  1691. beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
  1692. else
  1693. beg += parse_paragraph(ob, rndr, txt_data, end);
  1694. }
  1695. }
  1696. /*********************
  1697. * REFERENCE PARSING *
  1698. *********************/
  1699. /* is_ref • returns whether a line is a reference or not */
  1700. static int
  1701. is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
  1702. {
  1703. /* int n; */
  1704. size_t i = 0;
  1705. size_t id_offset, id_end;
  1706. size_t link_offset, link_end;
  1707. size_t title_offset, title_end;
  1708. size_t line_end;
  1709. /* up to 3 optional leading spaces */
  1710. if (beg + 3 >= end) return 0;
  1711. if (data[beg] == ' ') { i = 1;
  1712. if (data[beg + 1] == ' ') { i = 2;
  1713. if (data[beg + 2] == ' ') { i = 3;
  1714. if (data[beg + 3] == ' ') return 0; } } }
  1715. i += beg;
  1716. /* id part: anything but a newline between brackets */
  1717. if (data[i] != '[') return 0;
  1718. i++;
  1719. id_offset = i;
  1720. while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
  1721. i++;
  1722. if (i >= end || data[i] != ']') return 0;
  1723. id_end = i;
  1724. /* spacer: colon (space | tab)* newline? (space | tab)* */
  1725. i++;
  1726. if (i >= end || data[i] != ':') return 0;
  1727. i++;
  1728. while (i < end && data[i] == ' ') i++;
  1729. if (i < end && (data[i] == '\n' || data[i] == '\r')) {
  1730. i++;
  1731. if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
  1732. while (i < end && data[i] == ' ') i++;
  1733. if (i >= end) return 0;
  1734. /* link: whitespace-free sequence, optionally between angle brackets */
  1735. if (data[i] == '<')
  1736. i++;
  1737. link_offset = i;
  1738. while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
  1739. i++;
  1740. if (data[i - 1] == '>') link_end = i - 1;
  1741. else link_end = i;
  1742. /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
  1743. while (i < end && data[i] == ' ') i++;
  1744. if (i < end && data[i] != '\n' && data[i] != '\r'
  1745. && data[i] != '\'' && data[i] != '"' && data[i] != '(')
  1746. return 0;
  1747. line_end = 0;
  1748. /* computing end-of-line */
  1749. if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
  1750. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  1751. line_end = i + 1;
  1752. /* optional (space|tab)* spacer after a newline */
  1753. if (line_end) {
  1754. i = line_end + 1;
  1755. while (i < end && data[i] == ' ') i++; }
  1756. /* optional title: any non-newline sequence enclosed in '"()
  1757. alone on its line */
  1758. title_offset = title_end = 0;
  1759. if (i + 1 < end
  1760. && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
  1761. i++;
  1762. title_offset = i;
  1763. /* looking for EOL */
  1764. while (i < end && data[i] != '\n' && data[i] != '\r') i++;
  1765. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  1766. title_end = i + 1;
  1767. else title_end = i;
  1768. /* stepping back */
  1769. i -= 1;
  1770. while (i > title_offset && data[i] == ' ')
  1771. i -= 1;
  1772. if (i > title_offset
  1773. && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
  1774. line_end = title_end;
  1775. title_end = i; } }
  1776. if (!line_end)
  1777. return 0; /* garbage after the link */
  1778. /* a valid ref has been found, filling-in return structures */
  1779. if (last)
  1780. *last = line_end;
  1781. if (refs) {
  1782. struct link_ref *ref;
  1783. ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
  1784. ref->link = bufnew(link_end - link_offset);
  1785. bufput(ref->link, data + link_offset, link_end - link_offset);
  1786. if (title_end > title_offset) {
  1787. ref->title = bufnew(title_end - title_offset);
  1788. bufput(ref->title, data + title_offset, title_end - title_offset);
  1789. }
  1790. }
  1791. return 1;
  1792. }
  /* expand_tabs • copies `line` into `ob`, replacing every tab with
   * spaces up to the next multiple-of-four column (always at least one
   * space). The column counter starts at zero for each call. */
  static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
  {
      size_t pos = 0, column = 0;

      while (pos < size) {
          size_t run_start = pos;

          /* copy the run of non-tab bytes in one go */
          while (pos < size && line[pos] != '\t')
              pos++;

          column += pos - run_start;

          if (pos > run_start)
              bufput(ob, line + run_start, pos - run_start);

          if (pos >= size)
              break;

          /* pad with spaces up to the next tab stop */
          do {
              bufputc(ob, ' ');
              column++;
          } while (column % 4);

          pos++; /* step over the tab */
      }
  }
  1811. /**********************
  1812. * EXPORTED FUNCTIONS *
  1813. **********************/
  1814. struct sd_markdown *
  1815. sd_markdown_new(
  1816. unsigned int extensions,
  1817. size_t max_nesting,
  1818. const struct sd_callbacks *callbacks,
  1819. void *opaque)
  1820. {
  1821. struct sd_markdown *md = NULL;
  1822. assert(max_nesting > 0 && callbacks);
  1823. md = malloc(sizeof(struct sd_markdown));
  1824. if (!md)
  1825. return NULL;
  1826. memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
  1827. stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
  1828. stack_init(&md->work_bufs[BUFFER_SPAN], 8);
  1829. memset(md->active_char, 0x0, 256);
  1830. if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
  1831. md->active_char['*'] = MD_CHAR_EMPHASIS;
  1832. md->active_char['_'] = MD_CHAR_EMPHASIS;
  1833. if (extensions & MKDEXT_STRIKETHROUGH)
  1834. md->active_char['~'] = MD_CHAR_EMPHASIS;
  1835. }
  1836. if (md->cb.codespan)
  1837. md->active_char['`'] = MD_CHAR_CODESPAN;
  1838. if (md->cb.linebreak)
  1839. md->active_char['\n'] = MD_CHAR_LINEBREAK;
  1840. if (md->cb.image || md->cb.link)
  1841. md->active_char['['] = MD_CHAR_LINK;
  1842. md->active_char['<'] = MD_CHAR_LANGLE;
  1843. md->active_char['\\'] = MD_CHAR_ESCAPE;
  1844. md->active_char['&'] = MD_CHAR_ENTITITY;
  1845. if (extensions & MKDEXT_AUTOLINK) {
  1846. md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
  1847. md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
  1848. md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
  1849. }
  1850. if (extensions & MKDEXT_SUPERSCRIPT)
  1851. md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
  1852. /* Extension data */
  1853. md->ext_flags = extensions;
  1854. md->opaque = opaque;
  1855. md->max_nesting = max_nesting;
  1856. md->in_link_body = 0;
  1857. return md;
  1858. }
  1859. void
  1860. sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
  1861. {
  1862. #define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
  1863. static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
  1864. struct buf *text;
  1865. size_t beg, end;
  1866. text = bufnew(64);
  1867. if (!text)
  1868. return;
  1869. /* Preallocate enough space for our buffer to avoid expanding while copying */
  1870. bufgrow(text, doc_size);
  1871. /* reset the references table */
  1872. memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
  1873. /* first pass: looking for references, copying everything else */
  1874. beg = 0;
  1875. /* Skip a possible UTF-8 BOM, even though the Unicode standard
  1876. * discourages having these in UTF-8 documents */
  1877. if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
  1878. beg += 3;
  1879. while (beg < doc_size) /* iterating over lines */
  1880. if (is_ref(document, beg, doc_size, &end, md->refs))
  1881. beg = end;
  1882. else { /* skipping to the next line */
  1883. end = beg;
  1884. while (end < doc_size && document[end] != '\n' && document[end] != '\r')
  1885. end++;
  1886. /* adding the line body if present */
  1887. if (end > beg)
  1888. expand_tabs(text, document + beg, end - beg);
  1889. while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
  1890. /* add one \n per newline */
  1891. if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
  1892. bufputc(text, '\n');
  1893. end++;
  1894. }
  1895. beg = end;
  1896. }
  1897. /* pre-grow the output buffer to minimize allocations */
  1898. bufgrow(ob, MARKDOWN_GROW(text->size));
  1899. /* second pass: actual rendering */
  1900. if (md->cb.doc_header)
  1901. md->cb.doc_header(ob, md->opaque);
  1902. if (text->size) {
  1903. /* adding a final newline if not already present */
  1904. if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
  1905. bufputc(text, '\n');
  1906. parse_block(ob, md, text->data, text->size);
  1907. }
  1908. if (md->cb.doc_footer)
  1909. md->cb.doc_footer(ob, md->opaque);
  1910. /* clean-up */
  1911. bufrelease(text);
  1912. free_link_refs(md->refs);
  1913. assert(md->work_bufs[BUFFER_SPAN].size == 0);
  1914. assert(md->work_bufs[BUFFER_BLOCK].size == 0);
  1915. }
  1916. void
  1917. sd_markdown_free(struct sd_markdown *md)
  1918. {
  1919. size_t i;
  1920. for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
  1921. bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
  1922. for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
  1923. bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
  1924. stack_free(&md->work_bufs[BUFFER_SPAN]);
  1925. stack_free(&md->work_bufs[BUFFER_BLOCK]);
  1926. free(md);
  1927. }
  1928. void
  1929. sd_version(int *ver_major, int *ver_minor, int *ver_revision)
  1930. {
  1931. *ver_major = UPSKIRT_VER_MAJOR;
  1932. *ver_minor = UPSKIRT_VER_MINOR;
  1933. *ver_revision = UPSKIRT_VER_REVISION;
  1934. }
  1935. /* vim: set filetype=c: */