PageRenderTime 64ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 1ms

/SnuDom/src/markdown.cpp

https://github.com/hippiehunter/Baconography
C++ | 2586 lines | 2356 code | 150 blank | 80 comment | 207 complexity | 9509e15c85cde3562310fe96d461d9b1 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /* markdown.c - generic markdown parser */
  2. /*
  3. * Copyright (c) 2009, Natacha Porté
  4. * Copyright (c) 2011, Vicent Marti
  5. *
  6. * Permission to use, copy, modify, and distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. #include "markdown.h"
  19. #include "stack.h"
  20. #include <assert.h>
  21. #include <string.h>
  22. #include <ctype.h>
  23. #include <stdio.h>
  24. #if defined(_WIN32)
  25. #define strncasecmp _strnicmp
  26. #endif
  27. #define REF_TABLE_SIZE 8
  28. #define BUFFER_BLOCK 0
  29. #define BUFFER_SPAN 1
  30. #define MKD_LI_END 8 /* internal list flag */
  31. #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
  32. #define GPERF_DOWNCASE 1
  33. #define GPERF_CASE_STRNCMP 1
  34. #include "html_blocks.h"
  35. /***************
  36. * LOCAL TYPES *
  37. ***************/
  /*
   * link_ref: one entry of the link-reference table.
   * Entries falling into the same bucket are chained through `next`.
   */
  struct link_ref {
      unsigned int id;        /* hash of the reference name, as computed by hash_link_ref() */
      struct buf *link;       /* link target */
      struct buf *title;      /* link title */
      struct link_ref *next;  /* next entry in the same bucket, or NULL */
  };
  /*
   * char_trigger: signature shared by every active-character handler.
   *   ob     - output buffer
   *   rndr   - renderer state
   *   data   - pointer to the beginning of the span
   *   offset - number of valid chars before data
   *   size   - number of chars available starting at data
   * Returns the number of chars taken care of.
   */
  struct sd_markdown;

  typedef size_t (*char_trigger)(struct buf *ob, struct sd_markdown *rndr,
                                 uint8_t *data, size_t offset, size_t size);

  /* handlers, defined further down in this file */
  static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr,
                              uint8_t *data, size_t offset, size_t size);
  static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr,
                               uint8_t *data, size_t offset, size_t size);
  static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr,
                              uint8_t *data, size_t offset, size_t size);
  static size_t char_escape(struct buf *ob, struct sd_markdown *rndr,
                            uint8_t *data, size_t offset, size_t size);
  static size_t char_entity(struct buf *ob, struct sd_markdown *rndr,
                            uint8_t *data, size_t offset, size_t size);
  static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr,
                                uint8_t *data, size_t offset, size_t size);
  static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr,
                                  uint8_t *data, size_t offset, size_t size);
  static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr,
                                    uint8_t *data, size_t offset, size_t size);
  static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr,
                                  uint8_t *data, size_t offset, size_t size);
  static size_t char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr,
                                                    uint8_t *data, size_t offset, size_t size);
  static size_t char_link(struct buf *ob, struct sd_markdown *rndr,
                          uint8_t *data, size_t offset, size_t size);
  static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr,
                                 uint8_t *data, size_t offset, size_t size);
  /*
   * markdown_char_t: handler ids stored in sd_markdown.active_char.
   * Each value is also the index of the matching handler in
   * markdown_char_ptrs, so both lists must stay in the same order.
   */
  enum markdown_char_t {
      MD_CHAR_NONE                           = 0,
      MD_CHAR_EMPHASIS                       = 1,
      MD_CHAR_CODESPAN                       = 2,
      MD_CHAR_LINEBREAK                      = 3,
      MD_CHAR_LINK                           = 4,
      MD_CHAR_LANGLE                         = 5,
      MD_CHAR_ESCAPE                         = 6,
      MD_CHAR_ENTITITY                       = 7,
      MD_CHAR_AUTOLINK_URL                   = 8,
      MD_CHAR_AUTOLINK_EMAIL                 = 9,
      MD_CHAR_AUTOLINK_WWW                   = 10,
      MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME = 11,
      MD_CHAR_SUPERSCRIPT                    = 12,
  };
  79. static char_trigger markdown_char_ptrs[] = {
  80. NULL,
  81. &char_emphasis,
  82. &char_codespan,
  83. &char_linebreak,
  84. &char_link,
  85. &char_langle_tag,
  86. &char_escape,
  87. &char_entity,
  88. &char_autolink_url,
  89. &char_autolink_email,
  90. &char_autolink_www,
  91. &char_autolink_subreddit_or_username,
  92. &char_superscript,
  93. };
  94. /* render • structure containing one particular render */
  95. struct sd_markdown {
  96. struct sd_callbacks cb;
  97. void *opaque;
  98. struct link_ref *refs[REF_TABLE_SIZE];
  99. uint8_t active_char[256];
  100. struct stack work_bufs[2];
  101. unsigned int ext_flags;
  102. size_t max_nesting;
  103. int in_link_body;
  104. };
  105. /***************************
  106. * HELPER FUNCTIONS *
  107. ***************************/
  108. static inline struct buf *
  109. rndr_newbuf(struct sd_markdown *rndr, int type)
  110. {
  111. static const size_t buf_size[2] = {256, 64};
  112. struct buf *work = NULL;
  113. struct stack *pool = &rndr->work_bufs[type];
  114. if (pool->size < pool->asize &&
  115. pool->item[pool->size] != NULL) {
  116. work = (buf*)pool->item[pool->size++];
  117. work->size = 0;
  118. } else {
  119. work = bufnew(rndr->opaque, rndr->cb.allocate, buf_size[type]);
  120. stack_push(pool, work);
  121. }
  122. return work;
  123. }
  124. static inline void
  125. rndr_popbuf(struct sd_markdown *rndr, int type)
  126. {
  127. rndr->work_bufs[type].size--;
  128. }
  129. static void
  130. unscape_text(void* opaque, void* (*allocate)(void *opaque, size_t size), struct buf *ob, struct buf *src)
  131. {
  132. size_t i = 0, org;
  133. while (i < src->size) {
  134. org = i;
  135. while (i < src->size && src->data[i] != '\\')
  136. i++;
  137. if (i > org)
  138. bufput(opaque, allocate, ob, src->data + org, i - org);
  139. if (i + 1 >= src->size)
  140. break;
  141. bufputc(opaque, allocate, ob, src->data[i + 1]);
  142. i += 2;
  143. }
  144. }
  /*
   * hash_link_ref: case-insensitive hash of a reference name.
   * Every byte is lowered with tolower() before being mixed in, so names
   * differing only by ASCII case produce the same hash.
   */
  static unsigned int
  hash_link_ref(const uint8_t *link_ref, size_t length)
  {
      unsigned int hash = 0;

      /* hash * 65599 == (hash << 6) + (hash << 16) - hash in unsigned arithmetic */
      while (length-- > 0)
          hash = hash * 65599u + (unsigned int)tolower(*link_ref++);

      return hash;
  }
  154. static struct link_ref *
  155. add_link_ref(
  156. void* opaque, void* (*allocate)(void *opaque, size_t size),
  157. struct link_ref **references,
  158. const uint8_t *name, size_t name_size)
  159. {
  160. struct link_ref *ref = (link_ref*)allocate(opaque, sizeof(struct link_ref));
  161. memset(ref, 0, sizeof(struct link_ref));
  162. if (!ref)
  163. return NULL;
  164. ref->id = hash_link_ref(name, name_size);
  165. ref->next = references[ref->id % REF_TABLE_SIZE];
  166. references[ref->id % REF_TABLE_SIZE] = ref;
  167. return ref;
  168. }
  169. static struct link_ref *
  170. find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
  171. {
  172. unsigned int hash = hash_link_ref(name, length);
  173. struct link_ref *ref = NULL;
  174. ref = references[hash % REF_TABLE_SIZE];
  175. while (ref != NULL) {
  176. if (ref->id == hash)
  177. return ref;
  178. ref = ref->next;
  179. }
  180. return NULL;
  181. }
  182. static void
  183. free_link_refs(struct link_ref **references)
  184. {
  185. size_t i;
  186. for (i = 0; i < REF_TABLE_SIZE; ++i) {
  187. struct link_ref *r = references[i];
  188. struct link_ref *next;
  189. while (r) {
  190. next = r->next;
  191. bufrelease(r->link);
  192. bufrelease(r->title);
  193. //free(r);
  194. r = next;
  195. }
  196. }
  197. }
  /*
   * Tells whether a char is a Markdown space.
   * Only the actual space and the newline count as spaces: tabs and
   * carriage returns are filtered out during the preprocessing phase.
   *
   * To be really UTF-8 compliant, a Unicode codepoint would have to be
   * extracted from the input and checked for space properties instead.
   */
  static inline int
  _isspace(int c)
  {
      switch (c) {
      case ' ':
      case '\n':
          return 1;
      default:
          return 0;
      }
  }
  213. /****************************
  214. * INLINE PARSING FUNCTIONS *
  215. ****************************/
  /* is_mail_autolink: looks for the address part of a mail autolink and '>' */
  /* this is less strict than the original markdown e-mail address matching */
  /* returns the length up to and including '>', or 0 when it does not match */
  static size_t
  is_mail_autolink(uint8_t *data, size_t size)
  {
      size_t pos;
      size_t at_signs = 0;

      /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
      for (pos = 0; pos < size; pos++) {
          const uint8_t ch = data[pos];

          if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
              continue;

          if (ch == '@') {
              at_signs++;
              continue;
          }

          if (ch == '>')
              return (at_signs == 1) ? pos + 1 : 0;

          /* any other char cannot be part of an address */
          return 0;
      }

      /* ran out of input before the closing '>' */
      return 0;
  }
  241. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  242. static size_t
  243. tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
  244. {
  245. size_t i, j;
  246. /* a valid tag can't be shorter than 3 chars */
  247. if (size < 3) return 0;
  248. /* begins with a '<' optionally followed by '/', followed by letter or number */
  249. if (data[0] != '<') return 0;
  250. i = (data[1] == '/') ? 2 : 1;
  251. if (!isalnum(data[i]))
  252. return 0;
  253. /* scheme test */
  254. *autolink = MKDA_NOT_AUTOLINK;
  255. /* try to find the beginning of an URI */
  256. while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  257. i++;
  258. if (i > 1 && data[i] == '@') {
  259. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  260. *autolink = MKDA_EMAIL;
  261. return i + j;
  262. }
  263. }
  264. if (i > 2 && data[i] == ':') {
  265. *autolink = MKDA_NORMAL;
  266. i++;
  267. }
  268. /* completing autolink test: no whitespace or ' or " */
  269. if (i >= size)
  270. *autolink = MKDA_NOT_AUTOLINK;
  271. else if (*autolink) {
  272. j = i;
  273. while (i < size) {
  274. if (data[i] == '\\') i += 2;
  275. else if (data[i] == '>' || data[i] == '\'' ||
  276. data[i] == '"' || data[i] == ' ' || data[i] == '\n')
  277. break;
  278. else i++;
  279. }
  280. if (i >= size) return 0;
  281. if (i > j && data[i] == '>') return i + 1;
  282. /* one of the forbidden chars has been found */
  283. *autolink = MKDA_NOT_AUTOLINK;
  284. }
  285. /* looking for sometinhg looking like a tag end */
  286. while (i < size && data[i] != '>') i++;
  287. if (i >= size) return 0;
  288. return i + 1;
  289. }
  290. /* parse_inline • parses inline markdown elements */
  291. static void
  292. parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  293. {
  294. size_t i = 0, end = 0;
  295. uint8_t action = 0;
  296. struct buf work = { 0, 0, 0, 0 };
  297. if (rndr->work_bufs[BUFFER_SPAN].size +
  298. rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
  299. return;
  300. while (i < size) {
  301. /* copying inactive chars into the output */
  302. while (end < size && (action = rndr->active_char[data[end]]) == 0) {
  303. end++;
  304. }
  305. if (rndr->cb.normal_text) {
  306. work.data = data + i;
  307. work.size = end - i;
  308. rndr->cb.normal_text(ob, &work, rndr->opaque);
  309. }
  310. else
  311. bufput(rndr->opaque, rndr->cb.allocate, ob, data + i, end - i);
  312. if (end >= size) break;
  313. i = end;
  314. end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
  315. if (!end) /* no action from the callback */
  316. end = i + 1;
  317. else {
  318. i += end;
  319. end = i;
  320. }
  321. }
  322. }
  /*
   * find_emph_char: looks for the next emphasis char `c`, skipping other
   * constructs (code spans and links).
   * The search starts at index 1. Returns the index of the char found, or 0
   * when there is none. When a code span or link is left unterminated, the
   * first `c` seen inside it is returned instead.
   */
  static size_t
  find_emph_char(uint8_t *data, size_t size, uint8_t c)
  {
      size_t pos = 1;

      while (pos < size) {
          size_t fallback = 0;  /* first `c` seen inside the construct being skipped */

          for (; pos < size; pos++) {
              if (data[pos] == c || data[pos] == '`' || data[pos] == '[')
                  break;
          }

          if (pos == size)
              return 0;

          if (data[pos] == c)
              return pos;

          /* not counting escaped chars */
          if (pos && data[pos - 1] == '\\') {
              pos++;
              continue;
          }

          if (data[pos] == '`') {
              size_t opening = 0;
              size_t run = 0;

              /* counting the number of opening backticks */
              for (; pos < size && data[pos] == '`'; pos++)
                  opening++;

              if (pos >= size)
                  return 0;

              /* finding the matching closing sequence */
              for (; pos < size && run < opening; pos++) {
                  if (!fallback && data[pos] == c)
                      fallback = pos;
                  run = (data[pos] == '`') ? run + 1 : 0;
              }

              if (pos >= size)
                  return fallback;
          }
          /* skipping a link */
          else if (data[pos] == '[') {
              uint8_t closer;

              pos++;
              for (; pos < size && data[pos] != ']'; pos++) {
                  if (!fallback && data[pos] == c)
                      fallback = pos;
              }
              pos++;

              while (pos < size && (data[pos] == ' ' || data[pos] == '\n'))
                  pos++;

              if (pos >= size)
                  return fallback;

              if (data[pos] == '[') {
                  closer = ']';
              } else if (data[pos] == '(') {
                  closer = ')';
              } else {
                  /* plain bracketed text, not a link */
                  if (fallback)
                      return fallback;
                  continue;
              }

              pos++;
              for (; pos < size && data[pos] != closer; pos++) {
                  if (!fallback && data[pos] == c)
                      fallback = pos;
              }

              if (pos >= size)
                  return fallback;

              pos++;
          }
      }

      return 0;
  }
  394. /* parse_emph1 • parsing single emphase */
  395. /* closed by a symbol not preceded by whitespace and not followed by symbol */
  396. static size_t
  397. parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  398. {
  399. size_t i = 0, len;
  400. struct buf *work = 0;
  401. int r;
  402. if (!rndr->cb.emphasis) return 0;
  403. /* skipping one symbol if coming from emph3 */
  404. if (size > 1 && data[0] == c && data[1] == c) i = 1;
  405. while (i < size) {
  406. len = find_emph_char(data + i, size - i, c);
  407. if (!len) return 0;
  408. i += len;
  409. if (i >= size) return 0;
  410. if (data[i] == c && !_isspace(data[i - 1])) {
  411. if ((rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) && (c == '_')) {
  412. if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
  413. continue;
  414. }
  415. work = rndr_newbuf(rndr, BUFFER_SPAN);
  416. parse_inline(work, rndr, data, i);
  417. r = rndr->cb.emphasis(ob, work, rndr->opaque);
  418. rndr_popbuf(rndr, BUFFER_SPAN);
  419. return r ? i + 1 : 0;
  420. }
  421. }
  422. return 0;
  423. }
  424. /* parse_emph2 • parsing single emphase */
  425. static size_t
  426. parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  427. {
  428. int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
  429. size_t i = 0, len;
  430. struct buf *work = 0;
  431. int r;
  432. render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
  433. if (!render_method)
  434. return 0;
  435. while (i < size) {
  436. len = find_emph_char(data + i, size - i, c);
  437. if (!len) return 0;
  438. i += len;
  439. if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
  440. work = rndr_newbuf(rndr, BUFFER_SPAN);
  441. parse_inline(work, rndr, data, i);
  442. r = render_method(ob, work, rndr->opaque);
  443. rndr_popbuf(rndr, BUFFER_SPAN);
  444. return r ? i + 2 : 0;
  445. }
  446. i++;
  447. }
  448. return 0;
  449. }
  450. /* parse_emph3 • parsing single emphase */
  451. /* finds the first closing tag, and delegates to the other emph */
  452. static size_t
  453. parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
  454. {
  455. size_t i = 0, len;
  456. int r;
  457. while (i < size) {
  458. len = find_emph_char(data + i, size - i, c);
  459. if (!len) return 0;
  460. i += len;
  461. /* skip whitespace preceded symbols */
  462. if (data[i] != c || _isspace(data[i - 1]))
  463. continue;
  464. if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
  465. /* triple symbol found */
  466. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  467. parse_inline(work, rndr, data, i);
  468. r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
  469. rndr_popbuf(rndr, BUFFER_SPAN);
  470. return r ? i + 3 : 0;
  471. } else if (i + 1 < size && data[i + 1] == c) {
  472. /* double symbol found, handing over to emph1 */
  473. len = parse_emph1(ob, rndr, data - 2, size + 2, c);
  474. if (!len) return 0;
  475. else return len - 2;
  476. } else {
  477. /* single symbol found, handing over to emph2 */
  478. len = parse_emph2(ob, rndr, data - 1, size + 1, c);
  479. if (!len) return 0;
  480. else return len - 1;
  481. }
  482. }
  483. return 0;
  484. }
  485. /* char_emphasis • single and double emphasis parsing */
  486. static size_t
  487. char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  488. {
  489. uint8_t c = data[0];
  490. size_t ret;
  491. if (size > 2 && data[1] != c) {
  492. /* whitespace cannot follow an opening emphasis;
  493. * strikethrough only takes two characters '~~' */
  494. if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
  495. return 0;
  496. return ret + 1;
  497. }
  498. if (size > 3 && data[1] == c && data[2] != c) {
  499. if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
  500. return 0;
  501. return ret + 2;
  502. }
  503. if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
  504. if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
  505. return 0;
  506. return ret + 3;
  507. }
  508. return 0;
  509. }
  510. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  511. static size_t
  512. char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  513. {
  514. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  515. return 0;
  516. /* removing the last space from ob and rendering */
  517. while (ob->size && ob->data[ob->size - 1] == ' ')
  518. ob->size--;
  519. return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
  520. }
  521. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  522. static size_t
  523. char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  524. {
  525. size_t end, nb = 0, i, f_begin, f_end;
  526. /* counting the number of backticks in the delimiter */
  527. while (nb < size && data[nb] == '`')
  528. nb++;
  529. /* finding the next delimiter */
  530. i = 0;
  531. for (end = nb; end < size && i < nb; end++) {
  532. if (data[end] == '`') i++;
  533. else i = 0;
  534. }
  535. if (i < nb && end >= size)
  536. return 0; /* no matching delimiter */
  537. /* trimming outside whitespaces */
  538. f_begin = nb;
  539. while (f_begin < end && data[f_begin] == ' ')
  540. f_begin++;
  541. f_end = end - nb;
  542. while (f_end > nb && data[f_end-1] == ' ')
  543. f_end--;
  544. /* real code span */
  545. if (f_begin < f_end) {
  546. struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
  547. if (!rndr->cb.codespan(ob, &work, rndr->opaque))
  548. end = 0;
  549. } else {
  550. if (!rndr->cb.codespan(ob, 0, rndr->opaque))
  551. end = 0;
  552. }
  553. return end;
  554. }
  555. /* char_escape • '\\' backslash escape */
  556. static size_t
  557. char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  558. {
  559. static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>/^~";
  560. struct buf work = { 0, 0, 0, 0 };
  561. if (size > 1) {
  562. if (strchr(escape_chars, data[1]) == NULL)
  563. return 0;
  564. if (rndr->cb.normal_text) {
  565. work.data = data + 1;
  566. work.size = 1;
  567. rndr->cb.normal_text(ob, &work, rndr->opaque);
  568. }
  569. else bufputc(rndr->opaque, rndr->cb.allocate, ob, data[1]);
  570. } else if (size == 1) {
  571. bufputc(rndr->opaque, rndr->cb.allocate, ob, data[0]);
  572. }
  573. return 2;
  574. }
  575. /* char_entity • '&' escaped when it doesn't belong to an entity */
  576. /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
  577. static size_t
  578. char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  579. {
  580. size_t end = 1;
  581. struct buf work = { 0, 0, 0, 0 };
  582. if (end < size && data[end] == '#')
  583. end++;
  584. while (end < size && isalnum(data[end]))
  585. end++;
  586. if (end < size && data[end] == ';')
  587. end++; /* real entity */
  588. else
  589. return 0; /* lone '&' */
  590. if (rndr->cb.entity) {
  591. work.data = data;
  592. work.size = end;
  593. rndr->cb.entity(ob, &work, rndr->opaque);
  594. }
  595. else bufput(rndr->opaque, rndr->cb.allocate, ob, data, end);
  596. return end;
  597. }
  598. /* char_langle_tag • '<' when tags or autolinks are allowed */
  599. static size_t
  600. char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  601. {
  602. enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
  603. size_t end = tag_length(data, size, &altype);
  604. struct buf work = { data, end, 0, 0 };
  605. int ret = 0;
  606. if (end > 2) {
  607. if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
  608. struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  609. work.data = data + 1;
  610. work.size = end - 2;
  611. unscape_text(rndr->opaque, rndr->cb.allocate, u_link, &work);
  612. ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
  613. rndr_popbuf(rndr, BUFFER_SPAN);
  614. }
  615. else if (rndr->cb.raw_html_tag)
  616. ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
  617. }
  618. if (!ret) return 0;
  619. else return end;
  620. }
  621. static size_t
  622. char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  623. {
  624. struct buf *link, *link_url, *link_text;
  625. size_t link_len, rewind;
  626. if (!rndr->cb.link || rndr->in_link_body)
  627. return 0;
  628. link = rndr_newbuf(rndr, BUFFER_SPAN);
  629. if ((link_len = sd_autolink__www(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size, 0)) > 0) {
  630. link_url = rndr_newbuf(rndr, BUFFER_SPAN);
  631. BUFPUTSL(rndr->opaque, rndr->cb.allocate,link_url, "http://");
  632. bufput(rndr->opaque, rndr->cb.allocate, link_url, link->data, link->size);
  633. ob->size -= rewind;
  634. if (rndr->cb.normal_text) {
  635. link_text = rndr_newbuf(rndr, BUFFER_SPAN);
  636. rndr->cb.normal_text(link_text, link, rndr->opaque);
  637. rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
  638. rndr_popbuf(rndr, BUFFER_SPAN);
  639. } else {
  640. rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
  641. }
  642. rndr_popbuf(rndr, BUFFER_SPAN);
  643. }
  644. rndr_popbuf(rndr, BUFFER_SPAN);
  645. return link_len;
  646. }
  647. static size_t
  648. char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  649. {
  650. struct buf *link;
  651. size_t link_len, rewind;
  652. if (!rndr->cb.autolink || rndr->in_link_body)
  653. return 0;
  654. link = rndr_newbuf(rndr, BUFFER_SPAN);
  655. if ((link_len = sd_autolink__subreddit(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size)) > 0) {
  656. ob->size -= rewind;
  657. rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
  658. } else if ((link_len = sd_autolink__username(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size)) > 0) {
  659. ob->size -= rewind;
  660. rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
  661. }
  662. rndr_popbuf(rndr, BUFFER_SPAN);
  663. return link_len;
  664. }
  665. static size_t
  666. char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  667. {
  668. struct buf *link;
  669. size_t link_len, rewind;
  670. if (!rndr->cb.autolink || rndr->in_link_body)
  671. return 0;
  672. link = rndr_newbuf(rndr, BUFFER_SPAN);
  673. if ((link_len = sd_autolink__email(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size, 0)) > 0) {
  674. ob->size -= rewind;
  675. rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
  676. }
  677. rndr_popbuf(rndr, BUFFER_SPAN);
  678. return link_len;
  679. }
  680. static size_t
  681. char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  682. {
  683. struct buf *link;
  684. size_t link_len, rewind;
  685. if (!rndr->cb.autolink || rndr->in_link_body)
  686. return 0;
  687. link = rndr_newbuf(rndr, BUFFER_SPAN);
  688. if ((link_len = sd_autolink__url(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size, 0)) > 0) {
  689. ob->size -= rewind;
  690. rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
  691. }
  692. rndr_popbuf(rndr, BUFFER_SPAN);
  693. return link_len;
  694. }
  695. /* char_link • '[': parsing a link or an image */
  /*
   * Three forms may follow the bracketed text:
   *   - inline:             [text](link "title")
   *   - reference:          [text][id]   (an empty id reuses the text as id)
   *   - shortcut reference: [text]
   * When the '[' is directly preceded by '!', the image callback is used
   * instead of the link callback and the text is passed unparsed.
   * Returns the number of chars consumed, or 0 when nothing was rendered
   * (missing callback, unterminated construct, unknown reference, no link
   * target, or the rendering callback returned 0).
   */
  696. static size_t
  697. char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  698. {
  699. int is_img = (offset && data[-1] == '!'), level;
  700. size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
  701. struct buf *content = 0;
  702. struct buf *link = 0;
  703. struct buf *title = 0;
  704. struct buf *u_link = 0;
  /* remembered so that every work buffer taken below is given back at cleanup */
  705. size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
  706. int text_has_nl = 0, ret = 0;
  707. int in_title = 0, qtype = 0;
  708. /* checking whether the correct renderer exists */
  709. if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
  710. goto cleanup;
  711. /* looking for the matching closing bracket */
  /* nested brackets are tracked through level; chars following a backslash are ignored */
  712. for (level = 1; i < size; i++) {
  713. if (data[i] == '\n')
  714. text_has_nl = 1;
  715. else if (data[i - 1] == '\\')
  716. continue;
  717. else if (data[i] == '[')
  718. level++;
  719. else if (data[i] == ']') {
  720. level--;
  721. if (level <= 0)
  722. break;
  723. }
  724. }
  725. if (i >= size)
  726. goto cleanup;
  /* txt_e is the index of the closing ']'; the text is data[1..txt_e) */
  727. txt_e = i;
  728. i++;
  729. /* skip any amount of whitespace or newline */
  730. /* (this is much more laxist than original markdown syntax) */
  731. while (i < size && _isspace(data[i]))
  732. i++;
  733. /* inline style link */
  734. if (i < size && data[i] == '(') {
  735. /* skipping initial whitespace */
  736. i++;
  737. while (i < size && _isspace(data[i]))
  738. i++;
  739. link_b = i;
  740. /* looking for link end: ' " ) */
  /* a quote only ends the link when it directly follows whitespace */
  741. while (i < size) {
  742. if (data[i] == '\\') i += 2;
  743. else if (data[i] == ')') break;
  744. else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
  745. else i++;
  746. }
  747. if (i >= size) goto cleanup;
  748. link_e = i;
  749. /* looking for title end if present */
  750. if (data[i] == '\'' || data[i] == '"') {
  751. qtype = data[i];
  752. in_title = 1;
  753. i++;
  754. title_b = i;
  /* a ')' only ends the construct once the opening quote has been matched */
  755. while (i < size) {
  756. if (data[i] == '\\') i += 2;
  757. else if (data[i] == qtype) {in_title = 0; i++;}
  758. else if ((data[i] == ')') && !in_title) break;
  759. else i++;
  760. }
  761. if (i >= size) goto cleanup;
  762. /* skipping whitespaces after title */
  763. title_e = i - 1;
  764. while (title_e > title_b && _isspace(data[title_e]))
  765. title_e--;
  766. /* checking for closing quote presence */
  /* without it there is no title and everything up to ')' belongs to the link */
  767. if (data[title_e] != '\'' && data[title_e] != '"') {
  768. title_b = title_e = 0;
  769. link_e = i;
  770. }
  771. }
  772. /* remove whitespace at the end of the link */
  773. while (link_e > link_b && _isspace(data[link_e - 1]))
  774. link_e--;
  775. /* remove optional angle brackets around the link */
  776. if (data[link_b] == '<') link_b++;
  777. if (data[link_e - 1] == '>') link_e--;
  778. /* building escaped link and title */
  779. if (link_e > link_b) {
  780. link = rndr_newbuf(rndr, BUFFER_SPAN);
  781. bufput(rndr->opaque, rndr->cb.allocate, link, data + link_b, link_e - link_b);
  782. }
  783. if (title_e > title_b) {
  784. title = rndr_newbuf(rndr, BUFFER_SPAN);
  785. bufput(rndr->opaque, rndr->cb.allocate, title, data + title_b, title_e - title_b);
  786. }
  787. i++;
  788. }
  789. /* reference style link */
  790. else if (i < size && data[i] == '[') {
  791. struct buf id = { 0, 0, 0, 0 };
  792. struct link_ref *lr;
  793. /* looking for the id */
  794. i++;
  795. link_b = i;
  796. while (i < size && data[i] != ']') i++;
  797. if (i >= size) goto cleanup;
  798. link_e = i;
  799. /* finding the link_ref */
  /* an empty id ("[text][]") means the text itself is the id */
  800. if (link_b == link_e) {
  801. if (text_has_nl) {
  802. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  803. size_t j;
  /* each newline becomes one space, unless a space already precedes it */
  804. for (j = 1; j < txt_e; j++) {
  805. if (data[j] != '\n')
  806. bufputc(rndr->opaque, rndr->cb.allocate, b, data[j]);
  807. else if (data[j - 1] != ' ')
  808. bufputc(rndr->opaque, rndr->cb.allocate, b, ' ');
  809. }
  810. id.data = b->data;
  811. id.size = b->size;
  812. } else {
  813. id.data = data + 1;
  814. id.size = txt_e - 1;
  815. }
  816. } else {
  817. id.data = data + link_b;
  818. id.size = link_e - link_b;
  819. }
  820. lr = find_link_ref(rndr->refs, id.data, id.size);
  821. if (!lr)
  822. goto cleanup;
  823. /* keeping link and title from link_ref */
  824. link = lr->link;
  825. title = lr->title;
  826. i++;
  827. }
  828. /* shortcut reference style link */
  829. else {
  830. struct buf id = { 0, 0, 0, 0 };
  831. struct link_ref *lr;
  832. /* crafting the id */
  833. if (text_has_nl) {
  834. struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
  835. size_t j;
  /* each newline becomes one space, unless a space already precedes it */
  836. for (j = 1; j < txt_e; j++) {
  837. if (data[j] != '\n')
  838. bufputc(rndr->opaque, rndr->cb.allocate, b, data[j]);
  839. else if (data[j - 1] != ' ')
  840. bufputc(rndr->opaque, rndr->cb.allocate, b, ' ');
  841. }
  842. id.data = b->data;
  843. id.size = b->size;
  844. } else {
  845. id.data = data + 1;
  846. id.size = txt_e - 1;
  847. }
  848. /* finding the link_ref */
  849. lr = find_link_ref(rndr->refs, id.data, id.size);
  850. if (!lr)
  851. goto cleanup;
  852. /* keeping link and title from link_ref */
  853. link = lr->link;
  854. title = lr->title;
  855. /* rewinding the whitespace */
  856. i = txt_e + 1;
  857. }
  858. /* building content: img alt is escaped, link content is parsed */
  859. if (txt_e > 1) {
  860. content = rndr_newbuf(rndr, BUFFER_SPAN);
  861. if (is_img) {
  862. bufput(rndr->opaque, rndr->cb.allocate, content, data + 1, txt_e - 1);
  863. } else {
  864. /* disable autolinking when parsing inline the
  865. * content of a link */
  866. rndr->in_link_body = 1;
  867. parse_inline(content, rndr, data + 1, txt_e - 1);
  868. rndr->in_link_body = 0;
  869. }
  870. }
  /* the link target is handed to the callbacks with its escapes removed */
  871. if (link) {
  872. u_link = rndr_newbuf(rndr, BUFFER_SPAN);
  873. unscape_text(rndr->opaque, rndr->cb.allocate, u_link, link);
  874. } else {
  875. goto cleanup;
  876. }
  877. /* calling the relevant rendering function */
  878. if (is_img) {
  /* drop a trailing '!' from the output (presumably the image marker, already copied as plain text) */
  879. if (ob->size && ob->data[ob->size - 1] == '!')
  880. ob->size -= 1;
  881. ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
  882. } else {
  883. ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
  884. }
  885. /* cleanup */
  /* giving back every work buffer taken in this function at once */
  886. cleanup:
  887. rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
  888. return ret ? i : 0;
  889. }
  890. static size_t
  891. char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
  892. {
  893. size_t sup_start, sup_len;
  894. struct buf *sup;
  895. if (!rndr->cb.superscript)
  896. return 0;
  897. if (size < 2)
  898. return 0;
  899. if (data[1] == '(') {
  900. sup_start = sup_len = 2;
  901. while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
  902. sup_len++;
  903. if (sup_len == size)
  904. return 0;
  905. } else {
  906. sup_start = sup_len = 1;
  907. while (sup_len < size && !_isspace(data[sup_len]))
  908. sup_len++;
  909. }
  910. if (sup_len - sup_start == 0)
  911. return (sup_start == 2) ? 3 : 0;
  912. sup = rndr_newbuf(rndr, BUFFER_SPAN);
  913. parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
  914. rndr->cb.superscript(ob, sup, rndr->opaque);
  915. rndr_popbuf(rndr, BUFFER_SPAN);
  916. return (sup_start == 2) ? sup_len + 1 : sup_len;
  917. }
  918. /*********************************
  919. * BLOCK-LEVEL PARSING FUNCTIONS *
  920. *********************************/
  921. /* is_empty • returns the line length when it is empty, 0 otherwise */
  922. static size_t
  923. is_empty(uint8_t *data, size_t size)
  924. {
  925. size_t i;
  926. for (i = 0; i < size && data[i] != '\n'; i++)
  927. if (data[i] != ' ')
  928. return 0;
  929. return i + 1;
  930. }
  931. /* is_hrule • returns whether a line is a horizontal rule */
  932. static int
  933. is_hrule(uint8_t *data, size_t size)
  934. {
  935. size_t i = 0, n = 0;
  936. uint8_t c;
  937. /* skipping initial spaces */
  938. if (size < 3) return 0;
  939. if (data[0] == ' ') { i++;
  940. if (data[1] == ' ') { i++;
  941. if (data[2] == ' ') { i++; } } }
  942. /* looking at the hrule uint8_t */
  943. if (i + 2 >= size
  944. || (data[i] != '*' && data[i] != '-' && data[i] != '_'))
  945. return 0;
  946. c = data[i];
  947. /* the whole line must be the char or whitespace */
  948. while (i < size && data[i] != '\n') {
  949. if (data[i] == c) n++;
  950. else if (data[i] != ' ')
  951. return 0;
  952. i++;
  953. }
  954. return n >= 3;
  955. }
  956. /* check if a line begins with a code fence; return the
  957. * width of the code fence */
  958. static size_t
  959. prefix_codefence(uint8_t *data, size_t size)
  960. {
  961. size_t i = 0, n = 0;
  962. uint8_t c;
  963. /* skipping initial spaces */
  964. if (size < 3) return 0;
  965. if (data[0] == ' ') { i++;
  966. if (data[1] == ' ') { i++;
  967. if (data[2] == ' ') { i++; } } }
  968. /* looking at the hrule uint8_t */
  969. if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
  970. return 0;
  971. c = data[i];
  972. /* the whole line must be the uint8_t or whitespace */
  973. while (i < size && data[i] == c) {
  974. n++; i++;
  975. }
  976. if (n < 3)
  977. return 0;
  978. return i;
  979. }
  980. /* check if a line is a code fence; return its size if it is */
  981. static size_t
  982. is_codefence(uint8_t *data, size_t size, struct buf *syntax)
  983. {
  984. size_t i = 0, syn_len = 0;
  985. uint8_t *syn_start;
  986. i = prefix_codefence(data, size);
  987. if (i == 0)
  988. return 0;
  989. while (i < size && data[i] == ' ')
  990. i++;
  991. syn_start = data + i;
  992. if (i < size && data[i] == '{') {
  993. i++; syn_start++;
  994. while (i < size && data[i] != '}' && data[i] != '\n') {
  995. syn_len++; i++;
  996. }
  997. if (i == size || data[i] != '}')
  998. return 0;
  999. /* strip all whitespace at the beginning and the end
  1000. * of the {} block */
  1001. while (syn_len > 0 && _isspace(syn_start[0])) {
  1002. syn_start++; syn_len--;
  1003. }
  1004. while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
  1005. syn_len--;
  1006. i++;
  1007. } else {
  1008. while (i < size && !_isspace(data[i])) {
  1009. syn_len++; i++;
  1010. }
  1011. }
  1012. if (syntax) {
  1013. syntax->data = syn_start;
  1014. syntax->size = syn_len;
  1015. }
  1016. while (i < size && data[i] != '\n') {
  1017. if (!_isspace(data[i]))
  1018. return 0;
  1019. i++;
  1020. }
  1021. return i + 1;
  1022. }
  1023. /* is_atxheader • returns whether the line is a hash-prefixed header */
  1024. static int
  1025. is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
  1026. {
  1027. if (data[0] != '#')
  1028. return 0;
  1029. if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
  1030. size_t level = 0;
  1031. while (level < size && level < 6 && data[level] == '#')
  1032. level++;
  1033. if (level < size && data[level] != ' ')
  1034. return 0;
  1035. }
  1036. return 1;
  1037. }
  1038. /* is_headerline • returns whether the line is a setext-style hdr underline */
  1039. static int
  1040. is_headerline(uint8_t *data, size_t size)
  1041. {
  1042. size_t i = 0;
  1043. /* test of level 1 header */
  1044. if (data[i] == '=') {
  1045. for (i = 1; i < size && data[i] == '='; i++);
  1046. while (i < size && data[i] == ' ') i++;
  1047. return (i >= size || data[i] == '\n') ? 1 : 0; }
  1048. /* test of level 2 header */
  1049. if (data[i] == '-') {
  1050. for (i = 1; i < size && data[i] == '-'; i++);
  1051. while (i < size && data[i] == ' ') i++;
  1052. return (i >= size || data[i] == '\n') ? 2 : 0; }
  1053. return 0;
  1054. }
  1055. static int
  1056. is_next_headerline(uint8_t *data, size_t size)
  1057. {
  1058. size_t i = 0;
  1059. while (i < size && data[i] != '\n')
  1060. i++;
  1061. if (++i >= size)
  1062. return 0;
  1063. return is_headerline(data + i, size - i);
  1064. }
  1065. /* prefix_quote • returns blockquote prefix length */
  1066. static size_t
  1067. prefix_quote(uint8_t *data, size_t size)
  1068. {
  1069. size_t i = 0;
  1070. if (i < size && data[i] == ' ') i++;
  1071. if (i < size && data[i] == ' ') i++;
  1072. if (i < size && data[i] == ' ') i++;
  1073. if (i < size && data[i] == '>') {
  1074. if (i + 1 < size && data[i + 1] == ' ')
  1075. return i + 2;
  1076. return i + 1;
  1077. }
  1078. return 0;
  1079. }
  1080. /* prefix_code • returns prefix length for block code*/
  1081. static size_t
  1082. prefix_code(uint8_t *data, size_t size)
  1083. {
  1084. if (size > 3 && data[0] == ' ' && data[1] == ' '
  1085. && data[2] == ' ' && data[3] == ' ') return 4;
  1086. return 0;
  1087. }
  1088. /* prefix_oli • returns ordered list item prefix */
  1089. static size_t
  1090. prefix_oli(uint8_t *data, size_t size)
  1091. {
  1092. size_t i = 0;
  1093. if (i < size && data[i] == ' ') i++;
  1094. if (i < size && data[i] == ' ') i++;
  1095. if (i < size && data[i] == ' ') i++;
  1096. if (i >= size || data[i] < '0' || data[i] > '9')
  1097. return 0;
  1098. while (i < size && data[i] >= '0' && data[i] <= '9')
  1099. i++;
  1100. if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ')
  1101. return 0;
  1102. if (is_next_headerline(data + i, size - i))
  1103. return 0;
  1104. return i + 2;
  1105. }
  1106. /* prefix_uli • returns ordered list item prefix */
  1107. static size_t
  1108. prefix_uli(uint8_t *data, size_t size)
  1109. {
  1110. size_t i = 0;
  1111. if (i < size && data[i] == ' ') i++;
  1112. if (i < size && data[i] == ' ') i++;
  1113. if (i < size && data[i] == ' ') i++;
  1114. if (i + 1 >= size ||
  1115. (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
  1116. data[i + 1] != ' ')
  1117. return 0;
  1118. if (is_next_headerline(data + i, size - i))
  1119. return 0;
  1120. return i + 2;
  1121. }
/* parse_block • parsing of block-level content into ob (forward declaration; returns nothing) */
  1123. static void parse_block(struct buf *ob, struct sd_markdown *rndr,
  1124. uint8_t *data, size_t size);
  1125. /* parse_blockquote • handles parsing of a blockquote fragment */
  1126. static size_t
  1127. parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1128. {
  1129. size_t beg, end = 0, pre, work_size = 0;
  1130. uint8_t *work_data = 0;
  1131. struct buf *out = 0;
  1132. out = rndr_newbuf(rndr, BUFFER_BLOCK);
  1133. beg = 0;
  1134. while (beg < size) {
  1135. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1136. pre = prefix_quote(data + beg, end - beg);
  1137. if (pre)
  1138. beg += pre; /* skipping prefix */
  1139. /* empty line followed by non-quote line */
  1140. else if (is_empty(data + beg, end - beg) &&
  1141. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  1142. !is_empty(data + end, size - end))))
  1143. break;
  1144. if (beg < end) { /* copy into the in-place working buffer */
  1145. /* bufput(work, data + beg, end - beg); */
  1146. if (!work_data)
  1147. work_data = data + beg;
  1148. else if (data + beg != work_data + work_size)
  1149. memmove(work_data + work_size, data + beg, end - beg);
  1150. work_size += end - beg;
  1151. }
  1152. beg = end;
  1153. }
  1154. parse_block(out, rndr, work_data, work_size);
  1155. if (rndr->cb.blockquote)
  1156. rndr->cb.blockquote(ob, out, rndr->opaque);
  1157. rndr_popbuf(rndr, BUFFER_BLOCK);
  1158. return end;
  1159. }
  1160. static size_t
  1161. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
  1162. /* parse_blockquote • handles parsing of a regular paragraph */
  1163. static size_t
  1164. parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1165. {
  1166. size_t i = 0, end = 0;
  1167. int level = 0;
  1168. struct buf work = { data, 0, 0, 0 };
  1169. while (i < size) {
  1170. for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
  1171. if (prefix_quote(data + i, end - i) != 0) {
  1172. end = i;
  1173. break;
  1174. }
  1175. if (is_empty(data + i, size - i))
  1176. break;
  1177. if ((level = is_headerline(data + i, size - i)) != 0)
  1178. break;
  1179. if (is_atxheader(rndr, data + i, size - i) ||
  1180. is_hrule(data + i, size - i) ||
  1181. prefix_quote(data + i, size - i)) {
  1182. end = i;
  1183. break;
  1184. }
  1185. /*
  1186. * Early termination of a paragraph with the same logic
  1187. * as Markdown 1.0.0. If this logic is applied, the
  1188. * Markdown 1.0.3 test suite won't pass cleanly
  1189. *
  1190. * :: If the first character in a new line is not a letter,
  1191. * let's check to see if there's some kind of block starting
  1192. * here
  1193. */
  1194. if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
  1195. if (prefix_oli(data + i, size - i) ||
  1196. prefix_uli(data + i, size - i)) {
  1197. end = i;
  1198. break;
  1199. }
  1200. /* see if an html block starts here */
  1201. if (data[i] == '<' && rndr->cb.blockhtml &&
  1202. parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
  1203. end = i;
  1204. break;
  1205. }
  1206. /* see if a code fence starts here */
  1207. if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
  1208. is_codefence(data + i, size - i, NULL) != 0) {
  1209. end = i;
  1210. break;
  1211. }
  1212. }
  1213. i = end;
  1214. }
  1215. work.size = i;
  1216. while (work.size && data[work.size - 1] == '\n')
  1217. work.size--;
  1218. if (!level) {
  1219. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1220. parse_inline(tmp, rndr, work.data, work.size);
  1221. if (rndr->cb.paragraph)
  1222. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1223. rndr_popbuf(rndr, BUFFER_BLOCK);
  1224. } else {
  1225. struct buf *header_work;
  1226. if (work.size) {
  1227. size_t beg;
  1228. i = work.size;
  1229. work.size -= 1;
  1230. while (work.size && data[work.size] != '\n')
  1231. work.size -= 1;
  1232. beg = work.size + 1;
  1233. while (work.size && data[work.size - 1] == '\n')
  1234. work.size -= 1;
  1235. if (work.size > 0) {
  1236. struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
  1237. parse_inline(tmp, rndr, work.data, work.size);
  1238. if (rndr->cb.paragraph)
  1239. rndr->cb.paragraph(ob, tmp, rndr->opaque);
  1240. rndr_popbuf(rndr, BUFFER_BLOCK);
  1241. work.data += beg;
  1242. work.size = i - beg;
  1243. }
  1244. else work.size = i;
  1245. }
  1246. header_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1247. parse_inline(header_work, rndr, work.data, work.size);
  1248. if (rndr->cb.header)
  1249. rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
  1250. rndr_popbuf(rndr, BUFFER_SPAN);
  1251. }
  1252. return end;
  1253. }
  1254. /* parse_fencedcode • handles parsing of a block-level code fragment */
  1255. static size_t
  1256. parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1257. {
  1258. size_t beg, end;
  1259. struct buf *work = 0;
  1260. struct buf lang = { 0, 0, 0, 0 };
  1261. beg = is_codefence(data, size, &lang);
  1262. if (beg == 0) return 0;
  1263. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1264. while (beg < size) {
  1265. size_t fence_end;
  1266. struct buf fence_trail = { 0, 0, 0, 0 };
  1267. fence_end = is_codefence(data + beg, size - beg, &fence_trail);
  1268. if (fence_end != 0 && fence_trail.size == 0) {
  1269. beg += fence_end;
  1270. break;
  1271. }
  1272. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1273. if (beg < end) {
  1274. /* verbatim copy to the working buffer,
  1275. escaping entities */
  1276. if (is_empty(data + beg, end - beg))
  1277. bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
  1278. else bufput(rndr->opaque, rndr->cb.allocate, work, data + beg, end - beg);
  1279. }
  1280. beg = end;
  1281. }
  1282. if (work->size && work->data[work->size - 1] != '\n')
  1283. bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
  1284. if (rndr->cb.blockcode)
  1285. rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
  1286. rndr_popbuf(rndr, BUFFER_BLOCK);
  1287. return beg;
  1288. }
  1289. static size_t
  1290. parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1291. {
  1292. size_t beg, end, pre;
  1293. struct buf *work = 0;
  1294. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1295. beg = 0;
  1296. while (beg < size) {
  1297. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1298. pre = prefix_code(data + beg, end - beg);
  1299. if (pre)
  1300. beg += pre; /* skipping prefix */
  1301. else if (!is_empty(data + beg, end - beg))
  1302. /* non-empty non-prefixed line breaks the pre */
  1303. break;
  1304. if (beg < end) {
  1305. /* verbatim copy to the working buffer,
  1306. escaping entities */
  1307. if (is_empty(data + beg, end - beg))
  1308. bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
  1309. else bufput(rndr->opaque, rndr->cb.allocate, work, data + beg, end - beg);
  1310. }
  1311. beg = end;
  1312. }
  1313. while (work->size && work->data[work->size - 1] == '\n')
  1314. work->size -= 1;
  1315. bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
  1316. if (rndr->cb.blockcode)
  1317. rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
  1318. rndr_popbuf(rndr, BUFFER_BLOCK);
  1319. return beg;
  1320. }
  1321. /* parse_listitem • parsing of a single list item */
  1322. /* assuming initial prefix is already removed */
  1323. static size_t
  1324. parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
  1325. {
  1326. struct buf *work = 0, *inter = 0;
  1327. size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
  1328. int in_empty = 0, has_inside_empty = 0, in_fence = 0;
  1329. /* keeping track of the first indentation prefix */
  1330. while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
  1331. orgpre++;
  1332. beg = prefix_uli(data, size);
  1333. if (!beg)
  1334. beg = prefix_oli(data, size);
  1335. if (!beg)
  1336. return 0;
  1337. /* skipping to the beginning of the following line */
  1338. end = beg;
  1339. while (end < size && data[end - 1] != '\n')
  1340. end++;
  1341. /* getting working buffers */
  1342. work = rndr_newbuf(rndr, BUFFER_SPAN);
  1343. inter = rndr_newbuf(rndr, BUFFER_SPAN);
  1344. /* putting the first line into the working buffer */
  1345. bufput(rndr->opaque, rndr->cb.allocate, work, data + beg, end - beg);
  1346. beg = end;
  1347. /* process the following lines */
  1348. while (beg < size) {
  1349. size_t has_next_uli = 0, has_next_oli = 0;
  1350. end++;
  1351. while (end < size && data[end - 1] != '\n')
  1352. end++;
  1353. /* process an empty line */
  1354. if (is_empty(data + beg, end - beg)) {
  1355. in_empty = 1;
  1356. beg = end;
  1357. continue;
  1358. }
  1359. /* calculating the indentation */
  1360. i = 0;
  1361. while (i < 4 && beg + i < end && data[beg + i] == ' ')
  1362. i++;
  1363. pre = i;
  1364. if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
  1365. if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
  1366. in_fence = !in_fence;
  1367. }
  1368. /* Only check for new list items if we are **not** inside
  1369. * a fenced code block */
  1370. if (!in_fence) {
  1371. has_next_uli = prefix_uli(data + beg + i, end - beg - i);
  1372. has_next_oli = prefix_oli(data + beg + i, end - beg - i);
  1373. }
  1374. /* checking for ul/ol switch */
  1375. if (in_empty && (
  1376. ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
  1377. (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
  1378. *flags |= MKD_LI_END;
  1379. break; /* the following item must have same list type */
  1380. }
  1381. /* checking for a new item */
  1382. if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
  1383. if (in_empty)
  1384. has_inside_empty = 1;
  1385. if (pre == orgpre) /* the following item must have */
  1386. break; /* the same indentation */
  1387. if (!sublist)
  1388. sublist = work->size;
  1389. }
  1390. /* joining only indented stuff after empty lines;
  1391. * note that now we only require 1 space of indentation
  1392. * to continue a list */
  1393. else if (in_empty && pre == 0) {
  1394. *flags |= MKD_LI_END;
  1395. break;
  1396. }
  1397. else if (in_empty) {
  1398. bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
  1399. has_inside_empty = 1;
  1400. }
  1401. in_empty = 0;
  1402. /* adding the line without prefix into the working buffer */
  1403. bufput(rndr->opaque, rndr->cb.allocate, work, data + beg + i, end - beg - i);
  1404. beg = end;
  1405. }
  1406. /* render of li contents */
  1407. if (has_inside_empty)
  1408. *flags |= MKD_LI_BLOCK;
  1409. if (*flags & MKD_LI_BLOCK) {
  1410. /* intermediate render of block li */
  1411. if (sublist && sublist < work->size) {
  1412. parse_block(inter, rndr, work->data, sublist);
  1413. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1414. }
  1415. else
  1416. parse_block(inter, rndr, work->data, work->size);
  1417. } else {
  1418. /* intermediate render of inline li */
  1419. if (sublist && sublist < work->size) {
  1420. parse_inline(inter, rndr, work->data, sublist);
  1421. parse_block(inter, rndr, work->data + sublist, work->size - sublist);
  1422. }
  1423. else
  1424. parse_inline(inter, rndr, work->data, work->size);
  1425. }
  1426. /* render of li itself */
  1427. if (rndr->cb.listitem)
  1428. rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
  1429. rndr_popbuf(rndr, BUFFER_SPAN);
  1430. rndr_popbuf(rndr, BUFFER_SPAN);
  1431. return beg;
  1432. }
  1433. /* parse_list • parsing ordered or unordered list block */
  1434. static size_t
  1435. parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
  1436. {
  1437. struct buf *work = 0;
  1438. size_t i = 0, j;
  1439. work = rndr_newbuf(rndr, BUFFER_BLOCK);
  1440. while (i < size) {
  1441. j = parse_listitem(work, rndr, data + i, size - i, &flags);
  1442. i += j;
  1443. if (!j || (flags & MKD_LI_END))
  1444. break;
  1445. }
  1446. if (rndr->cb.list)
  1447. rndr->cb.list(ob, work, flags, rndr->opaque);
  1448. rndr_popbuf(rndr, BUFFER_BLOCK);
  1449. return i;
  1450. }
  1451. /* parse_atxheader • parsing of atx-style headers */
  1452. static size_t
  1453. parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
  1454. {
  1455. size_t level = 0;
  1456. size_t i, end, skip;
  1457. while (level < size && level < 6 && data[level] == '#')
  1458. level++;
  1459. for (i = level; i < size && data[i] == ' '; i++);
  1460. for (end = i; end < size && data[end] != '\n'; end++);
  1461. skip = end;
  1462. while (end && data[end - 1] == '#')
  1463. end--;
  1464. while (end && data[end - 1] == ' ')
  1465. end--;
  1466. if (end > i) {
  1467. struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
  1468. parse_inline(work, rndr, data + i, end - i);
  1469. if (rndr->cb.header)
  1470. rndr->cb.header(ob, work, (int)level, rndr->opaque);
  1471. rndr_popbuf(rndr, BUFFER_SPAN);
  1472. }
  1473. return skip;
  1474. }
  1475. /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
  1476. /* returns the length on match, 0 otherwise */
  1477. static size_t
  1478. htmlblock_end_tag(
  1479. const char *tag,
  1480. size_t tag_len,
  1481. struct sd_markdown *rndr,
  1482. uint8_t *data,
  1483. size_t size)
  1484. {
  1485. size_t i, w;
  1486. /* checking if tag is a match */
  1487. if (tag_len + 3 >= size ||
  1488. strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
  1489. data[tag_len + 2] != '>')
  1490. return 0;
  1491. /* checking white lines */
  1492. i = tag_len + 3;
  1493. w = 0;
  1494. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1495. return 0; /* non-blank after tag */
  1496. i += w;
  1497. w = 0;
  1498. if (i < size)
  1499. w = is_empty(data + i, size - i);
  1500. return i + w;
  1501. }
  1502. static size_t
  1503. htmlblock_end(const char *curtag,
  1504. struct sd_markdown *rndr,
  1505. uint8_t *data,
  1506. size_t size,
  1507. int start_of_line)
  1508. {
  1509. size_t tag_size = strlen(curtag);
  1510. size_t i = 1, end_tag;
  1511. int block_lines = 0;
  1512. while (i < size) {
  1513. i++;
  1514. while (i < size && !(data[i - 1] == '<' && data[i] == '/')) {
  1515. if (data[i] == '\n')
  1516. block_lines++;
  1517. i++;
  1518. }
  1519. /* If we are only looking for unindented tags, skip the tag
  1520. * if it doesn't follow a newline.
  1521. *
  1522. * The only exception to this is if the tag is still on the
  1523. * initial line; in that case it still counts as a closing
  1524. * tag
  1525. */
  1526. if (start_of_line && block_lines > 0 && data[i - 2] != '\n')
  1527. continue;
  1528. if (i + 2 + tag_size >= size)
  1529. break;
  1530. end_tag = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1);
  1531. if (end_tag)
  1532. return i + end_tag - 1;
  1533. }
  1534. return 0;
  1535. }
  1536. /* parse_htmlblock • parsing of inline HTML block */
  1537. static size_t
  1538. parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
  1539. {
  1540. size_t i, j = 0, tag_end;
  1541. const char *curtag = NULL;
  1542. struct buf work = { data, 0, 0, 0 };
  1543. /* identification of the opening tag */
  1544. if (size < 2 || data[0] != '<')
  1545. return 0;
  1546. i = 1;
  1547. while (i < size && data[i] != '>' && data[i] != ' ')
  1548. i++;
  1549. if (i < size)
  1550. curtag = find_block_tag((char *)data + 1, (int)i - 1);
  1551. /* handling of special cases */
  1552. if (!curtag) {
  1553. /* HTML comment, laxist form */
  1554. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  1555. i = 5;
  1556. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  1557. i++;
  1558. i++;
  1559. if (i < size)
  1560. j = is_empty(data + i, size - i);
  1561. if (j) {
  1562. work.size = i + j;
  1563. if (do_render && rndr->cb.blockhtml)
  1564. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1565. return work.size;
  1566. }
  1567. }
  1568. /* HR, which is the only self-closing block tag considered */
  1569. if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
  1570. i = 3;
  1571. while (i < size && data[i] != '>')
  1572. i++;
  1573. if (i + 1 < size) {
  1574. i++;
  1575. j = is_empty(data + i, size - i);
  1576. if (j) {
  1577. work.size = i + j;
  1578. if (do_render && rndr->cb.blockhtml)
  1579. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1580. return work.size;
  1581. }
  1582. }
  1583. }
  1584. /* no special case recognised */
  1585. return 0;
  1586. }
  1587. /* looking for an unindented matching closing tag */
  1588. /* followed by a blank line */
  1589. tag_end = htmlblock_end(curtag, rndr, data, size, 1);
  1590. /* if not found, trying a second pass looking for indented match */
  1591. /* but not if tag is "ins" or "del" (following original Markdown.pl) */
  1592. if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
  1593. tag_end = htmlblock_end(curtag, rndr, data, size, 0);
  1594. }
  1595. if (!tag_end)
  1596. return 0;
  1597. /* the end of the block has been found */
  1598. work.size = tag_end;
  1599. if (do_render && rndr->cb.blockhtml)
  1600. rndr->cb.blockhtml(ob, &work, rndr->opaque);
  1601. return tag_end;
  1602. }
  1603. static void
  1604. parse_table_row(
  1605. struct buf *ob,
  1606. struct sd_markdown *rndr,
  1607. uint8_t *data,
  1608. size_t size,
  1609. size_t columns,
  1610. int *col_data,
  1611. int header_flag)
  1612. {
  1613. size_t i = 0, col;
  1614. struct buf *row_work = 0;
  1615. if (!rndr->cb.table_cell || !rndr->cb.table_row)
  1616. return;
  1617. row_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1618. if (i < size && data[i] == '|')
  1619. i++;
  1620. for (col = 0; col < columns && i < size; ++col) {
  1621. size_t cell_start, cell_end;
  1622. struct buf *cell_work;
  1623. cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
  1624. while (i < size && _isspace(data[i]))
  1625. i++;
  1626. cell_start = i;
  1627. while (i < size && data[i] != '|')
  1628. i++;
  1629. cell_end = i - 1;
  1630. while (cell_end > cell_start && _isspace(data[cell_end]))
  1631. cell_end--;
  1632. parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
  1633. rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
  1634. rndr_popbuf(rndr, BUFFER_SPAN);
  1635. i++;
  1636. }
  1637. for (; col < columns; ++col) {
  1638. struct buf empty_cell = { 0, 0, 0, 0 };
  1639. rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
  1640. }
  1641. rndr->cb.table_row(ob, row_work, rndr->opaque);
  1642. rndr_popbuf(rndr, BUFFER_SPAN);
  1643. }
  1644. static size_t
  1645. parse_table_header(
  1646. struct buf *ob,
  1647. struct sd_markdown *rndr,
  1648. uint8_t *data,
  1649. size_t size,
  1650. size_t *columns,
  1651. int **column_data)
  1652. {
  1653. int pipes;
  1654. size_t i = 0, col, header_end, under_end;
  1655. pipes = 0;
  1656. while (i < size && data[i] != '\n')
  1657. if (data[i++] == '|')
  1658. pipes++;
  1659. if (i == size || pipes == 0)
  1660. return 0;
  1661. header_end = i;
  1662. while (header_end > 0 && _isspace(data[header_end - 1]))
  1663. header_end--;
  1664. if (data[0] == '|')
  1665. pipes--;
  1666. if (header_end && data[header_end -

Large files files are truncated, but you can click here to view the full file