PageRenderTime 59ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/writeily/libs/anddown/jni/src/markdown.c

https://github.com/jpsullivan/writeily
C | 2884 lines | 2648 code | 172 blank | 64 comment | 243 complexity | afb653adb6a6a64b20987109ab8261be MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /* markdown.c - generic markdown parser */
  2. #include "markdown.h"
  3. #include <assert.h>
  4. #include <string.h>
  5. #include <ctype.h>
  6. #include <stdio.h>
  7. #include "stack.h"
  8. #ifdef _MSC_VER
  9. #define strncasecmp _strnicmp
  10. #endif
  /* number of buckets in the link reference hash table (hoedown_markdown.refs);
   * bucket index is the reference hash modulo this value */
  11. #define REF_TABLE_SIZE 8
  /* indices into hoedown_markdown.work_bufs, also used by newbuf() to pick
   * the size passed to hoedown_buffer_new() (256 for BLOCK, 64 for SPAN) */
  12. #define BUFFER_BLOCK 0
  13. #define BUFFER_SPAN 1
  14. #define HOEDOWN_LI_END 8 /* internal list flag */
  /* declared here; the definition is not in this part of the file */
  15. const char *hoedown_find_block_tag(const char *str, unsigned int len);
  16. /***************
  17. * LOCAL TYPES *
  18. ***************/
  19. /* link_ref: one link reference, kept in a singly linked bucket chain of
   * the table indexed by (id % REF_TABLE_SIZE) */
  20. struct link_ref {
  21. unsigned int id; /* hash of the reference name, computed by hash_link_ref() */
  22. hoedown_buffer *link; /* handed to the link/image renderer by char_link(); freed by free_link_refs() */
  23. hoedown_buffer *title; /* handed to the link/image renderer by char_link(); freed by free_link_refs() */
  24. struct link_ref *next; /* next entry in the same bucket, NULL at the end */
  25. };
  26. /* footnote_ref: reference to a footnote */
  27. struct footnote_ref {
  28. unsigned int id; /* hash of the footnote name, computed by hash_link_ref() */
  29. int is_used; /* set to 1 by char_link() the first time the footnote is referenced */
  30. unsigned int num; /* footnotes_used.count at the time of first use; passed to the footnote_ref renderer */
  31. hoedown_buffer *contents; /* released by free_footnote_ref() */
  32. };
  33. /* footnote_item: an item in a footnote_list */
  34. struct footnote_item {
  35. struct footnote_ref *ref; /* the footnote this node refers to; freed with the list only when requested */
  36. struct footnote_item *next; /* next node, NULL at the end of the list */
  37. };
  38. /* footnote_list: linked list of footnote_item, appended at the tail */
  39. struct footnote_list {
  40. unsigned int count; /* incremented by add_footnote_ref() on every successful append */
  41. struct footnote_item *head; /* first node, NULL when the list is empty */
  42. struct footnote_item *tail; /* last node; new nodes are linked after it */
  43. };
  44. /* char_trigger: function pointer to render active chars */
  45. /* returns the number of chars taken care of; parse_inline() treats a
   * return of 0 as "no action" and moves past the char as normal text */
  46. /* data is the pointer of the beginning of the span */
  47. /* offset is the number of valid chars before data */
  48. typedef size_t
  49. (*char_trigger)(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  /* forward declarations of the handlers referenced by markdown_char_ptrs */
  50. static size_t char_emphasis(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  51. static size_t char_quote(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  52. static size_t char_linebreak(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  53. static size_t char_codespan(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  54. static size_t char_escape(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  55. static size_t char_entity(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  56. static size_t char_langle_tag(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  57. static size_t char_autolink_url(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  58. static size_t char_autolink_email(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  59. static size_t char_autolink_www(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  60. static size_t char_link(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  61. static size_t char_superscript(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  /* markdown_char_t: action codes stored in hoedown_markdown.active_char.
   * parse_inline() uses the code as an index into markdown_char_ptrs, so the
   * enumerators below must stay in the same order as that table.
   * NOTE(review): "ENTITITY" is misspelled but is left as is here, since
   * other parts of the file may refer to it by this name. */
  62. enum markdown_char_t {
  63. MD_CHAR_NONE = 0, /* inactive char: copied through as normal text */
  64. MD_CHAR_EMPHASIS,
  65. MD_CHAR_CODESPAN,
  66. MD_CHAR_LINEBREAK,
  67. MD_CHAR_LINK,
  68. MD_CHAR_LANGLE,
  69. MD_CHAR_ESCAPE,
  70. MD_CHAR_ENTITITY,
  71. MD_CHAR_AUTOLINK_URL,
  72. MD_CHAR_AUTOLINK_EMAIL,
  73. MD_CHAR_AUTOLINK_WWW,
  74. MD_CHAR_SUPERSCRIPT,
  75. MD_CHAR_QUOTE
  76. };
  /* markdown_char_ptrs: handler table indexed by the action code found in
   * hoedown_markdown.active_char (see parse_inline()). Entry order mirrors
   * enum markdown_char_t; slot 0 (MD_CHAR_NONE) has no handler. */
  77. static char_trigger markdown_char_ptrs[] = {
  78. NULL,
  79. &char_emphasis,
  80. &char_codespan,
  81. &char_linebreak,
  82. &char_link,
  83. &char_langle_tag,
  84. &char_escape,
  85. &char_entity,
  86. &char_autolink_url,
  87. &char_autolink_email,
  88. &char_autolink_www,
  89. &char_superscript,
  90. &char_quote
  91. };
  92. /* render • structure containing state for a parser instance */
  93. struct hoedown_markdown {
  94. hoedown_renderer md; /* renderer callbacks plus the opaque pointer passed to each of them */
  95. struct link_ref *refs[REF_TABLE_SIZE]; /* link reference hash table, one chain per bucket */
  96. struct footnote_list footnotes_found; /* searched by char_link() when resolving a footnote name */
  97. struct footnote_list footnotes_used; /* footnotes appended by char_link() on their first use */
  98. uint8_t active_char[256]; /* per-byte action code; 0 means the byte is plain text */
  99. hoedown_stack work_bufs[2]; /* reusable buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
  100. unsigned int ext_flags; /* bit set tested against HOEDOWN_EXT_* flags */
  101. size_t max_nesting; /* parse_inline() does nothing once the pool sizes add up to more than this */
  102. int in_link_body; /* set to 1 by char_link() while link text is parsed; autolink handlers return 0 then */
  103. };
  104. /***************************
  105. * HELPER FUNCTIONS *
  106. ***************************/
  107. static inline hoedown_buffer *
  108. newbuf(hoedown_markdown *md, int type)
  109. {
  110. static const size_t buf_size[2] = {256, 64};
  111. hoedown_buffer *work = NULL;
  112. hoedown_stack *pool = &md->work_bufs[type];
  113. if (pool->size < pool->asize &&
  114. pool->item[pool->size] != NULL) {
  115. work = pool->item[pool->size++];
  116. work->size = 0;
  117. } else {
  118. work = hoedown_buffer_new(buf_size[type]);
  119. hoedown_stack_push(pool, work);
  120. }
  121. return work;
  122. }
  123. static inline void
  124. popbuf(hoedown_markdown *md, int type)
  125. {
  126. md->work_bufs[type].size--;
  127. }
  128. static void
  129. unscape_text(hoedown_buffer *ob, hoedown_buffer *src)
  130. {
  131. size_t i = 0, org;
  132. while (i < src->size) {
  133. org = i;
  134. while (i < src->size && src->data[i] != '\\')
  135. i++;
  136. if (i > org)
  137. hoedown_buffer_put(ob, src->data + org, i - org);
  138. if (i + 1 >= src->size)
  139. break;
  140. hoedown_buffer_putc(ob, src->data[i + 1]);
  141. i += 2;
  142. }
  143. }
  /* hash_link_ref: case-insensitive hash of `length` bytes at `link_ref`.
   *
   * Every byte is lower-cased with tolower() and folded in as
   * hash = hash * 65599 + byte, in unsigned (wrapping) arithmetic.
   * An empty input hashes to 0. */
  static unsigned int
  hash_link_ref(const uint8_t *link_ref, size_t length)
  {
      unsigned int acc = 0;
      size_t k;

      for (k = 0; k < length; k++) {
          /* 65599 == (1 << 6) + (1 << 16) - 1 */
          acc = acc * 65599u + (unsigned int)tolower(link_ref[k]);
      }

      return acc;
  }
  153. static struct link_ref *
  154. add_link_ref(
  155. struct link_ref **references,
  156. const uint8_t *name, size_t name_size)
  157. {
  158. struct link_ref *ref = calloc(1, sizeof(struct link_ref));
  159. if (!ref)
  160. return NULL;
  161. ref->id = hash_link_ref(name, name_size);
  162. ref->next = references[ref->id % REF_TABLE_SIZE];
  163. references[ref->id % REF_TABLE_SIZE] = ref;
  164. return ref;
  165. }
  166. static struct link_ref *
  167. find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
  168. {
  169. unsigned int hash = hash_link_ref(name, length);
  170. struct link_ref *ref = NULL;
  171. ref = references[hash % REF_TABLE_SIZE];
  172. while (ref != NULL) {
  173. if (ref->id == hash)
  174. return ref;
  175. ref = ref->next;
  176. }
  177. return NULL;
  178. }
  179. static void
  180. free_link_refs(struct link_ref **references)
  181. {
  182. size_t i;
  183. for (i = 0; i < REF_TABLE_SIZE; ++i) {
  184. struct link_ref *r = references[i];
  185. struct link_ref *next;
  186. while (r) {
  187. next = r->next;
  188. hoedown_buffer_free(r->link);
  189. hoedown_buffer_free(r->title);
  190. free(r);
  191. r = next;
  192. }
  193. }
  194. }
  195. static struct footnote_ref *
  196. create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
  197. {
  198. struct footnote_ref *ref = calloc(1, sizeof(struct footnote_ref));
  199. if (!ref)
  200. return NULL;
  201. ref->id = hash_link_ref(name, name_size);
  202. return ref;
  203. }
  204. static int
  205. add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
  206. {
  207. struct footnote_item *item = calloc(1, sizeof(struct footnote_item));
  208. if (!item)
  209. return 0;
  210. item->ref = ref;
  211. if (list->head == NULL) {
  212. list->head = list->tail = item;
  213. } else {
  214. list->tail->next = item;
  215. list->tail = item;
  216. }
  217. list->count++;
  218. return 1;
  219. }
  220. static struct footnote_ref *
  221. find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
  222. {
  223. unsigned int hash = hash_link_ref(name, length);
  224. struct footnote_item *item = NULL;
  225. item = list->head;
  226. while (item != NULL) {
  227. if (item->ref->id == hash)
  228. return item->ref;
  229. item = item->next;
  230. }
  231. return NULL;
  232. }
  233. static void
  234. free_footnote_ref(struct footnote_ref *ref)
  235. {
  236. hoedown_buffer_free(ref->contents);
  237. free(ref);
  238. }
  239. static void
  240. free_footnote_list(struct footnote_list *list, int free_refs)
  241. {
  242. struct footnote_item *item = list->head;
  243. struct footnote_item *next;
  244. while (item) {
  245. next = item->next;
  246. if (free_refs)
  247. free_footnote_ref(item->ref);
  248. free(item);
  249. item = next;
  250. }
  251. }
  /*
   * Tells whether `c` counts as whitespace for Markdown purposes.
   * Only the space character and the newline qualify; the original note
   * here states that tabs and carriage returns are filtered out during the
   * preprocessing phase.
   *
   * Proper UTF-8 support would require decoding a full Unicode codepoint
   * and testing its space property instead of looking at a single byte.
   *
   * Returns 1 for whitespace, 0 otherwise.
   */
  static inline int
  _isspace(int c)
  {
      switch (c) {
      case ' ':
      case '\n':
          return 1;
      default:
          return 0;
      }
  }
  267. /****************************
  268. * INLINE PARSING FUNCTIONS *
  269. ****************************/
  /* is_mail_autolink: scans the address part of a mail autolink up to '>'.
   *
   * The address may contain only [-@._a-zA-Z0-9]. Returns the number of
   * bytes up to and including the closing '>' when exactly one '@' was
   * seen, and 0 in every other case (invalid byte, wrong number of '@',
   * or no '>' within `size` bytes). This is deliberately less strict than
   * the original markdown e-mail matching. */
  static size_t
  is_mail_autolink(uint8_t *data, size_t size)
  {
      size_t pos;
      size_t at_count = 0;

      for (pos = 0; pos < size; pos++) {
          uint8_t ch = data[pos];

          if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
              continue;

          if (ch == '@') {
              at_count++;
              continue;
          }

          if (ch == '>')
              return (at_count == 1) ? pos + 1 : 0;

          /* any other byte cannot be part of an address */
          return 0;
      }

      return 0;
  }
  295. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  296. static size_t
  297. tag_length(uint8_t *data, size_t size, enum hoedown_autolink *autolink)
  298. {
  299. size_t i, j;
  300. /* a valid tag can't be shorter than 3 chars */
  301. if (size < 3) return 0;
  302. /* begins with a '<' optionally followed by '/', followed by letter or number */
  303. if (data[0] != '<') return 0;
  304. i = (data[1] == '/') ? 2 : 1;
  305. if (!isalnum(data[i]))
  306. return 0;
  307. /* scheme test */
  308. *autolink = HOEDOWN_AUTOLINK_NONE;
  309. /* try to find the beginning of an URI */
  310. while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  311. i++;
  312. if (i > 1 && data[i] == '@') {
  313. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  314. *autolink = HOEDOWN_AUTOLINK_EMAIL;
  315. return i + j;
  316. }
  317. }
  318. if (i > 2 && data[i] == ':') {
  319. *autolink = HOEDOWN_AUTOLINK_NORMAL;
  320. i++;
  321. }
  322. /* completing autolink test: no whitespace or ' or " */
  323. if (i >= size)
  324. *autolink = HOEDOWN_AUTOLINK_NONE;
  325. else if (*autolink) {
  326. j = i;
  327. while (i < size) {
  328. if (data[i] == '\\') i += 2;
  329. else if (data[i] == '>' || data[i] == '\'' ||
  330. data[i] == '"' || data[i] == ' ' || data[i] == '\n')
  331. break;
  332. else i++;
  333. }
  334. if (i >= size) return 0;
  335. if (i > j && data[i] == '>') return i + 1;
  336. /* one of the forbidden chars has been found */
  337. *autolink = HOEDOWN_AUTOLINK_NONE;
  338. }
  339. /* looking for sometinhg looking like a tag end */
  340. while (i < size && data[i] != '>') i++;
  341. if (i >= size) return 0;
  342. return i + 1;
  343. }
  344. /* parse_inline • parses inline markdown elements */
  345. static void
  346. parse_inline(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
  347. {
  348. size_t i = 0, end = 0;
  349. uint8_t action = 0;
  350. hoedown_buffer work = { 0, 0, 0, 0 };
  351. if (md->work_bufs[BUFFER_SPAN].size +
  352. md->work_bufs[BUFFER_BLOCK].size > md->max_nesting)
  353. return;
  354. while (i < size) {
  355. /* copying inactive chars into the output */
  356. while (end < size && (action = md->active_char[data[end]]) == 0) {
  357. end++;
  358. }
  359. if (md->md.normal_text) {
  360. work.data = data + i;
  361. work.size = end - i;
  362. md->md.normal_text(ob, &work, md->md.opaque);
  363. }
  364. else
  365. hoedown_buffer_put(ob, data + i, end - i);
  366. if (end >= size) break;
  367. i = end;
  368. end = markdown_char_ptrs[(int)action](ob, md, data + i, i, size - i);
  369. if (!end) /* no action from the callback */
  370. end = i + 1;
  371. else {
  372. i += end;
  373. end = i;
  374. }
  375. }
  376. }
  /* find_emph_char: looks for the next occurrence of the emphasis char `c`,
   * skipping over code spans and links.
   *
   * The scan starts at data[1]. Returns the index of the match, or 0 when
   * there is none.
   *
   * Code spans: the scan stops on a backtick so that an emphasis char inside
   * a properly closed code span is never reported. If the span is not
   * closed before the end of the input, the first `c` seen inside it is
   * returned as a fallback.
   *
   * Links: the text and the following (...) or [...] part are skipped the
   * same way, with the same fallback when the construct is incomplete.
   *
   * A backtick or '[' preceded by a backslash is treated as plain text. */
  static size_t
  find_emph_char(uint8_t *data, size_t size, uint8_t c)
  {
      size_t i = 1;

      while (i < size) {
          while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
              i++;

          if (i == size)
              return 0;

          if (data[i] == c)
              return i;

          /* not counting escaped chars (i >= 1 here, so data[i - 1] is valid) */
          if (data[i - 1] == '\\') {
              i++;
              continue;
          }

          if (data[i] == '`') {
              size_t span_nb = 0, bt;
              size_t tmp_i = 0;

              /* counting the number of opening backticks */
              while (i < size && data[i] == '`') {
                  i++;
                  span_nb++;
              }

              if (i >= size)
                  return 0;

              /* finding the matching closing sequence */
              bt = 0;
              while (i < size && bt < span_nb) {
                  if (!tmp_i && data[i] == c)
                      tmp_i = i;
                  if (data[i] == '`')
                      bt++;
                  else
                      bt = 0;
                  i++;
              }

              /* not a well-formed code span: fall back to the char found in it */
              if (bt < span_nb && i >= size)
                  return tmp_i;
          }
          /* skipping a link */
          else if (data[i] == '[') {
              size_t tmp_i = 0;
              uint8_t cc;

              i++;
              while (i < size && data[i] != ']') {
                  if (!tmp_i && data[i] == c)
                      tmp_i = i;
                  i++;
              }

              i++;
              while (i < size && (data[i] == ' ' || data[i] == '\n'))
                  i++;

              if (i >= size)
                  return tmp_i;

              switch (data[i]) {
              case '[':
                  cc = ']';
                  break;

              case '(':
                  cc = ')';
                  break;

              default:
                  if (tmp_i)
                      return tmp_i;
                  else
                      continue; /* resumes the outer scan at data[i] */
              }

              i++;
              while (i < size && data[i] != cc) {
                  if (!tmp_i && data[i] == c)
                      tmp_i = i;
                  i++;
              }

              if (i >= size)
                  return tmp_i;

              i++;
          }
      }

      return 0;
  }
  448. /* parse_emph1 • parsing single emphase */
  449. /* closed by a symbol not preceded by whitespace and not followed by symbol */
  450. static size_t
  451. parse_emph1(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, uint8_t c)
  452. {
  453. size_t i = 0, len;
  454. hoedown_buffer *work = 0;
  455. int r;
  456. /* skipping one symbol if coming from emph3 */
  457. if (size > 1 && data[0] == c && data[1] == c) i = 1;
  458. while (i < size) {
  459. len = find_emph_char(data + i, size - i, c);
  460. if (!len) return 0;
  461. i += len;
  462. if (i >= size) return 0;
  463. if (data[i] == c && !_isspace(data[i - 1])) {
  464. if (md->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
  465. if (i + 1 < size && isalnum(data[i + 1]))
  466. continue;
  467. }
  468. work = newbuf(md, BUFFER_SPAN);
  469. parse_inline(work, md, data, i);
  470. if (md->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_')
  471. r = md->md.underline(ob, work, md->md.opaque);
  472. else
  473. r = md->md.emphasis(ob, work, md->md.opaque);
  474. popbuf(md, BUFFER_SPAN);
  475. return r ? i + 1 : 0;
  476. }
  477. }
  478. return 0;
  479. }
  480. /* parse_emph2 • parsing single emphase */
  481. static size_t
  482. parse_emph2(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, uint8_t c)
  483. {
  484. size_t i = 0, len;
  485. hoedown_buffer *work = 0;
  486. int r;
  487. while (i < size) {
  488. len = find_emph_char(data + i, size - i, c);
  489. if (!len) return 0;
  490. i += len;
  491. if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
  492. work = newbuf(md, BUFFER_SPAN);
  493. parse_inline(work, md, data, i);
  494. if (c == '~')
  495. r = md->md.strikethrough(ob, work, md->md.opaque);
  496. else if (c == '=')
  497. r = md->md.highlight(ob, work, md->md.opaque);
  498. else
  499. r = md->md.double_emphasis(ob, work, md->md.opaque);
  500. popbuf(md, BUFFER_SPAN);
  501. return r ? i + 2 : 0;
  502. }
  503. i++;
  504. }
  505. return 0;
  506. }
  507. /* parse_emph3 • parsing single emphase */
  508. /* finds the first closing tag, and delegates to the other emph */
  509. static size_t
  510. parse_emph3(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, uint8_t c)
  511. {
  512. size_t i = 0, len;
  513. int r;
  514. while (i < size) {
  515. len = find_emph_char(data + i, size - i, c);
  516. if (!len) return 0;
  517. i += len;
  518. /* skip whitespace preceded symbols */
  519. if (data[i] != c || _isspace(data[i - 1]))
  520. continue;
  521. if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && md->md.triple_emphasis) {
  522. /* triple symbol found */
  523. hoedown_buffer *work = newbuf(md, BUFFER_SPAN);
  524. parse_inline(work, md, data, i);
  525. r = md->md.triple_emphasis(ob, work, md->md.opaque);
  526. popbuf(md, BUFFER_SPAN);
  527. return r ? i + 3 : 0;
  528. } else if (i + 1 < size && data[i + 1] == c) {
  529. /* double symbol found, handing over to emph1 */
  530. len = parse_emph1(ob, md, data - 2, size + 2, c);
  531. if (!len) return 0;
  532. else return len - 2;
  533. } else {
  534. /* single symbol found, handing over to emph2 */
  535. len = parse_emph2(ob, md, data - 1, size + 1, c);
  536. if (!len) return 0;
  537. else return len - 1;
  538. }
  539. }
  540. return 0;
  541. }
  542. /* char_emphasis • single and double emphasis parsing */
  543. static size_t
  544. char_emphasis(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  545. {
  546. uint8_t c = data[0];
  547. size_t ret;
  548. if (md->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
  549. if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(')
  550. return 0;
  551. }
  552. if (size > 2 && data[1] != c) {
  553. /* whitespace cannot follow an opening emphasis;
  554. * strikethrough only takes two characters '~~' */
  555. if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, md, data + 1, size - 1, c)) == 0)
  556. return 0;
  557. return ret + 1;
  558. }
  559. if (size > 3 && data[1] == c && data[2] != c) {
  560. if (_isspace(data[2]) || (ret = parse_emph2(ob, md, data + 2, size - 2, c)) == 0)
  561. return 0;
  562. return ret + 2;
  563. }
  564. if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
  565. if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, md, data + 3, size - 3, c)) == 0)
  566. return 0;
  567. return ret + 3;
  568. }
  569. return 0;
  570. }
  571. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  572. static size_t
  573. char_linebreak(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  574. {
  575. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  576. return 0;
  577. /* removing the last space from ob and rendering */
  578. while (ob->size && ob->data[ob->size - 1] == ' ')
  579. ob->size--;
  580. return md->md.linebreak(ob, md->md.opaque) ? 1 : 0;
  581. }
  582. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  583. static size_t
  584. char_codespan(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  585. {
  586. size_t end, nb = 0, i, f_begin, f_end;
  587. /* counting the number of backticks in the delimiter */
  588. while (nb < size && data[nb] == '`')
  589. nb++;
  590. /* finding the next delimiter */
  591. i = 0;
  592. for (end = nb; end < size && i < nb; end++) {
  593. if (data[end] == '`') i++;
  594. else i = 0;
  595. }
  596. if (i < nb && end >= size)
  597. return 0; /* no matching delimiter */
  598. /* trimming outside whitespaces */
  599. f_begin = nb;
  600. while (f_begin < end && data[f_begin] == ' ')
  601. f_begin++;
  602. f_end = end - nb;
  603. while (f_end > nb && data[f_end-1] == ' ')
  604. f_end--;
  605. /* real code span */
  606. if (f_begin < f_end) {
  607. hoedown_buffer work = { data + f_begin, f_end - f_begin, 0, 0 };
  608. if (!md->md.codespan(ob, &work, md->md.opaque))
  609. end = 0;
  610. } else {
  611. if (!md->md.codespan(ob, 0, md->md.opaque))
  612. end = 0;
  613. }
  614. return end;
  615. }
  616. /* char_quote • '"' parsing a quote */
  617. static size_t
  618. char_quote(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  619. {
  620. size_t end, nq = 0, i, f_begin, f_end;
  621. /* counting the number of quotes in the delimiter */
  622. while (nq < size && data[nq] == '"')
  623. nq++;
  624. /* finding the next delimiter */
  625. i = 0;
  626. for (end = nq; end < size && i < nq; end++) {
  627. if (data[end] == '"') i++;
  628. else i = 0;
  629. }
  630. if (i < nq && end >= size)
  631. return 0; /* no matching delimiter */
  632. /* trimming outside whitespaces */
  633. f_begin = nq;
  634. while (f_begin < end && data[f_begin] == ' ')
  635. f_begin++;
  636. f_end = end - nq;
  637. while (f_end > nq && data[f_end-1] == ' ')
  638. f_end--;
  639. /* real quote */
  640. if (f_begin < f_end) {
  641. hoedown_buffer work = { data + f_begin, f_end - f_begin, 0, 0 };
  642. if (!md->md.quote(ob, &work, md->md.opaque))
  643. end = 0;
  644. } else {
  645. if (!md->md.quote(ob, 0, md->md.opaque))
  646. end = 0;
  647. }
  648. return end;
  649. }
  650. /* char_escape • '\\' backslash escape */
  651. static size_t
  652. char_escape(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  653. {
  654. static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
  655. hoedown_buffer work = { 0, 0, 0, 0 };
  656. if (size > 1) {
  657. if (strchr(escape_chars, data[1]) == NULL)
  658. return 0;
  659. if (md->md.normal_text) {
  660. work.data = data + 1;
  661. work.size = 1;
  662. md->md.normal_text(ob, &work, md->md.opaque);
  663. }
  664. else hoedown_buffer_putc(ob, data[1]);
  665. } else if (size == 1) {
  666. hoedown_buffer_putc(ob, data[0]);
  667. }
  668. return 2;
  669. }
  670. /* char_entity • '&' escaped when it doesn't belong to an entity */
  671. /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
  672. static size_t
  673. char_entity(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  674. {
  675. size_t end = 1;
  676. hoedown_buffer work = { 0, 0, 0, 0 };
  677. if (end < size && data[end] == '#')
  678. end++;
  679. while (end < size && isalnum(data[end]))
  680. end++;
  681. if (end < size && data[end] == ';')
  682. end++; /* real entity */
  683. else
  684. return 0; /* lone '&' */
  685. if (md->md.entity) {
  686. work.data = data;
  687. work.size = end;
  688. md->md.entity(ob, &work, md->md.opaque);
  689. }
  690. else hoedown_buffer_put(ob, data, end);
  691. return end;
  692. }
  693. /* char_langle_tag • '<' when tags or autolinks are allowed */
  694. static size_t
  695. char_langle_tag(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  696. {
  697. enum hoedown_autolink altype = HOEDOWN_AUTOLINK_NONE;
  698. size_t end = tag_length(data, size, &altype);
  699. hoedown_buffer work = { data, end, 0, 0 };
  700. int ret = 0;
  701. if (end > 2) {
  702. if (md->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) {
  703. hoedown_buffer *u_link = newbuf(md, BUFFER_SPAN);
  704. work.data = data + 1;
  705. work.size = end - 2;
  706. unscape_text(u_link, &work);
  707. ret = md->md.autolink(ob, u_link, altype, md->md.opaque);
  708. popbuf(md, BUFFER_SPAN);
  709. }
  710. else if (md->md.raw_html_tag)
  711. ret = md->md.raw_html_tag(ob, &work, md->md.opaque);
  712. }
  713. if (!ret) return 0;
  714. else return end;
  715. }
  716. static size_t
  717. char_autolink_www(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  718. {
  719. hoedown_buffer *link, *link_url, *link_text;
  720. size_t link_len, rewind;
  721. if (!md->md.link || md->in_link_body)
  722. return 0;
  723. link = newbuf(md, BUFFER_SPAN);
  724. if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) {
  725. link_url = newbuf(md, BUFFER_SPAN);
  726. HOEDOWN_BUFPUTSL(link_url, "http://");
  727. hoedown_buffer_put(link_url, link->data, link->size);
  728. ob->size -= rewind;
  729. if (md->md.normal_text) {
  730. link_text = newbuf(md, BUFFER_SPAN);
  731. md->md.normal_text(link_text, link, md->md.opaque);
  732. md->md.link(ob, link_url, NULL, link_text, md->md.opaque);
  733. popbuf(md, BUFFER_SPAN);
  734. } else {
  735. md->md.link(ob, link_url, NULL, link, md->md.opaque);
  736. }
  737. popbuf(md, BUFFER_SPAN);
  738. }
  739. popbuf(md, BUFFER_SPAN);
  740. return link_len;
  741. }
  742. static size_t
  743. char_autolink_email(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  744. {
  745. hoedown_buffer *link;
  746. size_t link_len, rewind;
  747. if (!md->md.autolink || md->in_link_body)
  748. return 0;
  749. link = newbuf(md, BUFFER_SPAN);
  750. if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
  751. ob->size -= rewind;
  752. md->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, md->md.opaque);
  753. }
  754. popbuf(md, BUFFER_SPAN);
  755. return link_len;
  756. }
  757. static size_t
  758. char_autolink_url(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  759. {
  760. hoedown_buffer *link;
  761. size_t link_len, rewind;
  762. if (!md->md.autolink || md->in_link_body)
  763. return 0;
  764. link = newbuf(md, BUFFER_SPAN);
  765. if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
  766. ob->size -= rewind;
  767. md->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, md->md.opaque);
  768. }
  769. popbuf(md, BUFFER_SPAN);
  770. return link_len;
  771. }
  772. /* char_link • '[': parsing a link or an image
   *
   * data[0] is the opening '['; the construct is an image when the byte
   * before it is '!'. Four forms are handled, in this order:
   *   - footnote reference  [^name]      (HOEDOWN_EXT_FOOTNOTES only)
   *   - inline link         [text](link "title")
   *   - reference link      [text][id]   (empty id: the text is the id)
   *   - shortcut reference  [text]
   *
   * Returns the number of bytes consumed when the renderer callback
   * returned non-zero, and 0 otherwise. Every span buffer taken from the
   * pool in here is given back at the cleanup label by restoring the pool
   * depth recorded on entry. */
  773. static size_t
  774. char_link(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  775. {
  776. int is_img = (offset && data[-1] == '!'), level;
  777. size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
  778. hoedown_buffer *content = 0;
  779. hoedown_buffer *link = 0;
  780. hoedown_buffer *title = 0;
  781. hoedown_buffer *u_link = 0;
  /* span pool depth on entry, restored at cleanup */
  782. size_t org_work_size = md->work_bufs[BUFFER_SPAN].size;
  783. int text_has_nl = 0, ret = 0;
  784. int in_title = 0, qtype = 0;
  785. /* checking whether the correct renderer exists */
  786. if ((is_img && !md->md.image) || (!is_img && !md->md.link))
  787. goto cleanup;
  788. /* looking for the matching closing bracket */
  /* nested '[' ... ']' pairs are counted; a byte preceded by a backslash is
   * skipped, except that a newline is always recorded in text_has_nl */
  789. for (level = 1; i < size; i++) {
  790. if (data[i] == '\n')
  791. text_has_nl = 1;
  792. else if (data[i - 1] == '\\')
  793. continue;
  794. else if (data[i] == '[')
  795. level++;
  796. else if (data[i] == ']') {
  797. level--;
  798. if (level <= 0)
  799. break;
  800. }
  801. }
  802. if (i >= size)
  803. goto cleanup;
  /* txt_e is the index of the closing ']' of the link text */
  804. txt_e = i;
  805. i++;
  806. /* footnote link */
  807. if (md->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^') {
  808. hoedown_buffer id = { 0, 0, 0, 0 };
  809. struct footnote_ref *fr;
  /* "[^]" carries no name */
  810. if (txt_e < 3)
  811. goto cleanup;
  /* the footnote name is what lies between "[^" and "]" */
  812. id.data = data + 2;
  813. id.size = txt_e - 2;
  814. fr = find_footnote_ref(&md->footnotes_found, id.data, id.size);
  815. /* mark footnote used */
  /* the footnote number is its position in the used list at first use */
  816. if (fr && !fr->is_used) {
  817. if(!add_footnote_ref(&md->footnotes_used, fr))
  818. goto cleanup;
  819. fr->is_used = 1;
  820. fr->num = md->footnotes_used.count;
  821. }
  822. /* render */
  823. if (fr && md->md.footnote_ref)
  824. ret = md->md.footnote_ref(ob, fr->num, md->md.opaque);
  /* with the extension on, "[^...]" never falls through to the link forms */
  825. goto cleanup;
  826. }
  827. /* skip any amount of whitespace or newline */
  828. /* (this is much more laxist than original markdown syntax) */
  829. while (i < size && _isspace(data[i]))
  830. i++;
  831. /* inline style link */
  832. if (i < size && data[i] == '(') {
  833. size_t nb_p;
  834. /* skipping initial whitespace */
  835. i++;
  836. while (i < size && _isspace(data[i]))
  837. i++;
  838. link_b = i;
  839. /* looking for link end: ' " ) */
  840. /* Count the number of open parenthesis */
  841. nb_p = 0;
  842. while (i < size) {
  843. if (data[i] == '\\') i += 2;
  844. else if (data[i] == '(' && i != 0) {
  845. nb_p++; i++;
  846. }
  847. else if (data[i] == ')') {
  848. if (nb_p == 0) break;
  /* NOTE(review): only "nb_p--" belongs to the else; the "i++" after it is
   * a separate statement that runs whenever the break above is not taken */
  849. else nb_p--; i++;
  850. } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
  851. else i++;
  852. }
  853. if (i >= size) goto cleanup;
  854. link_e = i;
  855. /* looking for title end if present */
  856. if (data[i] == '\'' || data[i] == '"') {
  857. qtype = data[i];
  858. in_title = 1;
  859. i++;
  860. title_b = i;
  /* a ')' only ends the link once a quote matching the opening one was seen */
  861. while (i < size) {
  862. if (data[i] == '\\') i += 2;
  863. else if (data[i] == qtype) {in_title = 0; i++;}
  864. else if ((data[i] == ')') && !in_title) break;
  865. else i++;
  866. }
  867. if (i >= size) goto cleanup;
  868. /* skipping whitespaces after title */
  869. title_e = i - 1;
  870. while (title_e > title_b && _isspace(data[title_e]))
  871. title_e--;
  872. /* checking for closing quote presence */
  /* without a closing quote there is no title: the whole run up to ')'
   * is taken as the link */
  873. if (data[title_e] != '\'' && data[title_e] != '"') {
  874. title_b = title_e = 0;
  875. link_e = i;
  876. }
  877. }
  878. /* remove whitespace at the end of the link */
  879. while (link_e > link_b && _isspace(data[link_e - 1]))
  880. link_e--;
  881. /* remove optional angle brackets around the link */
  882. if (data[link_b] == '<') link_b++;
  883. if (data[link_e - 1] == '>') link_e--;
  884. /* building escaped link and title */
  885. if (link_e > link_b) {
  886. link = newbuf(md, BUFFER_SPAN);
  887. hoedown_buffer_put(link, data + link_b, link_e - link_b);
  888. }
  889. if (title_e > title_b) {
  890. title = newbuf(md, BUFFER_SPAN);
  891. hoedown_buffer_put(title, data + title_b, title_e - title_b);
  892. }
  /* step past the closing ')' */
  893. i++;
  894. }
  895. /* reference style link */
  896. else if (i < size && data[i] == '[') {
  897. hoedown_buffer id = { 0, 0, 0, 0 };
  898. struct link_ref *lr;
  899. /* looking for the id */
  900. i++;
  901. link_b = i;
  902. while (i < size && data[i] != ']') i++;
  903. if (i >= size) goto cleanup;
  904. link_e = i;
  905. /* finding the link_ref */
  /* an empty "[]" means the link text itself is the id; when that text
   * spans lines, each newline is turned into a single space (or dropped
   * if a space already precedes it) */
  906. if (link_b == link_e) {
  907. if (text_has_nl) {
  908. hoedown_buffer *b = newbuf(md, BUFFER_SPAN);
  909. size_t j;
  910. for (j = 1; j < txt_e; j++) {
  911. if (data[j] != '\n')
  912. hoedown_buffer_putc(b, data[j]);
  913. else if (data[j - 1] != ' ')
  914. hoedown_buffer_putc(b, ' ');
  915. }
  916. id.data = b->data;
  917. id.size = b->size;
  918. } else {
  919. id.data = data + 1;
  920. id.size = txt_e - 1;
  921. }
  922. } else {
  923. id.data = data + link_b;
  924. id.size = link_e - link_b;
  925. }
  926. lr = find_link_ref(md->refs, id.data, id.size);
  927. if (!lr)
  928. goto cleanup;
  929. /* keeping link and title from link_ref */
  930. link = lr->link;
  931. title = lr->title;
  /* step past the closing ']' of the id */
  932. i++;
  933. }
  934. /* shortcut reference style link */
  935. else {
  936. hoedown_buffer id = { 0, 0, 0, 0 };
  937. struct link_ref *lr;
  938. /* crafting the id */
  /* same newline folding as in the empty-id reference case */
  939. if (text_has_nl) {
  940. hoedown_buffer *b = newbuf(md, BUFFER_SPAN);
  941. size_t j;
  942. for (j = 1; j < txt_e; j++) {
  943. if (data[j] != '\n')
  944. hoedown_buffer_putc(b, data[j]);
  945. else if (data[j - 1] != ' ')
  946. hoedown_buffer_putc(b, ' ');
  947. }
  948. id.data = b->data;
  949. id.size = b->size;
  950. } else {
  951. id.data = data + 1;
  952. id.size = txt_e - 1;
  953. }
  954. /* finding the link_ref */
  955. lr = find_link_ref(md->refs, id.data, id.size);
  956. if (!lr)
  957. goto cleanup;
  958. /* keeping link and title from link_ref */
  959. link = lr->link;
  960. title = lr->title;
  961. /* rewinding the whitespace */
  /* only "[text]" is consumed; whitespace skipped after it stays in the input */
  962. i = txt_e + 1;
  963. }
  964. /* building content: img alt is escaped, link content is parsed */
  965. if (txt_e > 1) {
  966. content = newbuf(md, BUFFER_SPAN);
  967. if (is_img) {
  968. hoedown_buffer_put(content, data + 1, txt_e - 1);
  969. } else {
  970. /* disable autolinking when parsing inline the
  971. * content of a link */
  972. md->in_link_body = 1;
  973. parse_inline(content, md, data + 1, txt_e - 1);
  974. md->in_link_body = 0;
  975. }
  976. }
  /* the renderer receives the link with backslash escapes removed */
  977. if (link) {
  978. u_link = newbuf(md, BUFFER_SPAN);
  979. unscape_text(u_link, link);
  980. }
  981. /* calling the relevant rendering function */
  982. if (is_img) {
  /* drop the '!' that was already copied to the output as normal text */
  983. if (ob->size && ob->data[ob->size - 1] == '!')
  984. ob->size -= 1;
  985. ret = md->md.image(ob, u_link, title, content, md->md.opaque);
  986. } else {
  987. ret = md->md.link(ob, u_link, title, content, md->md.opaque);
  988. }
  989. /* cleanup */
  990. cleanup:
  /* give back every span buffer taken above by restoring the entry depth */
  991. md->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
  992. return ret ? i : 0;
  993. }
  994. static size_t
  995. char_superscript(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
  996. {
  997. size_t sup_start, sup_len;
  998. hoedown_buffer *sup;
  999. if (!md->md.superscript)
  1000. return 0;
  1001. if (size < 2)
  1002. return 0;
  1003. if (data[1] == '(') {
  1004. sup_start = sup_len = 2;
  1005. while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
  1006. sup_len++;
  1007. if (sup_len == size)
  1008. return 0;
  1009. } else {
  1010. sup_start = sup_len = 1;
  1011. while (sup_len < size && !_isspace(data[sup_len]))
  1012. sup_len++;
  1013. }
  1014. if (sup_len - sup_start == 0)
  1015. return (sup_start == 2) ? 3 : 0;
  1016. sup = newbuf(md, BUFFER_SPAN);
  1017. parse_inline(sup, md, data + sup_start, sup_len - sup_start);
  1018. md->md.superscript(ob, sup, md->md.opaque);
  1019. popbuf(md, BUFFER_SPAN);
  1020. return (sup_start == 2) ? sup_len + 1 : sup_len;
  1021. }
  1022. /*********************************
  1023. * BLOCK-LEVEL PARSING FUNCTIONS *
  1024. *********************************/
  1025. /* is_empty • returns the line length when it is empty, 0 otherwise */
  1026. static size_t
  1027. is_empty(const uint8_t *data, size_t size)
  1028. {
  1029. size_t i;
  1030. for (i = 0; i < size && data[i] != '\n'; i++)
  1031. if (data[i] != ' ')
  1032. return 0;
  1033. return i + 1;
  1034. }
  1035. /* is_hrule • returns whether a line is a horizontal rule */
  1036. static int
  1037. is_hrule(uint8_t *data, size_t size)
  1038. {
  1039. size_t i = 0, n = 0;
  1040. uint8_t c;
  1041. /* skipping initial spaces */
  1042. if (size < 3) return 0;
  1043. if (data[0] == ' ') { i++;
  1044. if (data[1] == ' ') { i++;
  1045. if (data[2] == ' ') { i++; } } }
  1046. /* looking at the hrule uint8_t */
  1047. if (i + 2 >= size
  1048. || (data[i] != '*' && data[i] != '-' && data[i] != '_'))
  1049. return 0;
  1050. c = data[i];
  1051. /* the whole line must be the char or whitespace */
  1052. while (i < size && data[i] != '\n') {
  1053. if (data[i] == c) n++;
  1054. else if (data[i] != ' ')
  1055. return 0;
  1056. i++;
  1057. }
  1058. return n >= 3;
  1059. }
  1060. /* check if a line begins with a code fence; return the
  1061. * width of the code fence */
  1062. static size_t
  1063. prefix_codefence(uint8_t *data, size_t size)
  1064. {
  1065. size_t i = 0, n = 0;
  1066. uint8_t c;
  1067. /* skipping initial spaces */
  1068. if (size < 3) return 0;
  1069. if (data[0] == ' ') { i++;
  1070. if (data[1] == ' ') { i++;
  1071. if (data[2] == ' ') { i++; } } }
  1072. /* looking at the hrule uint8_t */
  1073. if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
  1074. return 0;
  1075. c = data[i];
  1076. /* the whole line must be the uint8_t or whitespace */
  1077. while (i < size && data[i] == c) {
  1078. n++; i++;
  1079. }
  1080. if (n < 3)
  1081. return 0;
  1082. return i;
  1083. }
  1084. /* check if a line is a code fence; return its size if it is */
  1085. static size_t
  1086. is_codefence(uint8_t *data, size_t size, hoedown_buffer *syntax)
  1087. {
  1088. size_t i = 0, syn_len = 0;
  1089. uint8_t *syn_start;
  1090. i = prefix_codefence(data, size);
  1091. if (i == 0)
  1092. return 0;
  1093. while (i < size && data[i] == ' ')
  1094. i++;
  1095. syn_start = data + i;
  1096. if (i < size && data[i] == '{') {
  1097. i++; syn_start++;
  1098. while (i < size && data[i] != '}' && data[i] != '\n') {
  1099. syn_len++; i++;
  1100. }
  1101. if (i == size || data[i] != '}')
  1102. return 0;
  1103. /* strip all whitespace at the beginning and the end
  1104. * of the {} block */
  1105. while (syn_len > 0 && _isspace(syn_start[0])) {
  1106. syn_start++; syn_len--;
  1107. }
  1108. while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
  1109. syn_len--;
  1110. i++;
  1111. } else {
  1112. while (i < size && !_isspace(data[i])) {
  1113. syn_len++; i++;
  1114. }
  1115. }
  1116. if (syntax) {
  1117. syntax->data = syn_start;
  1118. syntax->size = syn_len;
  1119. }
  1120. while (i < size && data[i] != '\n') {
  1121. if (!_isspace(data[i]))
  1122. return 0;
  1123. i++;
  1124. }
  1125. return i + 1;
  1126. }
  1127. /* is_atxheader • returns whether the line is a hash-prefixed header */
  1128. static int
  1129. is_atxheader(hoedown_markdown *md, uint8_t *data, size_t size)
  1130. {
  1131. if (data[0] != '#')
  1132. return 0;
  1133. if (md->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) {
  1134. size_t level = 0;
  1135. while (level < size && level < 6 && data[level] == '#')
  1136. level++;
  1137. if (level < size && data[level] != ' ')
  1138. return 0;
  1139. }
  1140. return 1;
  1141. }
  1142. /* is_headerline • returns whether the line is a setext-style hdr underline */
  1143. static int
  1144. is_headerline(uint8_t *data, size_t size)
  1145. {
  1146. size_t i = 0;
  1147. /* test of level 1 header */
  1148. if (data[i] == '=') {
  1149. for (i = 1; i < size && data[i] == '='; i++);
  1150. while (i < size && data[i] == ' ') i++;
  1151. return (i >= size || data[i] == '\n') ? 1 : 0; }
  1152. /* test of level 2 header */
  1153. if (data[i] == '-') {
  1154. for (i = 1; i < size && data[i] == '-'; i++);
  1155. while (i < size && data[i] == ' ') i++;
  1156. return (i >= size || data[i] == '\n') ? 2 : 0; }
  1157. return 0;
  1158. }
  1159. static int
  1160. is_next_headerline(uint8_t *data, size_t size)
  1161. {
  1162. size_t i = 0;
  1163. while (i < size && data[i] != '\n')
  1164. i++;
  1165. if (++i >= size)
  1166. return 0;
  1167. return is_headerline(data + i, size - i);
  1168. }
  1169. /* prefix_quote • returns blockquote prefix length */
  1170. static size_t
  1171. prefix_quote(uint8_t *data, size_t size)
  1172. {
  1173. size_t i = 0;
  1174. if (i < size && data[i] == ' ') i++;
  1175. if (i < size && data[i] == ' ') i++;
  1176. if (i < size && data[i] == ' ') i++;
  1177. if (i < size && data[i] == '>') {
  1178. if (i + 1 < size && data[i + 1] == ' ')
  1179. return i + 2;
  1180. return i + 1;
  1181. }
  1182. return 0;
  1183. }
  1184. /* prefix_code • returns prefix length for block code*/
  1185. static size_t
  1186. prefix_code(uint8_t *data, size_t size)
  1187. {
  1188. if (size > 3 && data[0] == ' ' && data[1] == ' '
  1189. && data[2] == ' ' && data[3] == ' ') return 4;
  1190. return 0;
  1191. }
  1192. /* prefix_oli • returns ordered list item prefix */
  1193. static size_t
  1194. prefix_oli(uint8_t *data, size_t size)
  1195. {
  1196. size_t i = 0;
  1197. if (i < size && data[i] == ' ') i++;
  1198. if (i < size && data[i] == ' ') i++;
  1199. if (i < size && data[i] == ' ') i++;
  1200. if (i >= size || data[i] < '0' || data[i] > '9')
  1201. return 0;
  1202. while (i < size && data[i] >= '0' && data[i] <= '9')
  1203. i++;
  1204. if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ')
  1205. return 0;
  1206. if (is_next_headerline(data + i, size - i))
  1207. return 0;
  1208. return i + 2;
  1209. }
  1210. /* prefix_uli • returns ordered list item prefix */
  1211. static size_t
  1212. prefix_uli(uint8_t *data, size_t size)
  1213. {
  1214. size_t i = 0;
  1215. if (i < size && data[i] == ' ') i++;
  1216. if (i < size && data[i] == ' ') i++;
  1217. if (i < size && data[i] == ' ') i++;
  1218. if (i + 1 >= size ||
  1219. (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
  1220. data[i + 1] != ' ')
  1221. return 0;
  1222. if (is_next_headerline(data + i, size - i))
  1223. return 0;
  1224. return i + 2;
  1225. }
/* parse_block • forward declaration: parses a run of blocks into ob (returns nothing); the block parsers below call it recursively */
  1227. static void parse_block(hoedown_buffer *ob, hoedown_markdown *md,
  1228. uint8_t *data, size_t size);
  1229. /* parse_blockquote • handles parsing of a blockquote fragment */
  1230. static size_t
  1231. parse_blockquote(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
  1232. {
  1233. size_t beg, end = 0, pre, work_size = 0;
  1234. uint8_t *work_data = 0;
  1235. hoedown_buffer *out = 0;
  1236. out = newbuf(md, BUFFER_BLOCK);
  1237. beg = 0;
  1238. while (beg < size) {
  1239. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1240. pre = prefix_quote(data + beg, end - beg);
  1241. if (pre)
  1242. beg += pre; /* skipping prefix */
  1243. /* empty line followed by non-quote line */
  1244. else if (is_empty(data + beg, end - beg) &&
  1245. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  1246. !is_empty(data + end, size - end))))
  1247. break;
  1248. if (beg < end) { /* copy into the in-place working buffer */
  1249. /* hoedown_buffer_put(work, data + beg, end - beg); */
  1250. if (!work_data)
  1251. work_data = data + beg;
  1252. else if (data + beg != work_data + work_size)
  1253. memmove(work_data + work_size, data + beg, end - beg);
  1254. work_size += end - beg;
  1255. }
  1256. beg = end;
  1257. }
  1258. parse_block(out, md, work_data, work_size);
  1259. if (md->md.blockquote)
  1260. md->md.blockquote(ob, out, md->md.opaque);
  1261. popbuf(md, BUFFER_BLOCK);
  1262. return end;
  1263. }
  1264. static size_t
  1265. parse_htmlblock(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, int do_render);
/* parse_paragraph • handles parsing of a regular paragraph
 *
 * Scans forward line by line until something ends the paragraph: a blank
 * line, a setext underline, an ATX header, a horizontal rule, a blockquote
 * prefix, or (with HOEDOWN_EXT_LAX_SPACING) the start of a list, HTML block
 * or code fence.
 *
 * When the terminator is a setext underline, the last line of the collected
 * text becomes the header and any lines before it are emitted as a separate
 * paragraph first.
 *
 * Returns the offset at which block parsing should resume. */
static size_t
parse_paragraph(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
{
	size_t i = 0, end = 0;
	int level = 0;              /* setext header level, 0 for a plain paragraph */
	hoedown_buffer work = { data, 0, 0, 0 };

	while (i < size) {
		/* end = offset just past the newline of the line starting at i */
		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;

		/* a blank line closes the paragraph (and is consumed: end stays
		 * past it) */
		if (is_empty(data + i, size - i))
			break;

		/* a setext underline closes the paragraph and is consumed too;
		 * level records which kind of header it makes */
		if ((level = is_headerline(data + i, size - i)) != 0)
			break;

		/* these blocks interrupt the paragraph but must be re-parsed by
		 * the caller, so end is rewound to the start of the line */
		if (is_atxheader(md, data + i, size - i) ||
			is_hrule(data + i, size - i) ||
			prefix_quote(data + i, size - i)) {
			end = i;
			break;
		}

		/*
		 * Early termination of a paragraph with the same logic
		 * as Markdown 1.0.0. If this logic is applied, the
		 * Markdown 1.0.3 test suite won't pass cleanly
		 *
		 * :: If the first character in a new line is not a letter,
		 * let's check to see if there's some kind of block starting
		 * here
		 */
		if ((md->ext_flags & HOEDOWN_EXT_LAX_SPACING) && !isalnum(data[i])) {
			if (prefix_oli(data + i, size - i) ||
				prefix_uli(data + i, size - i)) {
				end = i;
				break;
			}

			/* see if an html block starts here; do_render is 0, so
			 * this call only probes and does not emit anything */
			if (data[i] == '<' && md->md.blockhtml &&
				parse_htmlblock(ob, md, data + i, size - i, 0)) {
				end = i;
				break;
			}

			/* see if a code fence starts here */
			if ((md->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 &&
				is_codefence(data + i, size - i, NULL) != 0) {
				end = i;
				break;
			}
		}

		i = end;
	}

	/* the paragraph text is data[0..i), minus trailing newlines */
	work.size = i;
	while (work.size && data[work.size - 1] == '\n')
		work.size--;

	if (!level) {
		/* plain paragraph */
		hoedown_buffer *tmp = newbuf(md, BUFFER_BLOCK);
		parse_inline(tmp, md, work.data, work.size);
		if (md->md.paragraph)
			md->md.paragraph(ob, tmp, md->md.opaque);
		popbuf(md, BUFFER_BLOCK);
	} else {
		/* setext header: only the last collected line is the title */
		hoedown_buffer *header_work;

		if (work.size) {
			size_t beg;

			i = work.size;   /* remember the end of the title line */
			work.size -= 1;

			/* walk back to the newline preceding the title line */
			while (work.size && data[work.size] != '\n')
				work.size -= 1;

			beg = work.size + 1;   /* start of the title line */

			/* drop newlines separating the title from earlier text */
			while (work.size && data[work.size - 1] == '\n')
				work.size -= 1;

			if (work.size > 0) {
				/* earlier lines exist: emit them as a paragraph,
				 * then narrow work down to the title line */
				hoedown_buffer *tmp = newbuf(md, BUFFER_BLOCK);
				parse_inline(tmp, md, work.data, work.size);

				if (md->md.paragraph)
					md->md.paragraph(ob, tmp, md->md.opaque);

				popbuf(md, BUFFER_BLOCK);
				work.data += beg;
				work.size = i - beg;
			}
			/* the title is the only line: use the whole text */
			else work.size = i;
		}

		header_work = newbuf(md, BUFFER_SPAN);
		parse_inline(header_work, md, work.data, work.size);

		if (md->md.header)
			md->md.header(ob, header_work, (int)level, md->md.opaque);

		popbuf(md, BUFFER_SPAN);
	}

	return end;
}
  1354. /* parse_fencedcode • handles parsing of a block-level code fragment */
  1355. static size_t
  1356. parse_fencedcode(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
  1357. {
  1358. size_t beg, end;
  1359. hoedown_buffer *work = 0;
  1360. hoedown_buffer lang = { 0, 0, 0, 0 };
  1361. beg = is_codefence(data, size, &lang);
  1362. if (beg == 0) return 0;
  1363. work = newbuf(md, BUFFER_BLOCK);
  1364. while (beg < size) {
  1365. size_t fence_end;
  1366. hoedown_buffer fence_trail = { 0, 0, 0, 0 };
  1367. fence_end = is_codefence(data + beg, size - beg, &fence_trail);
  1368. if (fence_end != 0 && fence_trail.size == 0) {
  1369. beg += fence_end;
  1370. break;
  1371. }
  1372. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1373. if (beg < end) {
  1374. /* verbatim copy to the working buffer,
  1375. escaping entities */
  1376. if (is_empty(data + beg, end - beg))
  1377. hoedown_buffer_putc(work, '\n');
  1378. else hoedown_buffer_put(work, data + beg, end - beg);
  1379. }
  1380. beg = end;
  1381. }
  1382. if (work->size && work->data[work->size - 1] != '\n')
  1383. hoedown_buffer_putc(work, '\n');
  1384. if (md->md.blockcode)
  1385. md->md.blockcode(ob, work, lang.size ? &lang : NULL, md->md.opaque);
  1386. popbuf(md, BUFFER_BLOCK);
  1387. return beg;
  1388. }
  1389. static size_t
  1390. parse_blockcode(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
  1391. {
  1392. size_t beg, end, pre;
  1393. hoedown_buffer *work = 0;
  1394. work = newbuf(md, BUFFER_BLOCK);
  1395. beg = 0;
  1396. while (beg < size) {
  1397. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1398. pre = prefix_code(data + beg, end - beg);
  1399. if (pre)
  1400. beg += pre; /* skipping prefix */
  1401. else if (!is_empty(data + beg, end - beg))
  1402. /* non-empty non-prefixed line breaks the pre */
  1403. break;
  1404. if (beg < end) {
  1405. /* verbatim copy to the working buffer,
  1406. escaping entities */
  1407. if (is_empty(data + beg, end - beg))
  1408. hoedown_buffer_putc(work, '\n');
  1409. else hoedown_buffer_put(work, data + beg, end - beg);
  1410. }
  1411. beg = end;
  1412. }
  1413. while (work->size && work->data[work->size - 1] == '\n')
  1414. work->size -= 1;
  1415. hoedown_buffer_putc(work, '\n');
  1416. if (md->md.blockcode)
  1417. md->md.blockcode(ob, work, NULL, md->md.opaque);
  1418. popbuf(md, BUFFER_BLOCK);
  1419. return beg;
  1420. }
/* parse_listitem • parsing of a single list item
 *
 * data must start at the item's own line: the bullet or number prefix is
 * located here with prefix_uli / prefix_oli and skipped.
 *
 * The item's text is gathered into a working buffer with the prefix and up
 * to four columns of indentation removed from each line, then rendered
 * either inline or as blocks and passed to the listitem callback.
 *
 * flags is read for HOEDOWN_LIST_ORDERED and HOEDOWN_LI_BLOCK, and may be
 * updated with:
 *   - HOEDOWN_LI_END   when this item is the last one of its list
 *   - HOEDOWN_LI_BLOCK when the item contains a blank line
 *
 * Returns the number of bytes consumed, or 0 when data does not begin with
 * a list item prefix. */
static size_t
parse_listitem(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, int *flags)
{
	hoedown_buffer *work = 0, *inter = 0;
	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
	int in_empty = 0, has_inside_empty = 0, in_fence = 0;

	/* keeping track of the first indentation prefix */
	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
		orgpre++;

	/* beg = length of the item prefix, whichever list kind matches */
	beg = prefix_uli(data, size);
	if (!beg)
		beg = prefix_oli(data, size);

	if (!beg)
		return 0;

	/* skipping to the beginning of the following line */
	end = beg;
	while (end < size && data[end - 1] != '\n')
		end++;

	/* getting working buffers: work holds the raw item text,
	 * inter receives the rendered item */
	work = newbuf(md, BUFFER_SPAN);
	inter = newbuf(md, BUFFER_SPAN);

	/* putting the first line into the working buffer */
	hoedown_buffer_put(work, data + beg, end - beg);
	beg = end;

	/* process the following lines */
	while (beg < size) {
		size_t has_next_uli = 0, has_next_oli = 0;

		/* end = offset just past the newline of the line starting at beg */
		end++;

		while (end < size && data[end - 1] != '\n')
			end++;

		/* process an empty line: remember it and decide what it means
		 * once the next non-empty line is seen */
		if (is_empty(data + beg, end - beg)) {
			in_empty = 1;
			beg = end;
			continue;
		}

		/* calculating the indentation (at most four spaces) */
		i = 0;
		while (i < 4 && beg + i < end && data[beg + i] == ' ')
			i++;

		pre = i;

		/* every fence line flips the in-fence state */
		if (md->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
			if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
				in_fence = !in_fence;
		}

		/* Only check for new list items if we are **not** inside
		 * a fenced code block */
		if (!in_fence) {
			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
			has_next_oli = prefix_oli(data + beg + i, end - beg - i);
		}

		/* checking for ul/ol switch: after a blank line, an item of
		 * the other list kind ends the whole list */
		if (in_empty && (
			((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
			(!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli))){
			*flags |= HOEDOWN_LI_END;
			break; /* the following item must have same list type */
		}

		/* checking for a new item (a bullet line that is really a
		 * horizontal rule does not count) */
		if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
			if (in_empty)
				has_inside_empty = 1;

			if (pre == orgpre) /* the following item must have */
				break; /* the same indentation */

			/* differently indented item: remember where the nested
			 * list starts inside the working buffer */
			if (!sublist)
				sublist = work->size;
		}
		/* joining only indented stuff after empty lines;
		 * note that now we only require 1 space of indentation
		 * to continue a list */
		else if (in_empty && pre == 0) {
			*flags |= HOEDOWN_LI_END;
			break;
		}
		else if (in_empty) {
			/* indented text after a blank line stays in the item;
			 * keep the blank line as a separator */
			hoedown_buffer_putc(work, '\n');
			has_inside_empty = 1;
		}

		in_empty = 0;

		/* adding the line without prefix into the working buffer */
		hoedown_buffer_put(work, data + beg + i, end - beg - i);
		beg = end;
	}

	/* render of li contents */
	if (has_inside_empty)
		*flags |= HOEDOWN_LI_BLOCK;

	if (*flags & HOEDOWN_LI_BLOCK) {
		/* intermediate render of block li: the part before a nested
		 * list and the nested list itself are parsed separately */
		if (sublist && sublist < work->size) {
			parse_block(inter, md, work->data, sublist);
			parse_block(inter, md, work->data + sublist, work->size - sublist);
		}
		else
			parse_block(inter, md, work->data, work->size);
	} else {
		/* intermediate render of inline li: only a nested list, if
		 * any, goes through the block parser */
		if (sublist && sublist < work->size) {
			parse_inline(inter, md, work->data, sublist);
			parse_block(inter, md, work->data + sublist, work->size - sublist);
		}
		else
			parse_inline(inter, md, work->data, work->size);
	}

	/* render of li itself */
	if (md->md.listitem)
		md->md.listitem(ob, inter, *flags, md->md.opaque);

	/* one popbuf per newbuf above (work and inter) */
	popbuf(md, BUFFER_SPAN);
	popbuf(md, BUFFER_SPAN);
	return beg;
}
  1533. /* parse_list • parsing ordered or unordered list block */
  1534. static size_t
  1535. parse_list(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, int flags)
  1536. {
  1537. hoedown_buffer *work = 0;
  1538. size_t i = 0, j;
  1539. work = newbuf(md, BUFFER_BLOCK);
  1540. while (i < size) {
  1541. j = parse_listitem(work, md, data + i, size - i, &flags);
  1542. i += j;
  1543. if (!j || (flags & HOEDOWN_LI_END))
  1544. break;
  1545. }
  1546. if (md->md.list)
  1547. md->md.list(ob, work, flags, md->md.opaque);
  1548. popbuf(md, BUFFER_BLOCK);
  1549. return i;
  1550. }
  1551. /* parse_atxheader • parsing of atx-style headers */
  1552. static size_t
  1553. parse_atxheader(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
  1554. {
  1555. size_t level = 0;
  1556. size_t i, end, skip;
  1557. while (level < size && level < 6 && data[level] == '#')
  1558. level++;
  1559. for (i = level; i < size && data[i] == ' '; i++);
  1560. for (end = i; end < size && data[end] != '\n'; end++);
  1561. skip = end;
  1562. while (end && data[end - 1] == '#')
  1563. end--;
  1564. while (end && data[end - 1] == ' ')
  1565. end--;
  1566. if (end > i) {
  1567. hoedown_buffer *work = newbuf(md, BUFFER_SPAN);
  1568. parse_inline(work, md, data + i, end - i);
  1569. if (md->md.header)
  1570. md->md.header(ob, work, (int)level, md->md.opaque);
  1571. popbuf(md, BUFFER_SPAN);
  1572. }
  1573. return skip;
  1574. }
  1575. /* parse_footnote_def • parse a single footnote definition */
  1576. static void
  1577. parse_footnote_def(hoedown_buffer *ob, hoedown_markdown *md, unsigned int num, uint8_t *data, size_t size)
  1578. {
  1579. hoedown_buffer *work = 0;
  1580. work = newbuf(md, BUFFER_SPAN);
  1581. parse_block(work, md, data, size);
  1582. if (md->md.footnote_def)
  1583. md->md.footnote_def(ob, work, num, md->md.opaque);
  1584. popbuf(md, BUFFER_SPAN);
  1585. }
  1586. /* parse_footnote_list • render the contents of the footnotes */
  1587. static void
  1588. parse_footnote_list(hoedown_buffer *ob, hoedown_markdown *md, struct footnote_list *footnotes)
  1589. {
  1590. hoedown_buffer *work = 0;
  1591. struct footnote_item *item;
  1592. struct footnote_ref *ref;
  1593. if (footnotes->count == 0)
  1594. return;
  1595. work = newbuf(md, BUFFER_BLOCK);
  1596. item = footnotes->head;
  1597. while (item) {
  1598. ref = item->ref;
  1599. parse_footnote_def(work, md, ref->num, ref->contents->data, ref->contents->size);
  1600. item = item->next;
  1601. }
  1602. if (md->md.footnotes)
  1603. md->md.footnotes(ob, work, md->md.opaque);
  1604. popbuf(md, BUFFER_BLOCK);
  1605. }
  1606. /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
  1607. /* returns the length on match, 0 otherwise */
  1608. static size_t
  1609. htmlblock_end_tag(
  1610. const char *tag,
  1611. size_t tag_len,
  1612. hoedown_markdown *md,
  1613. uint8_t *data,
  1614. size_t size)
  1615. {
  1616. size_t i, w;
  1617. /* checking if tag is a match */
  1618. if (tag_len + 3 >= size ||
  1619. strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
  1620. data[tag_len + 2] != '>')
  1621. return 0;
  1622. /* checking white lines */
  1623. i = tag_len + 3;
  1624. w = 0;
  1625. if (i < size && (w = is_empty(data + i, size - i)) == 0)
  1626. return 0; /* non-blank after tag */
  1627. i += w;
  1628. w = 0;
  1629. if (i < size)
  1630. w = is_empty(data + i, size - i);
  1631. return i + w;
  1632. }
  1633. static size_t
  1634. htmlblock_end(const char *curtag,
  1635. hoedown_markdown *md,
  1636. uint8_t *data,
  1637. size_t size,
  1638. int start_of_line)
  1639. {
  1640. size_t tag_size = strlen(curtag);
  1641. size_t i = 1, end_tag;
  1642. int block_lines = 0;
  1643. while (i < size) {
  1644. i++;
  1645. while (i < size && !(data[i - 1] == '<' && data[i] == '/')) {
  1646. if (data[i] == '\n')
  1647. block_lines++;
  1648. i++;
  1649. }
  1650. /* If we are only looking for unindented tags, skip the tag
  1651. * if it doesn't follow a newline.
  1652. *
  1653. * The only exception to this is if the tag is still on the
  1654. * initial line; in that case it still counts as a closing
  1655. * tag
  1656. */
  1657. if (start_of_line && block_lines > 0 && data[i - 2] != '\n')
  1658. continue;
  1659. if (i + 2 + tag_size >= size)
  1660. break;
  1661. end_tag = htmlblock_end_tag(curtag, tag_size, md, data + i - 1, size - i + 1);
  1662. if (end_tag)
  1663. return i + end_tag - 1;
  1664. }
  1665. return 0;
  1666. }
  1667. /* parse_htmlblock • parsing of inline HTML block */
  1668. static size_t
  1669. parse_htmlblock(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, int do_render)
  1670. {
  1671. size_t i, j = 0, tag_end;
  1672. const char *curtag = NULL;
  1673. hoedown_buffer work = { data, 0, 0, 0 };
  1674. /* identification of the opening tag */
  1675. if (size < 2 || data[0] != '<')
  1676. return 0;
  1677. i = 1;
  1678. while (i < size && data[i] != '>' && data[i] != ' ')
  1679. i++;
  1680. if (i < size)
  1681. curtag = hoedown_find_block_tag((char *)data + 1, (int)i - 1);
  1682. /* handling of special cases */
  1683. if (!curtag) {
  1684. /* HTML comment, laxist form */
  1685. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  1686. i = 5;
  1687. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  1688. i++;
  1689. i++;
  1690. if (i < size)
  1691. j = is_empty(data + i, size - i);
  1692. if (j) {
  1693. work.size = i + j;
  1694. if (do_render && md->md.blockhtml)
  1695. md->md.blockhtml(ob, &work, md->md.opaque);
  1696. return work.size;
  1697. }
  1698. }
  1699. /* HR, which is the only self-closing block tag considered */
  1700. if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
  1701. i = 3;
  1702. while (i < size && data[i] != '>')
  1703. i++;
  1704. if (i + 1 < size) {
  1705. i++;
  1706. j = is_empty(data + i, size - i);
  1707. if (j) {
  1708. work.size = i + j;
  1709. if (do_render && md->md.blockhtml)
  1710. md->md.blockhtml(ob, &work, md->md.opaque);
  1711. return work.size;
  1712. }
  1713. }
  1714. }
  1715. /* no special case recognised */
  1716. return 0;
  1717. }
  1718. /* looking for an unindented matching closing tag */
  1719. /* followed by a blank line */
  1720. tag_end = htmlblock_end(curtag, md, data, size, 1);
  1721. /* if not found, trying a second pass looking for indented match */
  1722. /* but not if tag is "ins" or "del" (following original Markdown.pl) */
  1723. if (!tag_end && strcmp(curtag, "ins") != 0 &&

Large files files are truncated, but you can click here to view the full file