PageRenderTime 62ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/misaka/hoedown/document.c

https://github.com/FSX/misaka
C | 3012 lines | 2784 code | 159 blank | 69 comment | 235 complexity | 5e80b52d9651aed69699268bc9bb5f6c MD5 | raw file
  1. #include "document.h"
  2. #include <assert.h>
  3. #include <string.h>
  4. #include <ctype.h>
  5. #include <stdio.h>
  6. #include "stack.h"
  7. #ifndef _MSC_VER
  8. #include <strings.h>
  9. #else
  10. #define strncasecmp _strnicmp
  11. #endif
/* number of buckets in the link reference table (hoedown_document.refs);
 * reference hashes are reduced modulo this value to pick a bucket */
#define REF_TABLE_SIZE 8
/* indices into hoedown_document.work_bufs: one scratch-buffer pool per kind */
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1
#define HOEDOWN_LI_END 8 /* internal list flag */
/* forward declaration; the definition is not part of this section of the file */
const char *hoedown_find_block_tag(const char *str, unsigned int len);
  17. /***************
  18. * LOCAL TYPES *
  19. ***************/
/* link_ref: reference to a link */
/* entries are chained per bucket of the reference table (see add_link_ref) */
struct link_ref {
	unsigned int id;        /* hash_link_ref() of the reference name */
	hoedown_buffer *link;   /* released by free_link_refs */
	hoedown_buffer *title;  /* released by free_link_refs */
	struct link_ref *next;  /* next entry in the same bucket, or NULL */
};
/* footnote_ref: reference to a footnote */
struct footnote_ref {
	unsigned int id;          /* hash_link_ref() of the footnote name */
	int is_used;              /* set to 1 by char_link the first time the footnote is referenced */
	unsigned int num;         /* set by char_link to the used-footnote count at first reference */
	hoedown_buffer *contents; /* released by free_footnote_ref */
};
/* footnote_item: an item in a footnote_list */
struct footnote_item {
	struct footnote_ref *ref;   /* the footnote this node refers to */
	struct footnote_item *next; /* following node, or NULL at the tail */
};
/* footnote_list: linked list of footnote_item */
struct footnote_list {
	unsigned int count;         /* incremented by add_footnote_ref for every appended item */
	struct footnote_item *head; /* first item, NULL while the list is empty */
	struct footnote_item *tail; /* last item; new items are appended after it */
};
/* char_trigger: function pointer to render active chars */
/* returns the number of chars taken care of (0 means the handler did nothing) */
/* ob is the buffer the handler renders into */
/* data is the pointer of the beginning of the span */
/* offset is the number of valid chars before data */
/* size is the number of chars available starting at data */
typedef size_t
(*char_trigger)(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
/* forward declarations of the handlers stored in markdown_char_ptrs */
static size_t char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
/* markdown_char_t: identifiers of the inline handlers. */
/* The enumerators are listed in the same order as the entries of */
/* markdown_char_ptrs, so each value doubles as an index into that table; */
/* keep both lists in sync when adding or reordering entries. */
enum markdown_char_t {
	MD_CHAR_NONE = 0, /* matches the NULL slot of markdown_char_ptrs */
	MD_CHAR_EMPHASIS,
	MD_CHAR_CODESPAN,
	MD_CHAR_LINEBREAK,
	MD_CHAR_LINK,
	MD_CHAR_IMAGE,
	MD_CHAR_LANGLE,
	MD_CHAR_ESCAPE,
	MD_CHAR_ENTITY,
	MD_CHAR_AUTOLINK_URL,
	MD_CHAR_AUTOLINK_EMAIL,
	MD_CHAR_AUTOLINK_WWW,
	MD_CHAR_SUPERSCRIPT,
	MD_CHAR_QUOTE,
	MD_CHAR_MATH
};
/* markdown_char_ptrs: dispatch table of inline handlers. */
/* parse_inline indexes it with the value stored in active_char[] for the */
/* current byte; slot 0 is NULL and is never called because a zero entry */
/* in active_char[] marks the byte as inactive. The order mirrors */
/* enum markdown_char_t. */
static char_trigger markdown_char_ptrs[] = {
	NULL,
	&char_emphasis,
	&char_codespan,
	&char_linebreak,
	&char_link,
	&char_image,
	&char_langle_tag,
	&char_escape,
	&char_entity,
	&char_autolink_url,
	&char_autolink_email,
	&char_autolink_www,
	&char_superscript,
	&char_quote,
	&char_math
};
/* hoedown_document: parser state shared by all parsing functions */
struct hoedown_document {
	hoedown_renderer md;                    /* renderer callbacks invoked as doc->md.<name>(...) */
	hoedown_renderer_data data;             /* passed by address as the last argument of every callback */
	struct link_ref *refs[REF_TABLE_SIZE];  /* link reference buckets, indexed by hash % REF_TABLE_SIZE */
	struct footnote_list footnotes_found;   /* list searched by char_link when resolving a footnote */
	struct footnote_list footnotes_used;    /* footnotes appended by char_link on first reference */
	uint8_t active_char[256];               /* per-byte index into markdown_char_ptrs; 0 = inactive */
	hoedown_stack work_bufs[2];             /* scratch-buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
	hoedown_extensions ext_flags;           /* tested against the HOEDOWN_EXT_* bits */
	size_t max_nesting;                     /* parse_inline returns early once the pools hold more buffers than this */
	int in_link_body;                       /* when non-zero the char_autolink_* handlers return 0 */
};
  111. /***************************
  112. * HELPER FUNCTIONS *
  113. ***************************/
  114. static hoedown_buffer *
  115. newbuf(hoedown_document *doc, int type)
  116. {
  117. static const size_t buf_size[2] = {256, 64};
  118. hoedown_buffer *work = NULL;
  119. hoedown_stack *pool = &doc->work_bufs[type];
  120. if (pool->size < pool->asize &&
  121. pool->item[pool->size] != NULL) {
  122. work = pool->item[pool->size++];
  123. work->size = 0;
  124. } else {
  125. work = hoedown_buffer_new(buf_size[type]);
  126. hoedown_stack_push(pool, work);
  127. }
  128. return work;
  129. }
  130. static void
  131. popbuf(hoedown_document *doc, int type)
  132. {
  133. doc->work_bufs[type].size--;
  134. }
  135. static void
  136. unscape_text(hoedown_buffer *ob, hoedown_buffer *src)
  137. {
  138. size_t i = 0, org;
  139. while (i < src->size) {
  140. org = i;
  141. while (i < src->size && src->data[i] != '\\')
  142. i++;
  143. if (i > org)
  144. hoedown_buffer_put(ob, src->data + org, i - org);
  145. if (i + 1 >= src->size)
  146. break;
  147. hoedown_buffer_putc(ob, src->data[i + 1]);
  148. i += 2;
  149. }
  150. }
  151. static unsigned int
  152. hash_link_ref(const uint8_t *link_ref, size_t length)
  153. {
  154. size_t i;
  155. unsigned int hash = 0;
  156. for (i = 0; i < length; ++i)
  157. hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
  158. return hash;
  159. }
  160. static struct link_ref *
  161. add_link_ref(
  162. struct link_ref **references,
  163. const uint8_t *name, size_t name_size)
  164. {
  165. struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref));
  166. ref->id = hash_link_ref(name, name_size);
  167. ref->next = references[ref->id % REF_TABLE_SIZE];
  168. references[ref->id % REF_TABLE_SIZE] = ref;
  169. return ref;
  170. }
  171. static struct link_ref *
  172. find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
  173. {
  174. unsigned int hash = hash_link_ref(name, length);
  175. struct link_ref *ref = NULL;
  176. ref = references[hash % REF_TABLE_SIZE];
  177. while (ref != NULL) {
  178. if (ref->id == hash)
  179. return ref;
  180. ref = ref->next;
  181. }
  182. return NULL;
  183. }
  184. static void
  185. free_link_refs(struct link_ref **references)
  186. {
  187. size_t i;
  188. for (i = 0; i < REF_TABLE_SIZE; ++i) {
  189. struct link_ref *r = references[i];
  190. struct link_ref *next;
  191. while (r) {
  192. next = r->next;
  193. hoedown_buffer_free(r->link);
  194. hoedown_buffer_free(r->title);
  195. free(r);
  196. r = next;
  197. }
  198. }
  199. }
  200. static struct footnote_ref *
  201. create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
  202. {
  203. struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref));
  204. ref->id = hash_link_ref(name, name_size);
  205. return ref;
  206. }
  207. static int
  208. add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
  209. {
  210. struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item));
  211. if (!item)
  212. return 0;
  213. item->ref = ref;
  214. if (list->head == NULL) {
  215. list->head = list->tail = item;
  216. } else {
  217. list->tail->next = item;
  218. list->tail = item;
  219. }
  220. list->count++;
  221. return 1;
  222. }
  223. static struct footnote_ref *
  224. find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
  225. {
  226. unsigned int hash = hash_link_ref(name, length);
  227. struct footnote_item *item = NULL;
  228. item = list->head;
  229. while (item != NULL) {
  230. if (item->ref->id == hash)
  231. return item->ref;
  232. item = item->next;
  233. }
  234. return NULL;
  235. }
  236. static void
  237. free_footnote_ref(struct footnote_ref *ref)
  238. {
  239. hoedown_buffer_free(ref->contents);
  240. free(ref);
  241. }
  242. static void
  243. free_footnote_list(struct footnote_list *list, int free_refs)
  244. {
  245. struct footnote_item *item = list->head;
  246. struct footnote_item *next;
  247. while (item) {
  248. next = item->next;
  249. if (free_refs)
  250. free_footnote_ref(item->ref);
  251. free(item);
  252. item = next;
  253. }
  254. }
  255. /*
  256. * Check whether a char is a Markdown spacing char.
  257. * Right now we only consider spaces the actual
  258. * space and a newline: tabs and carriage returns
  259. * are filtered out during the preprocessing phase.
  260. *
  261. * If we wanted to actually be UTF-8 compliant, we
  262. * should instead extract an Unicode codepoint from
  263. * this character and check for space properties.
  264. */
  265. static int
  266. _isspace(int c)
  267. {
  268. return c == ' ' || c == '\n';
  269. }
  270. /* is_empty_all: verify that all the data is spacing */
  271. static int
  272. is_empty_all(const uint8_t *data, size_t size)
  273. {
  274. size_t i = 0;
  275. while (i < size && _isspace(data[i])) i++;
  276. return i == size;
  277. }
  278. /*
  279. * Replace all spacing characters in data with spaces. As a special
  280. * case, this collapses a newline with the previous space, if possible.
  281. */
  282. static void
  283. replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size)
  284. {
  285. size_t i = 0, mark;
  286. hoedown_buffer_grow(ob, size);
  287. while (1) {
  288. mark = i;
  289. while (i < size && data[i] != '\n') i++;
  290. hoedown_buffer_put(ob, data + mark, i - mark);
  291. if (i >= size) break;
  292. if (!(i > 0 && data[i-1] == ' '))
  293. hoedown_buffer_putc(ob, ' ');
  294. i++;
  295. }
  296. }
  297. /****************************
  298. * INLINE PARSING FUNCTIONS *
  299. ****************************/
  300. /* is_mail_autolink • looks for the address part of a mail autolink and '>' */
  301. /* this is less strict than the original markdown e-mail address matching */
  302. static size_t
  303. is_mail_autolink(uint8_t *data, size_t size)
  304. {
  305. size_t i = 0, nb = 0;
  306. /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
  307. for (i = 0; i < size; ++i) {
  308. if (isalnum(data[i]))
  309. continue;
  310. switch (data[i]) {
  311. case '@':
  312. nb++;
  313. case '-':
  314. case '.':
  315. case '_':
  316. break;
  317. case '>':
  318. return (nb == 1) ? i + 1 : 0;
  319. default:
  320. return 0;
  321. }
  322. }
  323. return 0;
  324. }
  325. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  326. static size_t
  327. tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink)
  328. {
  329. size_t i, j;
  330. /* a valid tag can't be shorter than 3 chars */
  331. if (size < 3) return 0;
  332. if (data[0] != '<') return 0;
  333. /* HTML comment, laxist form */
  334. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  335. i = 5;
  336. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  337. i++;
  338. i++;
  339. if (i <= size)
  340. return i;
  341. }
  342. /* begins with a '<' optionally followed by '/', followed by letter or number */
  343. i = (data[1] == '/') ? 2 : 1;
  344. if (!isalnum(data[i]))
  345. return 0;
  346. /* scheme test */
  347. *autolink = HOEDOWN_AUTOLINK_NONE;
  348. /* try to find the beginning of an URI */
  349. while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  350. i++;
  351. if (i > 1 && data[i] == '@') {
  352. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  353. *autolink = HOEDOWN_AUTOLINK_EMAIL;
  354. return i + j;
  355. }
  356. }
  357. if (i > 2 && data[i] == ':') {
  358. *autolink = HOEDOWN_AUTOLINK_NORMAL;
  359. i++;
  360. }
  361. /* completing autolink test: no spacing or ' or " */
  362. if (i >= size)
  363. *autolink = HOEDOWN_AUTOLINK_NONE;
  364. else if (*autolink) {
  365. j = i;
  366. while (i < size) {
  367. if (data[i] == '\\') i += 2;
  368. else if (data[i] == '>' || data[i] == '\'' ||
  369. data[i] == '"' || data[i] == ' ' || data[i] == '\n')
  370. break;
  371. else i++;
  372. }
  373. if (i >= size) return 0;
  374. if (i > j && data[i] == '>') return i + 1;
  375. /* one of the forbidden chars has been found */
  376. *autolink = HOEDOWN_AUTOLINK_NONE;
  377. }
  378. /* looking for something looking like a tag end */
  379. while (i < size && data[i] != '>') i++;
  380. if (i >= size) return 0;
  381. return i + 1;
  382. }
/* parse_inline • parses inline markdown elements */
/* Renders the `size` bytes at `data` into `ob`: runs of inactive bytes are */
/* passed to the normal_text callback (or copied verbatim when it is not */
/* set) and every active byte is offered to its handler from */
/* markdown_char_ptrs. Nothing is rendered once the number of work buffers */
/* in use exceeds doc->max_nesting. */
static void
parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
{
	/* i: start of the text not yet emitted; end: scan position; */
	/* consumed: end of the last span a handler took care of */
	size_t i = 0, end = 0, consumed = 0;
	hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
	uint8_t *active_char = doc->active_char;

	/* nesting guard: give up silently when too many buffers are in use */
	if (doc->work_bufs[BUFFER_SPAN].size +
		doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
		return;

	while (i < size) {
		/* copying inactive chars into the output */
		while (end < size && active_char[data[end]] == 0)
			end++;

		if (doc->md.normal_text) {
			/* work only borrows the input bytes; nothing is allocated */
			work.data = data + i;
			work.size = end - i;
			doc->md.normal_text(ob, &work, &doc->data);
		}
		else
			hoedown_buffer_put(ob, data + i, end - i);

		if (end >= size) break;
		i = end;

		/* dispatch on the active byte; the handler's offset argument is the */
		/* number of bytes between the last consumed span and this byte */
		end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i);
		if (!end) /* no action from the callback */
			/* keep i on the active byte so it is emitted with the next text run */
			end = i + 1;
		else {
			/* skip everything the handler took care of */
			i += end;
			end = i;
			consumed = i;
		}
	}
}
  416. /* is_escaped • returns whether special char at data[loc] is escaped by '\\' */
  417. static int
  418. is_escaped(uint8_t *data, size_t loc)
  419. {
  420. size_t i = loc;
  421. while (i >= 1 && data[i - 1] == '\\')
  422. i--;
  423. /* odd numbers of backslashes escapes data[loc] */
  424. return (loc - i) % 2;
  425. }
/* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
/* Returns the index in data of the next unescaped occurrence of c that is */
/* not inside a code span or a link construct, or 0 when there is none. */
/* Since 0 doubles as "not found", an occurrence at index 0 is reported */
/* as not found as well. When a code span or link turns out to be */
/* unterminated, the first c seen while scanning it (tmp_i) is returned. */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t i = 0;

	while (i < size) {
		/* jump to the next byte of interest: c, '[' or '`' */
		while (i < size && data[i] != c && data[i] != '[' && data[i] != '`')
			i++;

		if (i == size)
			return 0;

		/* not counting escaped chars */
		if (is_escaped(data, i)) {
			i++; continue;
		}

		if (data[i] == c)
			return i;

		/* skipping a codespan */
		if (data[i] == '`') {
			size_t span_nb = 0, bt;
			size_t tmp_i = 0; /* first c seen inside the span, 0 if none */

			/* counting the number of opening backticks */
			while (i < size && data[i] == '`') {
				i++; span_nb++;
			}

			if (i >= size) return 0;

			/* finding the matching closing sequence */
			bt = 0;
			while (i < size && bt < span_nb) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				if (data[i] == '`') bt++;
				else bt = 0;
				i++;
			}

			/* not a well-formed codespan; use found matching emph char */
			if (bt < span_nb && i >= size) return tmp_i;
		}
		/* skipping a link */
		else if (data[i] == '[') {
			size_t tmp_i = 0; /* first c seen inside the link, 0 if none */
			uint8_t cc;       /* closing char of the second part: ']' or ')' */

			/* skip the bracketed text */
			i++;
			while (i < size && data[i] != ']') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			/* step past ']' and any spacing that follows it */
			i++;
			while (i < size && _isspace(data[i]))
				i++;

			if (i >= size)
				return tmp_i;

			switch (data[i]) {
			case '[':
				cc = ']'; break;

			case '(':
				cc = ')'; break;

			default:
				/* no second part: the brackets were plain text */
				if (tmp_i)
					return tmp_i;
				else
					continue;
			}

			/* skip the second part up to its closing char */
			i++;
			while (i < size && data[i] != cc) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			if (i >= size)
				return tmp_i;

			i++;
		}
	}

	return 0;
}
  499. /* parse_emph1 • parsing single emphase */
  500. /* closed by a symbol not preceded by spacing and not followed by symbol */
  501. static size_t
  502. parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
  503. {
  504. size_t i = 0, len;
  505. hoedown_buffer *work = 0;
  506. int r;
  507. /* skipping one symbol if coming from emph3 */
  508. if (size > 1 && data[0] == c && data[1] == c) i = 1;
  509. while (i < size) {
  510. len = find_emph_char(data + i, size - i, c);
  511. if (!len) return 0;
  512. i += len;
  513. if (i >= size) return 0;
  514. if (data[i] == c && !_isspace(data[i - 1])) {
  515. if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
  516. if (i + 1 < size && isalnum(data[i + 1]))
  517. continue;
  518. }
  519. work = newbuf(doc, BUFFER_SPAN);
  520. parse_inline(work, doc, data, i);
  521. if (doc->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_')
  522. r = doc->md.underline(ob, work, &doc->data);
  523. else
  524. r = doc->md.emphasis(ob, work, &doc->data);
  525. popbuf(doc, BUFFER_SPAN);
  526. return r ? i + 1 : 0;
  527. }
  528. }
  529. return 0;
  530. }
  531. /* parse_emph2 • parsing single emphase */
  532. static size_t
  533. parse_emph2(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
  534. {
  535. size_t i = 0, len;
  536. hoedown_buffer *work = 0;
  537. int r;
  538. while (i < size) {
  539. len = find_emph_char(data + i, size - i, c);
  540. if (!len) return 0;
  541. i += len;
  542. if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
  543. work = newbuf(doc, BUFFER_SPAN);
  544. parse_inline(work, doc, data, i);
  545. if (c == '~')
  546. r = doc->md.strikethrough(ob, work, &doc->data);
  547. else if (c == '=')
  548. r = doc->md.highlight(ob, work, &doc->data);
  549. else
  550. r = doc->md.double_emphasis(ob, work, &doc->data);
  551. popbuf(doc, BUFFER_SPAN);
  552. return r ? i + 2 : 0;
  553. }
  554. i++;
  555. }
  556. return 0;
  557. }
  558. /* parse_emph3 • parsing single emphase */
  559. /* finds the first closing tag, and delegates to the other emph */
  560. static size_t
  561. parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
  562. {
  563. size_t i = 0, len;
  564. int r;
  565. while (i < size) {
  566. len = find_emph_char(data + i, size - i, c);
  567. if (!len) return 0;
  568. i += len;
  569. /* skip spacing preceded symbols */
  570. if (data[i] != c || _isspace(data[i - 1]))
  571. continue;
  572. if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && doc->md.triple_emphasis) {
  573. /* triple symbol found */
  574. hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
  575. parse_inline(work, doc, data, i);
  576. r = doc->md.triple_emphasis(ob, work, &doc->data);
  577. popbuf(doc, BUFFER_SPAN);
  578. return r ? i + 3 : 0;
  579. } else if (i + 1 < size && data[i + 1] == c) {
  580. /* double symbol found, handing over to emph1 */
  581. len = parse_emph1(ob, doc, data - 2, size + 2, c);
  582. if (!len) return 0;
  583. else return len - 2;
  584. } else {
  585. /* single symbol found, handing over to emph2 */
  586. len = parse_emph2(ob, doc, data - 1, size + 1, c);
  587. if (!len) return 0;
  588. else return len - 1;
  589. }
  590. }
  591. return 0;
  592. }
  593. /* parse_math • parses a math span until the given ending delimiter */
  594. static size_t
  595. parse_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size, const char *end, size_t delimsz, int displaymode)
  596. {
  597. hoedown_buffer text = { NULL, 0, 0, 0, NULL, NULL, NULL };
  598. size_t i = delimsz;
  599. if (!doc->md.math)
  600. return 0;
  601. /* find ending delimiter */
  602. while (1) {
  603. while (i < size && data[i] != (uint8_t)end[0])
  604. i++;
  605. if (i >= size)
  606. return 0;
  607. if (!is_escaped(data, i) && !(i + delimsz > size)
  608. && memcmp(data + i, end, delimsz) == 0)
  609. break;
  610. i++;
  611. }
  612. /* prepare buffers */
  613. text.data = data + delimsz;
  614. text.size = i - delimsz;
  615. /* if this is a $$ and MATH_EXPLICIT is not active,
  616. * guess whether displaymode should be enabled from the context */
  617. i += delimsz;
  618. if (delimsz == 2 && !(doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT))
  619. displaymode = is_empty_all(data - offset, offset) && is_empty_all(data + i, size - i);
  620. /* call callback */
  621. if (doc->md.math(ob, &text, displaymode, &doc->data))
  622. return i;
  623. return 0;
  624. }
  625. /* char_emphasis • single and double emphasis parsing */
  626. static size_t
  627. char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  628. {
  629. uint8_t c = data[0];
  630. size_t ret;
  631. if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
  632. if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(')
  633. return 0;
  634. }
  635. if (size > 2 && data[1] != c) {
  636. /* spacing cannot follow an opening emphasis;
  637. * strikethrough and highlight only takes two characters '~~' */
  638. if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0)
  639. return 0;
  640. return ret + 1;
  641. }
  642. if (size > 3 && data[1] == c && data[2] != c) {
  643. if (_isspace(data[2]) || (ret = parse_emph2(ob, doc, data + 2, size - 2, c)) == 0)
  644. return 0;
  645. return ret + 2;
  646. }
  647. if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
  648. if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, doc, data + 3, size - 3, c)) == 0)
  649. return 0;
  650. return ret + 3;
  651. }
  652. return 0;
  653. }
  654. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  655. static size_t
  656. char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  657. {
  658. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  659. return 0;
  660. /* removing the last space from ob and rendering */
  661. while (ob->size && ob->data[ob->size - 1] == ' ')
  662. ob->size--;
  663. return doc->md.linebreak(ob, &doc->data) ? 1 : 0;
  664. }
  665. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  666. static size_t
  667. char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  668. {
  669. hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
  670. size_t end, nb = 0, i, f_begin, f_end;
  671. /* counting the number of backticks in the delimiter */
  672. while (nb < size && data[nb] == '`')
  673. nb++;
  674. /* finding the next delimiter */
  675. i = 0;
  676. for (end = nb; end < size && i < nb; end++) {
  677. if (data[end] == '`') i++;
  678. else i = 0;
  679. }
  680. if (i < nb && end >= size)
  681. return 0; /* no matching delimiter */
  682. /* trimming outside spaces */
  683. f_begin = nb;
  684. while (f_begin < end && data[f_begin] == ' ')
  685. f_begin++;
  686. f_end = end - nb;
  687. while (f_end > nb && data[f_end-1] == ' ')
  688. f_end--;
  689. /* real code span */
  690. if (f_begin < f_end) {
  691. work.data = data + f_begin;
  692. work.size = f_end - f_begin;
  693. if (!doc->md.codespan(ob, &work, &doc->data))
  694. end = 0;
  695. } else {
  696. if (!doc->md.codespan(ob, 0, &doc->data))
  697. end = 0;
  698. }
  699. return end;
  700. }
  701. /* char_quote • '"' parsing a quote */
  702. static size_t
  703. char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  704. {
  705. size_t end, nq = 0, i, f_begin, f_end;
  706. /* counting the number of quotes in the delimiter */
  707. while (nq < size && data[nq] == '"')
  708. nq++;
  709. /* finding the next delimiter */
  710. end = nq;
  711. while (1) {
  712. i = end;
  713. end += find_emph_char(data + end, size - end, '"');
  714. if (end == i) return 0; /* no matching delimiter */
  715. i = end;
  716. while (end < size && data[end] == '"' && end - i < nq) end++;
  717. if (end - i >= nq) break;
  718. }
  719. /* trimming outside spaces */
  720. f_begin = nq;
  721. while (f_begin < end && data[f_begin] == ' ')
  722. f_begin++;
  723. f_end = end - nq;
  724. while (f_end > nq && data[f_end-1] == ' ')
  725. f_end--;
  726. /* real quote */
  727. if (f_begin < f_end) {
  728. hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
  729. parse_inline(work, doc, data + f_begin, f_end - f_begin);
  730. if (!doc->md.quote(ob, work, &doc->data))
  731. end = 0;
  732. popbuf(doc, BUFFER_SPAN);
  733. } else {
  734. if (!doc->md.quote(ob, 0, &doc->data))
  735. end = 0;
  736. }
  737. return end;
  738. }
  739. /* char_escape • '\\' backslash escape */
  740. static size_t
  741. char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  742. {
  743. static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"$";
  744. hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
  745. size_t w;
  746. if (size > 1) {
  747. if (data[1] == '\\' && (doc->ext_flags & HOEDOWN_EXT_MATH) &&
  748. size > 2 && (data[2] == '(' || data[2] == '[')) {
  749. const char *end = (data[2] == '[') ? "\\\\]" : "\\\\)";
  750. w = parse_math(ob, doc, data, offset, size, end, 3, data[2] == '[');
  751. if (w) return w;
  752. }
  753. if (strchr(escape_chars, data[1]) == NULL)
  754. return 0;
  755. if (doc->md.normal_text) {
  756. work.data = data + 1;
  757. work.size = 1;
  758. doc->md.normal_text(ob, &work, &doc->data);
  759. }
  760. else hoedown_buffer_putc(ob, data[1]);
  761. } else if (size == 1) {
  762. if (doc->md.normal_text) {
  763. work.data = data;
  764. work.size = 1;
  765. doc->md.normal_text(ob, &work, &doc->data);
  766. }
  767. else hoedown_buffer_putc(ob, data[0]);
  768. }
  769. return 2;
  770. }
  771. /* char_entity • '&' escaped when it doesn't belong to an entity */
  772. /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
  773. static size_t
  774. char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  775. {
  776. size_t end = 1;
  777. hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
  778. if (end < size && data[end] == '#')
  779. end++;
  780. while (end < size && isalnum(data[end]))
  781. end++;
  782. if (end < size && data[end] == ';')
  783. end++; /* real entity */
  784. else
  785. return 0; /* lone '&' */
  786. if (doc->md.entity) {
  787. work.data = data;
  788. work.size = end;
  789. doc->md.entity(ob, &work, &doc->data);
  790. }
  791. else hoedown_buffer_put(ob, data, end);
  792. return end;
  793. }
  794. /* char_langle_tag • '<' when tags or autolinks are allowed */
  795. static size_t
  796. char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  797. {
  798. hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
  799. hoedown_autolink_type altype = HOEDOWN_AUTOLINK_NONE;
  800. size_t end = tag_length(data, size, &altype);
  801. int ret = 0;
  802. work.data = data;
  803. work.size = end;
  804. if (end > 2) {
  805. if (doc->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) {
  806. hoedown_buffer *u_link = newbuf(doc, BUFFER_SPAN);
  807. work.data = data + 1;
  808. work.size = end - 2;
  809. unscape_text(u_link, &work);
  810. ret = doc->md.autolink(ob, u_link, altype, &doc->data);
  811. popbuf(doc, BUFFER_SPAN);
  812. }
  813. else if (doc->md.raw_html)
  814. ret = doc->md.raw_html(ob, &work, &doc->data);
  815. }
  816. if (!ret) return 0;
  817. else return end;
  818. }
  819. static size_t
  820. char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  821. {
  822. hoedown_buffer *link, *link_url, *link_text;
  823. size_t link_len, rewind;
  824. if (!doc->md.link || doc->in_link_body)
  825. return 0;
  826. link = newbuf(doc, BUFFER_SPAN);
  827. if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) {
  828. link_url = newbuf(doc, BUFFER_SPAN);
  829. HOEDOWN_BUFPUTSL(link_url, "http://");
  830. hoedown_buffer_put(link_url, link->data, link->size);
  831. if (ob->size > rewind)
  832. ob->size -= rewind;
  833. else
  834. ob->size = 0;
  835. if (doc->md.normal_text) {
  836. link_text = newbuf(doc, BUFFER_SPAN);
  837. doc->md.normal_text(link_text, link, &doc->data);
  838. doc->md.link(ob, link_text, link_url, NULL, &doc->data);
  839. popbuf(doc, BUFFER_SPAN);
  840. } else {
  841. doc->md.link(ob, link, link_url, NULL, &doc->data);
  842. }
  843. popbuf(doc, BUFFER_SPAN);
  844. }
  845. popbuf(doc, BUFFER_SPAN);
  846. return link_len;
  847. }
  848. static size_t
  849. char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  850. {
  851. hoedown_buffer *link;
  852. size_t link_len, rewind;
  853. if (!doc->md.autolink || doc->in_link_body)
  854. return 0;
  855. link = newbuf(doc, BUFFER_SPAN);
  856. if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
  857. if (ob->size > rewind)
  858. ob->size -= rewind;
  859. else
  860. ob->size = 0;
  861. doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, &doc->data);
  862. }
  863. popbuf(doc, BUFFER_SPAN);
  864. return link_len;
  865. }
  866. static size_t
  867. char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  868. {
  869. hoedown_buffer *link;
  870. size_t link_len, rewind;
  871. if (!doc->md.autolink || doc->in_link_body)
  872. return 0;
  873. link = newbuf(doc, BUFFER_SPAN);
  874. if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
  875. if (ob->size > rewind)
  876. ob->size -= rewind;
  877. else
  878. ob->size = 0;
  879. doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, &doc->data);
  880. }
  881. popbuf(doc, BUFFER_SPAN);
  882. return link_len;
  883. }
  884. static size_t
  885. char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) {
  886. size_t ret;
  887. if (size < 2 || data[1] != '[') return 0;
  888. ret = char_link(ob, doc, data + 1, offset + 1, size - 1);
  889. if (!ret) return 0;
  890. return ret + 1;
  891. }
  892. /* char_link • '[': parsing a link, a footnote or an image */
  893. static size_t
  894. char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  895. {
  896. int is_img = (offset && data[-1] == '!' && !is_escaped(data - offset, offset - 1));
  897. int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^');
  898. size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
  899. hoedown_buffer *content = NULL;
  900. hoedown_buffer *link = NULL;
  901. hoedown_buffer *title = NULL;
  902. hoedown_buffer *u_link = NULL;
  903. size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size;
  904. int ret = 0, in_title = 0, qtype = 0;
  905. /* checking whether the correct renderer exists */
  906. if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image)
  907. || (!is_img && !is_footnote && !doc->md.link))
  908. goto cleanup;
  909. /* looking for the matching closing bracket */
  910. i += find_emph_char(data + i, size - i, ']');
  911. txt_e = i;
  912. if (i < size && data[i] == ']') i++;
  913. else goto cleanup;
  914. /* footnote link */
  915. if (is_footnote) {
  916. hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL };
  917. struct footnote_ref *fr;
  918. if (txt_e < 3)
  919. goto cleanup;
  920. id.data = data + 2;
  921. id.size = txt_e - 2;
  922. fr = find_footnote_ref(&doc->footnotes_found, id.data, id.size);
  923. /* mark footnote used */
  924. if (fr && !fr->is_used) {
  925. if(!add_footnote_ref(&doc->footnotes_used, fr))
  926. goto cleanup;
  927. fr->is_used = 1;
  928. fr->num = doc->footnotes_used.count;
  929. /* render */
  930. if (doc->md.footnote_ref)
  931. ret = doc->md.footnote_ref(ob, fr->num, &doc->data);
  932. }
  933. goto cleanup;
  934. }
  935. /* skip any amount of spacing */
  936. /* (this is much more laxist than original markdown syntax) */
  937. while (i < size && _isspace(data[i]))
  938. i++;
  939. /* inline style link */
  940. if (i < size && data[i] == '(') {
  941. size_t nb_p;
  942. /* skipping initial spacing */
  943. i++;
  944. while (i < size && _isspace(data[i]))
  945. i++;
  946. link_b = i;
  947. /* looking for link end: ' " ) */
  948. /* Count the number of open parenthesis */
  949. nb_p = 0;
  950. while (i < size) {
  951. if (data[i] == '\\') i += 2;
  952. else if (data[i] == '(' && i != 0) {
  953. nb_p++; i++;
  954. }
  955. else if (data[i] == ')') {
  956. if (nb_p == 0) break;
  957. else nb_p--; i++;
  958. } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
  959. else i++;
  960. }
  961. if (i >= size) goto cleanup;
  962. link_e = i;
  963. /* looking for title end if present */
  964. if (data[i] == '\'' || data[i] == '"') {
  965. qtype = data[i];
  966. in_title = 1;
  967. i++;
  968. title_b = i;
  969. while (i < size) {
  970. if (data[i] == '\\') i += 2;
  971. else if (data[i] == qtype) {in_title = 0; i++;}
  972. else if ((data[i] == ')') && !in_title) break;
  973. else i++;
  974. }
  975. if (i >= size) goto cleanup;
  976. /* skipping spacing after title */
  977. title_e = i - 1;
  978. while (title_e > title_b && _isspace(data[title_e]))
  979. title_e--;
  980. /* checking for closing quote presence */
  981. if (data[title_e] != '\'' && data[title_e] != '"') {
  982. title_b = title_e = 0;
  983. link_e = i;
  984. }
  985. }
  986. /* remove spacing at the end of the link */
  987. while (link_e > link_b && _isspace(data[link_e - 1]))
  988. link_e--;
  989. /* remove optional angle brackets around the link */
  990. if (data[link_b] == '<' && data[link_e - 1] == '>') {
  991. link_b++;
  992. link_e--;
  993. }
  994. /* building escaped link and title */
  995. if (link_e > link_b) {
  996. link = newbuf(doc, BUFFER_SPAN);
  997. hoedown_buffer_put(link, data + link_b, link_e - link_b);
  998. }
  999. if (title_e > title_b) {
  1000. title = newbuf(doc, BUFFER_SPAN);
  1001. hoedown_buffer_put(title, data + title_b, title_e - title_b);
  1002. }
  1003. i++;
  1004. }
  1005. /* reference style link */
  1006. else if (i < size && data[i] == '[') {
  1007. hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
  1008. struct link_ref *lr;
  1009. /* looking for the id */
  1010. i++;
  1011. link_b = i;
  1012. while (i < size && data[i] != ']') i++;
  1013. if (i >= size) goto cleanup;
  1014. link_e = i;
  1015. /* finding the link_ref */
  1016. if (link_b == link_e)
  1017. replace_spacing(id, data + 1, txt_e - 1);
  1018. else
  1019. hoedown_buffer_put(id, data + link_b, link_e - link_b);
  1020. lr = find_link_ref(doc->refs, id->data, id->size);
  1021. if (!lr)
  1022. goto cleanup;
  1023. /* keeping link and title from link_ref */
  1024. link = lr->link;
  1025. title = lr->title;
  1026. i++;
  1027. }
  1028. /* shortcut reference style link */
  1029. else {
  1030. hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
  1031. struct link_ref *lr;
  1032. /* crafting the id */
  1033. replace_spacing(id, data + 1, txt_e - 1);
  1034. /* finding the link_ref */
  1035. lr = find_link_ref(doc->refs, id->data, id->size);
  1036. if (!lr)
  1037. goto cleanup;
  1038. /* keeping link and title from link_ref */
  1039. link = lr->link;
  1040. title = lr->title;
  1041. /* rewinding the spacing */
  1042. i = txt_e + 1;
  1043. }
  1044. /* building content: img alt is kept, only link content is parsed */
  1045. if (txt_e > 1) {
  1046. content = newbuf(doc, BUFFER_SPAN);
  1047. if (is_img) {
  1048. hoedown_buffer_put(content, data + 1, txt_e - 1);
  1049. } else {
  1050. /* disable autolinking when parsing inline the
  1051. * content of a link */
  1052. doc->in_link_body = 1;
  1053. parse_inline(content, doc, data + 1, txt_e - 1);
  1054. doc->in_link_body = 0;
  1055. }
  1056. }
  1057. if (link) {
  1058. u_link = newbuf(doc, BUFFER_SPAN);
  1059. unscape_text(u_link, link);
  1060. }
  1061. /* calling the relevant rendering function */
  1062. if (is_img) {
  1063. ret = doc->md.image(ob, u_link, title, content, &doc->data);
  1064. } else {
  1065. ret = doc->md.link(ob, content, u_link, title, &doc->data);
  1066. }
  1067. /* cleanup */
  1068. cleanup:
  1069. doc->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
  1070. return ret ? i : 0;
  1071. }
  1072. static size_t
  1073. char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  1074. {
  1075. size_t sup_start, sup_len;
  1076. hoedown_buffer *sup;
  1077. if (!doc->md.superscript)
  1078. return 0;
  1079. if (size < 2)
  1080. return 0;
  1081. if (data[1] == '(') {
  1082. sup_start = 2;
  1083. sup_len = find_emph_char(data + 2, size - 2, ')') + 2;
  1084. if (sup_len == size)
  1085. return 0;
  1086. } else {
  1087. sup_start = sup_len = 1;
  1088. while (sup_len < size && !_isspace(data[sup_len]))
  1089. sup_len++;
  1090. }
  1091. if (sup_len - sup_start == 0)
  1092. return (sup_start == 2) ? 3 : 0;
  1093. sup = newbuf(doc, BUFFER_SPAN);
  1094. parse_inline(sup, doc, data + sup_start, sup_len - sup_start);
  1095. doc->md.superscript(ob, sup, &doc->data);
  1096. popbuf(doc, BUFFER_SPAN);
  1097. return (sup_start == 2) ? sup_len + 1 : sup_len;
  1098. }
  1099. static size_t
  1100. char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  1101. {
  1102. /* double dollar */
  1103. if (size > 1 && data[1] == '$')
  1104. return parse_math(ob, doc, data, offset, size, "$$", 2, 1);
  1105. /* single dollar allowed only with MATH_EXPLICIT flag */
  1106. if (doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT)
  1107. return parse_math(ob, doc, data, offset, size, "$", 1, 0);
  1108. return 0;
  1109. }
  1110. /*********************************
  1111. * BLOCK-LEVEL PARSING FUNCTIONS *
  1112. *********************************/
  1113. /* is_empty • returns the line length when it is empty, 0 otherwise */
  1114. static size_t
  1115. is_empty(const uint8_t *data, size_t size)
  1116. {
  1117. size_t i;
  1118. for (i = 0; i < size && data[i] != '\n'; i++)
  1119. if (data[i] != ' ')
  1120. return 0;
  1121. return i + 1;
  1122. }
  1123. /* is_hrule • returns whether a line is a horizontal rule */
  1124. static int
  1125. is_hrule(uint8_t *data, size_t size)
  1126. {
  1127. size_t i = 0, n = 0;
  1128. uint8_t c;
  1129. /* skipping initial spaces */
  1130. if (size < 3) return 0;
  1131. if (data[0] == ' ') { i++;
  1132. if (data[1] == ' ') { i++;
  1133. if (data[2] == ' ') { i++; } } }
  1134. /* looking at the hrule uint8_t */
  1135. if (i + 2 >= size
  1136. || (data[i] != '*' && data[i] != '-' && data[i] != '_'))
  1137. return 0;
  1138. c = data[i];
  1139. /* the whole line must be the char or space */
  1140. while (i < size && data[i] != '\n') {
  1141. if (data[i] == c) n++;
  1142. else if (data[i] != ' ')
  1143. return 0;
  1144. i++;
  1145. }
  1146. return n >= 3;
  1147. }
  1148. /* check if a line is a code fence; return the
  1149. * end of the code fence. if passed, width of
  1150. * the fence rule and character will be returned */
  1151. static size_t
  1152. is_codefence(uint8_t *data, size_t size, size_t *width, uint8_t *chr)
  1153. {
  1154. size_t i = 0, n = 1;
  1155. uint8_t c;
  1156. /* skipping initial spaces */
  1157. if (size < 3)
  1158. return 0;
  1159. if (data[0] == ' ') { i++;
  1160. if (data[1] == ' ') { i++;
  1161. if (data[2] == ' ') { i++; } } }
  1162. /* looking at the hrule uint8_t */
  1163. c = data[i];
  1164. if (i + 2 >= size || !(c=='~' || c=='`'))
  1165. return 0;
  1166. /* the fence must be that same character */
  1167. while (++i < size && data[i] == c)
  1168. ++n;
  1169. if (n < 3)
  1170. return 0;
  1171. if (width) *width = n;
  1172. if (chr) *chr = c;
  1173. return i;
  1174. }
  1175. /* expects single line, checks if it's a codefence and extracts language */
  1176. static size_t
  1177. parse_codefence(uint8_t *data, size_t size, hoedown_buffer *lang, size_t *width, uint8_t *chr)
  1178. {
  1179. size_t i, w, lang_start;
  1180. i = w = is_codefence(data, size, width, chr);
  1181. if (i == 0)
  1182. return 0;
  1183. while (i < size && _isspace(data[i]))
  1184. i++;
  1185. lang_start = i;
  1186. while (i < size && !_isspace(data[i]))
  1187. i++;
  1188. lang->data = data + lang_start;
  1189. lang->size = i - lang_start;
  1190. /* Avoid parsing a codespan as a fence */
  1191. i = lang_start + 2;
  1192. while (i < size && !(data[i] == *chr && data[i-1] == *chr && data[i-2] == *chr)) i++;
  1193. if (i < size) return 0;
  1194. return w;
  1195. }
  1196. /* is_atxheader • returns whether the line is a hash-prefixed header */
  1197. static int
  1198. is_atxheader(hoedown_document *doc, uint8_t *data, size_t size)
  1199. {
  1200. if (data[0] != '#')
  1201. return 0;
  1202. if (doc->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) {
  1203. size_t level = 0;
  1204. while (level < size && level < 6 && data[level] == '#')
  1205. level++;
  1206. if (level < size && data[level] != ' ')
  1207. return 0;
  1208. }
  1209. return 1;
  1210. }
  1211. /* is_headerline • returns whether the line is a setext-style hdr underline */
  1212. static int
  1213. is_headerline(uint8_t *data, size_t size)
  1214. {
  1215. size_t i = 0;
  1216. /* test of level 1 header */
  1217. if (data[i] == '=') {
  1218. for (i = 1; i < size && data[i] == '='; i++);
  1219. while (i < size && data[i] == ' ') i++;
  1220. return (i >= size || data[i] == '\n') ? 1 : 0; }
  1221. /* test of level 2 header */
  1222. if (data[i] == '-') {
  1223. for (i = 1; i < size && data[i] == '-'; i++);
  1224. while (i < size && data[i] == ' ') i++;
  1225. return (i >= size || data[i] == '\n') ? 2 : 0; }
  1226. return 0;
  1227. }
  1228. static int
  1229. is_next_headerline(uint8_t *data, size_t size)
  1230. {
  1231. size_t i = 0;
  1232. while (i < size && data[i] != '\n')
  1233. i++;
  1234. if (++i >= size)
  1235. return 0;
  1236. return is_headerline(data + i, size - i);
  1237. }
  1238. /* prefix_quote • returns blockquote prefix length */
  1239. static size_t
  1240. prefix_quote(uint8_t *data, size_t size)
  1241. {
  1242. size_t i = 0;
  1243. if (i < size && data[i] == ' ') i++;
  1244. if (i < size && data[i] == ' ') i++;
  1245. if (i < size && data[i] == ' ') i++;
  1246. if (i < size && data[i] == '>') {
  1247. if (i + 1 < size && data[i + 1] == ' ')
  1248. return i + 2;
  1249. return i + 1;
  1250. }
  1251. return 0;
  1252. }
  1253. /* prefix_code • returns prefix length for block code*/
  1254. static size_t
  1255. prefix_code(uint8_t *data, size_t size)
  1256. {
  1257. if (size > 3 && data[0] == ' ' && data[1] == ' '
  1258. && data[2] == ' ' && data[3] == ' ') return 4;
  1259. return 0;
  1260. }
  1261. /* prefix_oli • returns ordered list item prefix */
  1262. static size_t
  1263. prefix_oli(uint8_t *data, size_t size)
  1264. {
  1265. size_t i = 0;
  1266. if (i < size && data[i] == ' ') i++;
  1267. if (i < size && data[i] == ' ') i++;
  1268. if (i < size && data[i] == ' ') i++;
  1269. if (i >= size || data[i] < '0' || data[i] > '9')
  1270. return 0;
  1271. while (i < size && data[i] >= '0' && data[i] <= '9')
  1272. i++;
  1273. if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ')
  1274. return 0;
  1275. if (is_next_headerline(data + i, size - i))
  1276. return 0;
  1277. return i + 2;
  1278. }
  1279. /* prefix_uli • returns ordered list item prefix */
  1280. static size_t
  1281. prefix_uli(uint8_t *data, size_t size)
  1282. {
  1283. size_t i = 0;
  1284. if (i < size && data[i] == ' ') i++;
  1285. if (i < size && data[i] == ' ') i++;
  1286. if (i < size && data[i] == ' ') i++;
  1287. if (i + 1 >= size ||
  1288. (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
  1289. data[i + 1] != ' ')
  1290. return 0;
  1291. if (is_next_headerline(data + i, size - i))
  1292. return 0;
  1293. return i + 2;
  1294. }
  1295. /* parse_block • parsing of one block, returning next uint8_t to parse */
  1296. static void parse_block(hoedown_buffer *ob, hoedown_document *doc,
  1297. uint8_t *data, size_t size);
  1298. /* parse_blockquote • handles parsing of a blockquote fragment */
  1299. static size_t
  1300. parse_blockquote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1301. {
  1302. size_t beg, end = 0, pre, work_size = 0;
  1303. uint8_t *work_data = 0;
  1304. hoedown_buffer *out = 0;
  1305. out = newbuf(doc, BUFFER_BLOCK);
  1306. beg = 0;
  1307. while (beg < size) {
  1308. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1309. pre = prefix_quote(data + beg, end - beg);
  1310. if (pre)
  1311. beg += pre; /* skipping prefix */
  1312. /* empty line followed by non-quote line */
  1313. else if (is_empty(data + beg, end - beg) &&
  1314. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  1315. !is_empty(data + end, size - end))))
  1316. break;
  1317. if (beg < end) { /* copy into the in-place working buffer */
  1318. /* hoedown_buffer_put(work, data + beg, end - beg); */
  1319. if (!work_data)
  1320. work_data = data + beg;
  1321. else if (data + beg != work_data + work_size)
  1322. memmove(work_data + work_size, data + beg, end - beg);
  1323. work_size += end - beg;
  1324. }
  1325. beg = end;
  1326. }
  1327. parse_block(out, doc, work_data, work_size);
  1328. if (doc->md.blockquote)
  1329. doc->md.blockquote(ob, out, &doc->data);
  1330. popbuf(doc, BUFFER_BLOCK);
  1331. return end;
  1332. }
  1333. static size_t
  1334. parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render);
  1335. /* parse_blockquote • handles parsing of a regular paragraph */
  1336. static size_t
  1337. parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1338. {
  1339. hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
  1340. size_t i = 0, end = 0;
  1341. int level = 0;
  1342. work.data = data;
  1343. while (i < size) {
  1344. for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
  1345. if (is_empty(data + i, size - i))
  1346. break;
  1347. if ((level = is_headerline(data + i, size - i)) != 0)
  1348. break;
  1349. if (is_atxheader(doc, data + i, size - i) ||
  1350. is_hrule(data + i, size - i) ||
  1351. prefix_quote(data + i, size - i)) {
  1352. end = i;
  1353. break;
  1354. }
  1355. i = end;
  1356. }
  1357. work.size = i;
  1358. while (work.size && data[work.size - 1] == '\n')
  1359. work.size--;
  1360. if (!level) {
  1361. hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
  1362. parse_inline(tmp, doc, work.data, work.size);
  1363. if (doc->md.paragraph)
  1364. doc->md.paragraph(ob, tmp, &doc->data);
  1365. popbuf(doc, BUFFER_BLOCK);
  1366. } else {
  1367. hoedown_buffer *header_work;
  1368. if (work.size) {
  1369. size_t beg;
  1370. i = work.size;
  1371. work.size -= 1;
  1372. while (work.size && data[work.size] != '\n')
  1373. work.size -= 1;
  1374. beg = work.size + 1;
  1375. while (work.size && data[work.size - 1] == '\n')
  1376. work.size -= 1;
  1377. if (work.size > 0) {
  1378. hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
  1379. parse_inline(tmp, doc, work.data, work.size);
  1380. if (doc->md.paragraph)
  1381. doc->md.paragraph(ob, tmp, &doc->data);
  1382. popbuf(doc, BUFFER_BLOCK);
  1383. work.data += beg;
  1384. work.size = i - beg;
  1385. }
  1386. else work.size = i;
  1387. }
  1388. header_work = newbuf(doc, BUFFER_SPAN);
  1389. parse_inline(header_work, doc, work.data, work.size);
  1390. if (doc->md.header)
  1391. doc->md.header(ob, header_work, (int)level, &doc->data);
  1392. popbuf(doc, BUFFER_SPAN);
  1393. }
  1394. return end;
  1395. }
  1396. /* parse_fencedcode • handles parsing of a block-level code fragment */
  1397. static size_t
  1398. parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1399. {
  1400. hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL };
  1401. hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL };
  1402. size_t i = 0, text_start, line_start;
  1403. size_t w, w2;
  1404. size_t width, width2;
  1405. uint8_t chr, chr2;
  1406. /* parse codefence line */
  1407. while (i < size && data[i] != '\n')
  1408. i++;
  1409. w = parse_codefence(data, i, &lang, &width, &chr);
  1410. if (!w)
  1411. return 0;
  1412. /* search for end */
  1413. i++;
  1414. text_start = i;
  1415. while ((line_start = i) < size) {
  1416. while (i < size && data[i] != '\n')
  1417. i++;
  1418. w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2);
  1419. if (w == w2 && width == width2 && chr == chr2 &&
  1420. is_empty(data + (line_start+w), i - (line_start+w)))
  1421. break;
  1422. i++;
  1423. }
  1424. text.data = data + text_start;
  1425. text.size = line_start - text_start;
  1426. if (doc->md.blockcode)
  1427. doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, &doc->data);
  1428. return i;
  1429. }
  1430. static size_t
  1431. parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1432. {
  1433. size_t beg, end, pre;
  1434. hoedown_buffer *work = 0;
  1435. work = newbuf(doc, BUFFER_BLOCK);
  1436. beg = 0;
  1437. while (beg < size) {
  1438. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1439. pre = prefix_code(data + beg, end - beg);
  1440. if (pre)
  1441. beg += pre; /* skipping prefix */
  1442. else if (!is_empty(data + beg, end - beg))
  1443. /* non-empty non-prefixed line breaks the pre */
  1444. break;
  1445. if (beg < end) {
  1446. /* verbatim copy to the working buffer,
  1447. escaping entities */
  1448. if (is_empty(data + beg, end - beg))
  1449. hoedown_buffer_putc(work, '\n');
  1450. else hoedown_buffer_put(work, data + beg, end - beg);
  1451. }
  1452. beg = end;
  1453. }
  1454. while (work->size && work->data[work->size - 1] == '\n')
  1455. work->size -= 1;
  1456. hoedown_buffer_putc(work, '\n');
  1457. if (doc->md.blockcode)
  1458. doc->md.blockcode(ob, work, NULL, &doc->data);
  1459. popbuf(doc, BUFFER_BLOCK);
  1460. return beg;
  1461. }
  1462. /* parse_listitem • parsing of a single list item */
  1463. /* assuming initial prefix is already removed */
  1464. static size_t
  1465. parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags)
  1466. {
  1467. hoedown_buffer *work = 0, *inter = 0;
  1468. size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
  1469. int in_empty = 0, has_inside_empty = 0, in_fence = 0;
  1470. /* keeping track of the first indentation prefix */
  1471. while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
  1472. orgpre++;
  1473. beg = prefix_uli(data, size);
  1474. if (!beg)
  1475. beg = prefix_oli(data, size);
  1476. if (!beg)
  1477. return 0;
  1478. /* skipping to the beginning of the following line */
  1479. end = beg;
  1480. while (end < size && data[end - 1] != '\n')
  1481. end++;
  1482. /* getting working buffers */
  1483. work = newbuf(doc, BUFFER_SPAN);
  1484. inter = newbuf(doc, BUFFER_SPAN);
  1485. /* putting the first line into the working buffer */
  1486. hoedown_buffer_put(work, data + beg, end - beg);
  1487. beg = end;
  1488. /* process the following lines */
  1489. while (beg < size) {
  1490. size_t has_next_uli = 0, has_next_oli = 0;
  1491. end++;
  1492. while (end < size && data[end - 1] != '\n')
  1493. end++;
  1494. /* process an empty line */
  1495. if (is_empty(data + beg, end - beg)) {
  1496. in_empty = 1;
  1497. beg = end;
  1498. continue;
  1499. }
  1500. /* calculating the indentation */
  1501. i = 0;
  1502. while (i < 4 && beg + i < end && data[beg + i] == ' ')
  1503. i++;
  1504. pre = i;
  1505. if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
  1506. if (is_codefence(data + beg + i, end - beg - i, NULL, NULL))
  1507. in_fence = !in_fence;
  1508. }
  1509. /* Only check for new list items if we are **not** inside
  1510. * a fenced code block */
  1511. if (!in_fence) {
  1512. has_next_uli = prefix_uli(data + beg + i, end - beg - i);
  1513. has_next_oli = prefix_oli(data + beg + i, end - beg - i);
  1514. }
  1515. /* checking for a new item */
  1516. if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
  1517. if (in_empty)
  1518. has_inside_empty = 1;
  1519. /* the following item must have the same (or less) indentation */
  1520. if (pre <= orgpre) {
  1521. /* if the following item has different list type, we end this list */
  1522. if (in_empty && (
  1523. ((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
  1524. (!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli)))
  1525. *flags |= HOEDOWN_LI_END;
  1526. break;
  1527. }
  1528. if (!sublist)
  1529. sublist = work->size;
  1530. }
  1531. /* joining only indented stuff after empty lines;
  1532. * note that now we only require 1 space of indentation
  1533. * to continue a list */
  1534. else if (in_empty && pre == 0) {
  1535. *flags |= HOEDOWN_LI_END;
  1536. break;
  1537. }
  1538. if (in_empty) {
  1539. hoedown_buffer_putc(work, '\n');
  1540. has_inside_empty = 1;
  1541. in_empty = 0;
  1542. }
  1543. /* adding the line without prefix into the working buffer */
  1544. hoedown_buffer_put(work, data + beg + i, end - beg - i);
  1545. beg = end;
  1546. }
  1547. /* render of li contents */
  1548. if (has_inside_empty)
  1549. *flags |= HOEDOWN_LI_BLOCK;
  1550. if (*flags & HOEDOWN_LI_BLOCK) {
  1551. /* intermediate render of block li */
  1552. if (sublist && sublist < work->size) {
  1553. parse_block(inter, doc, work->data, sublist);
  1554. parse_block(inter, doc, work->data + sublist, work->size - sublist);
  1555. }
  1556. else
  1557. parse_block(inter, doc, work->data, work->size);
  1558. } else {
  1559. /* intermediate render of inline li */
  1560. if (sublist && sublist < work->size) {
  1561. parse_inline(inter, doc, work->data, sublist);
  1562. parse_block(inter, doc, work->data + sublist, work->size - sublist);
  1563. }
  1564. else
  1565. parse_inline(inter, doc, work->data, work->size);
  1566. }
  1567. /* render of li itself */
  1568. if (doc->md.listitem)
  1569. doc->md.listitem(ob, inter, *flags, &doc->data);
  1570. popbuf(doc, BUFFER_SPAN);
  1571. popbuf(doc, BUFFER_SPAN);
  1572. return beg;
  1573. }
  1574. /* parse_list • parsing ordered or unordered list block */
  1575. static size_t
  1576. parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags flags)
  1577. {
  1578. hoedown_buffer *work = 0;
  1579. size_t i = 0, j;
  1580. work = newbuf(doc, BUFFER_BLOCK);
  1581. while (i < size) {
  1582. j = parse_listitem(work, doc, data + i, size - i, &flags);
  1583. i += j;
  1584. if (!j || (flags & HOEDOWN_LI_END))
  1585. break;
  1586. }
  1587. if (doc->md.list)
  1588. doc->md.list(ob, work, flags, &doc->data);
  1589. popbuf(doc, BUFFER_BLOCK);
  1590. return i;
  1591. }
  1592. /* parse_atxheader • parsing of atx-style headers */
  1593. static size_t
  1594. parse_atxheader(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1595. {
  1596. size_t level = 0;
  1597. size_t i, end, skip;
  1598. while (level < size && level < 6 && data[level] == '#')
  1599. level++;
  1600. for (i = level; i < size && data[i] == ' '; i++);
  1601. for (end = i; end < size && data[end] != '\n'; end++);
  1602. skip = end;
  1603. while (end && data[end - 1] == '#')
  1604. end--;
  1605. while (end && data[end - 1] == ' ')
  1606. end--;
  1607. if (end > i) {
  1608. hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
  1609. parse_inline(work, doc, data + i, end - i);
  1610. if (doc->md.header)
  1611. doc->md.header(ob, work, (int)level, &doc->data);
  1612. popbuf(doc, BUFFER_SPAN);
  1613. }
  1614. return skip;
  1615. }
  1616. /* parse_footnote_def • parse a single footnote definition */
  1617. static void
  1618. parse_footnote_def(hoedown_buffer *ob, hoedown_document *doc, unsigned int num, uint8_t *data, size_t size)
  1619. {
  1620. hoedown_buffer *work = 0;
  1621. work = newbuf(doc, BUFFER_SPAN);
  1622. parse_block(work, doc, data, size);
  1623. if (doc->md.footnote_def)
  1624. doc->md.footnote_def(ob, work, num, &doc->data);
  1625. popbuf(doc, BUFFER_SPAN);
  1626. }
  1627. /* parse_footnote_list • render the contents of the footnotes */
  1628. static void
  1629. parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_list *footnotes)
  1630. {
  1631. hoedown_buffer *work = 0;
  1632. struct footnote_item *item;
  1633. struct footnote_ref *ref;
  1634. if (footnotes->count == 0)
  1635. return;
  1636. work = newbuf(doc, BUFFER_BLOCK);
  1637. item = footnotes->head;
  1638. while (item) {
  1639. ref = item->ref;
  1640. parse_footnote_def(work, doc, ref->num, ref->contents->data, ref->contents->size);
  1641. item = item->next;
  1642. }
  1643. if (doc->md.footnotes)
  1644. doc->md.footnotes(ob, work, &doc->data);
  1645. popbuf(doc, BUFFER_BLOCK);
  1646. }
  1647. /* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */
  1648. /* returns tag length on match, 0 otherwise */
  1649. /* assumes data starts with "<" */
  1650. static size_t
  1651. htmlblock_is_end(
  1652. const char *tag,
  1653. size_t tag_len,
  1654. hoedown_document *doc,
  1655. uint8_t *data,
  1656. size_t size)
  1657. {
  1658. size_t i = tag_len + 3, w;
  1659. /* try to match the end tag */
  1660. /* note: we're not considering tags like "</tag >" which are still valid */
  1661. if (i > size ||
  1662. data[1] != '/' ||
  1663. strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
  1664. data[tag_len + 2] != '>')
  1665. return 0;
  1666. /* rest of the line must be empty */
  1667. if ((w = is_empty(data + i, size - i)) == 0 && i < size)
  1668. return 0;
  1669. return i + w;
  1670. }
  1671. /* htmlblock_find_end • try to find HTML block ending tag */
  1672. /* returns the length on match, 0 otherwise */
  1673. static size_t
  1674. htmlblock_find_end(
  1675. const char *tag,
  1676. size_t tag_len,
  1677. hoedown_document *doc,
  1678. uint8_t *data,
  1679. size_t size)
  1680. {
  1681. size_t i = 0, w;
  1682. while (1) {
  1683. while (i < size && data[i] != '<') i++;
  1684. if (i >= size) return 0;
  1685. w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i);
  1686. if (w) return i + w;
  1687. i++;
  1688. }
  1689. }
  1690. /* htmlblock_find_end_strict • try to find end of HTML block in strict mode */
  1691. /* (it must be an unindented line, and have a blank line afterwads) */
  1692. /* returns the length on match, 0 otherwise */
  1693. static size_t
  1694. htmlblock_find_end_strict(
  1695. const char *tag,
  1696. size_t tag_len,
  1697. hoedown_document *doc,
  1698. uint8_t *data,
  1699. size_t size)
  1700. {
  1701. size_t i = 0, mark;
  1702. while (1) {
  1703. mark = i;
  1704. while (i < size && data[i] != '\n') i++;
  1705. if (i < size) i++;
  1706. if (i == mark) return 0;
  1707. if (data[mark] == ' ' && mark > 0) continue;
  1708. mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark);
  1709. if (mark == i && (is_empty(data + i, size - i) || i >= size)) break;
  1710. }
  1711. return i;
  1712. }
  1713. /* parse_htmlblock • parsing of inline HTML block */
  1714. static size_t
  1715. parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render)
  1716. {
  1717. hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
  1718. size_t i, j = 0, tag_len, tag_end;
  1719. const char *curtag = NULL;
  1720. work.data = data;
  1721. /* identification of the opening tag */
  1722. if (size < 2 || data[0] != '<')
  1723. return 0;
  1724. i = 1;
  1725. while (i < size && data[i] != '>' && data[i] != ' ')
  1726. i++;
  1727. if (i < size)
  1728. curtag = hoedown_find_block_tag((char *)data + 1, (int)i - 1);
  1729. /* handling of special cases */
  1730. if (!curtag) {
  1731. /* HTML comment, laxist form */
  1732. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  1733. i = 5;
  1734. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  1735. i++;
  1736. i++;
  1737. if (i < size)
  1738. j = is_empty(data + i, size - i);
  1739. if (j) {
  1740. work.size = i + j;
  1741. if (do_render && doc->md.blockhtml)
  1742. doc->md.blockhtml(ob, &work, &doc->data);
  1743. return work.size;
  1744. }
  1745. }
  1746. /* HR, which is the only self-closing block tag considered */
  1747. if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
  1748. i = 3;
  1749. while (i < size && data[i] != '>')
  1750. i++;
  1751. if (i + 1 < size) {
  1752. i++;
  1753. j = is_empty(data + i, size - i);
  1754. if (j) {
  1755. work.size = i + j;
  1756. if (do_render && doc->md.blockhtml)
  1757. doc->md.blockhtml(ob, &work, &doc->data);
  1758. return work.size;
  1759. }
  1760. }
  1761. }
  1762. /* no special case recognised */
  1763. return 0;
  1764. }
  1765. /* looking for a matching closing tag in strict mode */
  1766. tag_len = strlen(curtag);
  1767. tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size);
  1768. /* if not found, trying a second pass looking for indented match */
  1769. /* but not if tag is "ins" or "del" (following original Markdown.pl) */
  1770. if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0)
  1771. tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size);
  1772. if (!tag_end)
  1773. return 0;
  1774. /* the end of the block has been found */
  1775. work.size = tag_end;
  1776. if (do_render && doc->md.blockhtml)
  1777. doc->md.blockhtml(ob, &work, &doc->data);
  1778. return tag_end;
  1779. }
  1780. static void
  1781. parse_table_row(
  1782. hoedown_buffer *ob,
  1783. hoedown_document *doc,
  1784. uint8_t *data,
  1785. size_t size,
  1786. size_t columns,
  1787. hoedown_table_flags *col_data,
  1788. hoedown_table_flags header_flag)
  1789. {
  1790. size_t i = 0, col, len;
  1791. hoedown_buffer *row_work = 0;
  1792. if (!doc->md.table_cell || !doc->md.table_row)
  1793. return;
  1794. row_work = newbuf(doc, BUFFER_SPAN);
  1795. if (i < size && data[i] == '|')
  1796. i++;
  1797. for (col = 0; col < columns && i < size; ++col) {
  1798. size_t cell_start, cell_end;
  1799. hoedown_buffer *cell_work;
  1800. cell_work = newbuf(doc, BUFFER_SPAN);
  1801. while (i < size && _isspace(data[i]))
  1802. i++;
  1803. cell_start = i;
  1804. len = find_emph_char(data + i, size - i, '|');
  1805. /* Two possibilities for len == 0:
  1806. 1) No more pipe char found in the current line.
  1807. 2) The next pipe is right after the current one, i.e. empty cell.
  1808. For case 1, we skip to the end of line; for case 2 we just continue.
  1809. */
  1810. if (len == 0 && i < size && data[i] != '|')
  1811. len = size - i;
  1812. i += len;
  1813. cell_end = i - 1;
  1814. while (cell_end > cell_start && _isspace(data[cell_end]))
  1815. cell_end--;
  1816. parse_inline(cell_work, doc, data + cell_start, 1 + cell_end - cell_start);
  1817. doc->md.table_cell(row_work, cell_work, col_data[col] | header_flag, &doc->data);
  1818. popbuf(doc, BUFFER_SPAN);
  1819. i++;
  1820. }
  1821. for (; col < columns; ++col) {
  1822. hoedown_buffer empty_cell = { 0, 0, 0, 0, NULL, NULL, NULL };
  1823. doc->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, &doc->data);
  1824. }
  1825. doc->md.table_row(ob, row_work, &doc->data);
  1826. popbuf(doc, BUFFER_SPAN);
  1827. }
  1828. static size_t
  1829. parse_table_header(
  1830. hoedown_buffer *ob,
  1831. hoedown_document *doc,
  1832. uint8_t *data,
  1833. size_t size,
  1834. size_t *columns,
  1835. hoedown_table_flags **column_data)
  1836. {
  1837. int pipes;
  1838. size_t i = 0, col, header_end, under_end;
  1839. pipes = 0;
  1840. while (i < size && data[i] != '\n')
  1841. if (data[i++] == '|')
  1842. pipes++;
  1843. if (i == size || pipes == 0)
  1844. return 0;
  1845. header_end = i;
  1846. while (header_end > 0 && _isspace(data[header_end - 1]))
  1847. header_end--;
  1848. if (data[0] == '|')
  1849. pipes--;
  1850. if (header_end && data[header_end - 1] == '|')
  1851. pipes--;
  1852. if (pipes < 0)
  1853. return 0;
  1854. *columns = pipes + 1;
  1855. *column_data = hoedown_calloc(*columns, sizeof(hoedown_table_flags));
  1856. /* Parse the header underline */
  1857. i++;
  1858. if (i < size && data[i] == '|')
  1859. i++;
  1860. under_end = i;
  1861. while (under_end < size && data[under_end] != '\n')
  1862. under_end++;
  1863. for (col = 0; col < *columns && i < under_end; ++col) {
  1864. size_t dashes = 0;
  1865. while (i < under_end && data[i] == ' ')
  1866. i++;
  1867. if (data[i] == ':') {
  1868. i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_LEFT;
  1869. dashes++;
  1870. }
  1871. while (i < under_end && data[i] == '-') {
  1872. i++; dashes++;
  1873. }
  1874. if (i < under_end && data[i] == ':') {
  1875. i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_RIGHT;
  1876. dashes++;
  1877. }
  1878. while (i < under_end && data[i] == ' ')
  1879. i++;
  1880. if (i < under_end && data[i] != '|' && data[i] != '+')
  1881. break;
  1882. if (dashes < 3)
  1883. break;
  1884. i++;
  1885. }
  1886. if (col < *columns)
  1887. return 0;
  1888. parse_table_row(
  1889. ob, doc, data,
  1890. header_end,
  1891. *columns,
  1892. *column_data,
  1893. HOEDOWN_TABLE_HEADER
  1894. );
  1895. return under_end + 1;
  1896. }
  1897. static size_t
  1898. parse_table(
  1899. hoedown_buffer *ob,
  1900. hoedown_document *doc,
  1901. uint8_t *data,
  1902. size_t size)
  1903. {
  1904. size_t i;
  1905. hoedown_buffer *work = 0;
  1906. hoedown_buffer *header_work = 0;
  1907. hoedown_buffer *body_work = 0;
  1908. size_t columns;
  1909. hoedown_table_flags *col_data = NULL;
  1910. work = newbuf(doc, BUFFER_BLOCK);
  1911. header_work = newbuf(doc, BUFFER_SPAN);
  1912. body_work = newbuf(doc, BUFFER_BLOCK);
  1913. i = parse_table_header(header_work, doc, data, size, &columns, &col_data);
  1914. if (i > 0) {
  1915. while (i < size) {
  1916. size_t row_start;
  1917. int pipes = 0;
  1918. row_start = i;
  1919. while (i < size && data[i] != '\n')
  1920. if (data[i++] == '|')
  1921. pipes++;
  1922. if (pipes == 0 || i == size) {
  1923. i = row_start;
  1924. break;
  1925. }
  1926. parse_table_row(
  1927. body_work,
  1928. doc,
  1929. data + row_start,
  1930. i - row_start,
  1931. columns,
  1932. col_data, 0
  1933. );
  1934. i++;
  1935. }
  1936. if (doc->md.table_header)
  1937. doc->md.table_header(work, header_work, &doc->data);
  1938. if (doc->md.table_body)
  1939. doc->md.table_body(work, body_work, &doc->data);
  1940. if (doc->md.table)
  1941. doc->md.table(ob, work, &doc->data);
  1942. }
  1943. free(col_data);
  1944. popbuf(doc, BUFFER_SPAN);
  1945. popbuf(doc, BUFFER_BLOCK);
  1946. popbuf(doc, BUFFER_BLOCK);
  1947. return i;
  1948. }
  1949. /* parse_block • parsing of one block, returning next uint8_t to parse */
  1950. static void
  1951. parse_block(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1952. {
  1953. size_t beg, end, i;
  1954. uint8_t *txt_data;
  1955. beg = 0;
  1956. if (doc->work_bufs[BUFFER_SPAN].size +
  1957. doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
  1958. return;
  1959. while (beg < size) {
  1960. txt_data = data + beg;
  1961. end = size - beg;
  1962. if (is_atxheader(doc, txt_data, end))
  1963. beg += parse_atxheader(ob, doc, txt_data, end);
  1964. else if (data[beg] == '<' && doc->md.blockhtml &&
  1965. (i = parse_htmlblock(ob, doc, txt_data, end, 1)) != 0)
  1966. beg += i;
  1967. else if ((i = is_empty(txt_data, end)) != 0)
  1968. beg += i;
  1969. else if (is_hrule(txt_data, end)) {
  1970. if (doc->md.hrule)
  1971. doc->md.hrule(ob, &doc->data);
  1972. while (beg < size && data[beg] != '\n')
  1973. beg++;
  1974. beg++;
  1975. }
  1976. else if ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 &&
  1977. (i = parse_fencedcode(ob, doc, txt_data, end)) != 0)
  1978. beg += i;
  1979. else if ((doc->ext_flags & HOEDOWN_EXT_TABLES) != 0 &&
  1980. (i = parse_table(ob, doc, txt_data, end)) != 0)
  1981. beg += i;
  1982. else if (prefix_quote(txt_data, end))
  1983. beg += parse_blockquote(ob, doc, txt_data, end);
  1984. else if (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end))
  1985. beg += parse_blockcode(ob, doc, txt_data, end);
  1986. else if (prefix_uli(txt_data, end))
  1987. beg += parse_list(ob, doc, txt_data, end, 0);
  1988. else if (prefix_oli(txt_data, end))
  1989. beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_ORDERED);
  1990. else
  1991. beg += parse_paragraph(ob, doc, txt_data, end);
  1992. }
  1993. }
  1994. /*********************
  1995. * REFERENCE PARSING *
  1996. *********************/
  1997. /* is_footnote • returns whether a line is a footnote definition or not */
  1998. static int
  1999. is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct footnote_list *list)
  2000. {
  2001. size_t i = 0;
  2002. hoedown_buffer *contents = 0;
  2003. size_t ind = 0;
  2004. int in_empty = 0;
  2005. size_t start = 0;
  2006. size_t id_offset, id_end;
  2007. /* up to 3 optional leading spaces */
  2008. if (beg + 3 >= end) return 0;
  2009. if (data[beg] == ' ') { i = 1;
  2010. if (data[beg + 1] == ' ') { i = 2;
  2011. if (data[beg + 2] == ' ') { i = 3;
  2012. if (data[beg + 3] == ' ') return 0; } } }
  2013. i += beg;
  2014. /* id part: caret followed by anything between brackets */
  2015. if (data[i] != '[') return 0;
  2016. i++;
  2017. if (i >= end || data[i] != '^') return 0;
  2018. i++;
  2019. id_offset = i;
  2020. while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
  2021. i++;
  2022. if (i >= end || data[i] != ']') return 0;
  2023. id_end = i;
  2024. /* spacer: colon (space | tab)* newline? (space | tab)* */
  2025. i++;
  2026. if (i >= end || data[i] != ':') return 0;
  2027. i++;
  2028. /* getting content buffer */
  2029. contents = hoedown_buffer_new(64);
  2030. start = i;
  2031. /* process lines similar to a list item */
  2032. while (i < end) {
  2033. while (i < end && data[i] != '\n' && data[i] != '\r') i++;
  2034. /* process an empty line */
  2035. if (is_empty(data + start, i - start)) {
  2036. in_empty = 1;
  2037. if (i < end && (data[i] == '\n' || data[i] == '\r')) {
  2038. i++;
  2039. if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
  2040. }
  2041. start = i;
  2042. continue;
  2043. }
  2044. /* calculating the indentation */
  2045. ind = 0;
  2046. while (ind < 4 && start + ind < end && data[start + ind] == ' ')
  2047. ind++;
  2048. /* joining only indented stuff after empty lines;
  2049. * note that now we only require 1 space of indentation
  2050. * to continue, just like lists */
  2051. if (ind == 0) {
  2052. if (start == id_end + 2 && data[start] == '\t') {}
  2053. else break;
  2054. }
  2055. else if (in_empty) {
  2056. hoedown_buffer_putc(contents, '\n');
  2057. }
  2058. in_empty = 0;
  2059. /* adding the line into the content buffer */
  2060. hoedown_buffer_put(contents, data + start + ind, i - start - ind);
  2061. /* add carriage return */
  2062. if (i < end) {
  2063. hoedown_buffer_putc(contents, '\n');
  2064. if (i < end && (data[i] == '\n' || data[i] == '\r')) {
  2065. i++;
  2066. if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
  2067. }
  2068. }
  2069. start = i;
  2070. }
  2071. if (last)
  2072. *last = start;
  2073. if (list) {
  2074. struct footnote_ref *ref;
  2075. ref = create_footnote_ref(list, data + id_offset, id_end - id_offset);
  2076. if (!ref)
  2077. return 0;
  2078. if (!add_footnote_ref(list, ref)) {
  2079. free_footnote_ref(ref);
  2080. return 0;
  2081. }
  2082. ref->contents = contents;
  2083. }
  2084. return 1;
  2085. }
  2086. /* is_ref • returns whether a line is a reference or not */
  2087. static int
  2088. is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
  2089. {
  2090. /* int n; */
  2091. size_t i = 0;
  2092. size_t id_offset, id_end;
  2093. size_t link_offset, link_end;
  2094. size_t title_offset, title_end;
  2095. size_t line_end;
  2096. /* up to 3 optional leading spaces */
  2097. if (beg + 3 >= end) return 0;
  2098. if (data[beg] == ' ') { i = 1;
  2099. if (data[beg + 1] == ' ') { i = 2;
  2100. if (data[beg + 2] == ' ') { i = 3;
  2101. if (data[beg + 3] == ' ') return 0; } } }
  2102. i += beg;
  2103. /* id part: anything but a newline between brackets */
  2104. if (data[i] != '[') return 0;
  2105. i++;
  2106. id_offset = i;
  2107. while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
  2108. i++;
  2109. if (i >= end || data[i] != ']') return 0;
  2110. id_end = i;
  2111. /* spacer: colon (space | tab)* newline? (space | tab)* */
  2112. i++;
  2113. if (i >= end || data[i] != ':') return 0;
  2114. i++;
  2115. while (i < end && data[i] == ' ') i++;
  2116. if (i < end && (data[i] == '\n' || data[i] == '\r')) {
  2117. i++;
  2118. if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
  2119. while (i < end && data[i] == ' ') i++;
  2120. if (i >= end) return 0;
  2121. /* link: spacing-free sequence, optionally between angle brackets */
  2122. if (data[i] == '<')
  2123. i++;
  2124. link_offset = i;
  2125. while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
  2126. i++;
  2127. if (data[i - 1] == '>') link_end = i - 1;
  2128. else link_end = i;
  2129. /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
  2130. while (i < end && data[i] == ' ') i++;
  2131. if (i < end && data[i] != '\n' && data[i] != '\r'
  2132. && data[i] != '\'' && data[i] != '"' && data[i] != '(')
  2133. return 0;
  2134. line_end = 0;
  2135. /* computing end-of-line */
  2136. if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
  2137. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  2138. line_end = i + 1;
  2139. /* optional (space|tab)* spacer after a newline */
  2140. if (line_end) {
  2141. i = line_end + 1;
  2142. while (i < end && data[i] == ' ') i++; }
  2143. /* optional title: any non-newline sequence enclosed in '"()
  2144. alone on its line */
  2145. title_offset = title_end = 0;
  2146. if (i + 1 < end
  2147. && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
  2148. i++;
  2149. title_offset = i;
  2150. /* looking for EOL */
  2151. while (i < end && data[i] != '\n' && data[i] != '\r') i++;
  2152. if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
  2153. title_end = i + 1;
  2154. else title_end = i;
  2155. /* stepping back */
  2156. i -= 1;
  2157. while (i > title_offset && data[i] == ' ')
  2158. i -= 1;
  2159. if (i > title_offset
  2160. && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
  2161. line_end = title_end;
  2162. title_end = i; } }
  2163. if (!line_end || link_end == link_offset)
  2164. return 0; /* garbage after the link empty link */
  2165. /* a valid ref has been found, filling-in return structures */
  2166. if (last)
  2167. *last = line_end;
  2168. if (refs) {
  2169. struct link_ref *ref;
  2170. ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
  2171. if (!ref)
  2172. return 0;
  2173. ref->link = hoedown_buffer_new(link_end - link_offset);
  2174. hoedown_buffer_put(ref->link, data + link_offset, link_end - link_offset);
  2175. if (title_end > title_offset) {
  2176. ref->title = hoedown_buffer_new(title_end - title_offset);
  2177. hoedown_buffer_put(ref->title, data + title_offset, title_end - title_offset);
  2178. }
  2179. }
  2180. return 1;
  2181. }
  2182. static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size)
  2183. {
  2184. /* This code makes two assumptions:
  2185. * - Input is valid UTF-8. (Any byte with top two bits 10 is skipped,
  2186. * whether or not it is a valid UTF-8 continuation byte.)
  2187. * - Input contains no combining characters. (Combining characters
  2188. * should be skipped but are not.)
  2189. */
  2190. size_t i = 0, tab = 0;
  2191. while (i < size) {
  2192. size_t org = i;
  2193. while (i < size && line[i] != '\t') {
  2194. /* ignore UTF-8 continuation bytes */
  2195. if ((line[i] & 0xc0) != 0x80)
  2196. tab++;
  2197. i++;
  2198. }
  2199. if (i > org)
  2200. hoedown_buffer_put(ob, line + org, i - org);
  2201. if (i >= size)
  2202. break;
  2203. do {
  2204. hoedown_buffer_putc(ob, ' '); tab++;
  2205. } while (tab % 4);
  2206. i++;
  2207. }
  2208. }
  2209. /**********************
  2210. * EXPORTED FUNCTIONS *
  2211. **********************/
  2212. hoedown_document *
  2213. hoedown_document_new(
  2214. const hoedown_renderer *renderer,
  2215. hoedown_extensions extensions,
  2216. size_t max_nesting)
  2217. {
  2218. hoedown_document *doc = NULL;
  2219. assert(max_nesting > 0 && renderer);
  2220. doc = hoedown_malloc(sizeof(hoedown_document));
  2221. memcpy(&doc->md, renderer, sizeof(hoedown_renderer));
  2222. doc->data.opaque = renderer->opaque;
  2223. hoedown_stack_init(&doc->work_bufs[BUFFER_BLOCK], 4);
  2224. hoedown_stack_init(&doc->work_bufs[BUFFER_SPAN], 8);
  2225. memset(doc->active_char, 0x0, 256);
  2226. if (extensions & HOEDOWN_EXT_UNDERLINE && doc->md.underline) {
  2227. doc->active_char['_'] = MD_CHAR_EMPHASIS;
  2228. }
  2229. if (doc->md.emphasis || doc->md.double_emphasis || doc->md.triple_emphasis) {
  2230. doc->active_char['*'] = MD_CHAR_EMPHASIS;
  2231. doc->active_char['_'] = MD_CHAR_EMPHASIS;
  2232. if (extensions & HOEDOWN_EXT_STRIKETHROUGH)
  2233. doc->active_char['~'] = MD_CHAR_EMPHASIS;
  2234. if (extensions & HOEDOWN_EXT_HIGHLIGHT)
  2235. doc->active_char['='] = MD_CHAR_EMPHASIS;
  2236. }
  2237. if (doc->md.codespan)
  2238. doc->active_char['`'] = MD_CHAR_CODESPAN;
  2239. if (doc->md.linebreak)
  2240. doc->active_char['\n'] = MD_CHAR_LINEBREAK;
  2241. if (doc->md.image || doc->md.link || doc->md.footnotes || doc->md.footnote_ref) {
  2242. doc->active_char['['] = MD_CHAR_LINK;
  2243. doc->active_char['!'] = MD_CHAR_IMAGE;
  2244. }
  2245. doc->active_char['<'] = MD_CHAR_LANGLE;
  2246. doc->active_char['\\'] = MD_CHAR_ESCAPE;
  2247. doc->active_char['&'] = MD_CHAR_ENTITY;
  2248. if (extensions & HOEDOWN_EXT_AUTOLINK) {
  2249. doc->active_char[':'] = MD_CHAR_AUTOLINK_URL;
  2250. doc->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
  2251. doc->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
  2252. }
  2253. if (extensions & HOEDOWN_EXT_SUPERSCRIPT)
  2254. doc->active_char['^'] = MD_CHAR_SUPERSCRIPT;
  2255. if (extensions & HOEDOWN_EXT_QUOTE)
  2256. doc->active_char['"'] = MD_CHAR_QUOTE;
  2257. if (extensions & HOEDOWN_EXT_MATH)
  2258. doc->active_char['$'] = MD_CHAR_MATH;
  2259. /* Extension data */
  2260. doc->ext_flags = extensions;
  2261. doc->max_nesting = max_nesting;
  2262. doc->in_link_body = 0;
  2263. return doc;
  2264. }
  2265. void
  2266. hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
  2267. {
  2268. static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
  2269. hoedown_buffer *text;
  2270. size_t beg, end;
  2271. int footnotes_enabled;
  2272. text = hoedown_buffer_new(64);
  2273. /* Preallocate enough space for our buffer to avoid expanding while copying */
  2274. hoedown_buffer_grow(text, size);
  2275. /* reset the references table */
  2276. memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
  2277. footnotes_enabled = doc->ext_flags & HOEDOWN_EXT_FOOTNOTES;
  2278. /* reset the footnotes lists */
  2279. if (footnotes_enabled) {
  2280. memset(&doc->footnotes_found, 0x0, sizeof(doc->footnotes_found));
  2281. memset(&doc->footnotes_used, 0x0, sizeof(doc->footnotes_used));
  2282. }
  2283. /* first pass: looking for references, copying everything else */
  2284. beg = 0;
  2285. /* Skip a possible UTF-8 BOM, even though the Unicode standard
  2286. * discourages having these in UTF-8 documents */
  2287. if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0)
  2288. beg += 3;
  2289. while (beg < size) /* iterating over lines */
  2290. if (footnotes_enabled && is_footnote(data, beg, size, &end, &doc->footnotes_found))
  2291. beg = end;
  2292. else if (is_ref(data, beg, size, &end, doc->refs))
  2293. beg = end;
  2294. else { /* skipping to the next line */
  2295. end = beg;
  2296. while (end < size && data[end] != '\n' && data[end] != '\r')
  2297. end++;
  2298. /* adding the line body if present */
  2299. if (end > beg)
  2300. expand_tabs(text, data + beg, end - beg);
  2301. while (end < size && (data[end] == '\n' || data[end] == '\r')) {
  2302. /* add one \n per newline */
  2303. if (data[end] == '\n' || (end + 1 < size && data[end + 1] != '\n'))
  2304. hoedown_buffer_putc(text, '\n');
  2305. end++;
  2306. }
  2307. beg = end;
  2308. }
  2309. /* pre-grow the output buffer to minimize allocations */
  2310. hoedown_buffer_grow(ob, text->size + (text->size >> 1));
  2311. /* second pass: actual rendering */
  2312. if (doc->md.doc_header)
  2313. doc->md.doc_header(ob, 0, &doc->data);
  2314. if (text->size) {
  2315. /* adding a final newline if not already present */
  2316. if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
  2317. hoedown_buffer_putc(text, '\n');
  2318. parse_block(ob, doc, text->data, text->size);
  2319. }
  2320. /* footnotes */
  2321. if (footnotes_enabled)
  2322. parse_footnote_list(ob, doc, &doc->footnotes_used);
  2323. if (doc->md.doc_footer)
  2324. doc->md.doc_footer(ob, 0, &doc->data);
  2325. /* clean-up */
  2326. hoedown_buffer_free(text);
  2327. free_link_refs(doc->refs);
  2328. if (footnotes_enabled) {
  2329. free_footnote_list(&doc->footnotes_found, 1);
  2330. free_footnote_list(&doc->footnotes_used, 0);
  2331. }
  2332. assert(doc->work_bufs[BUFFER_SPAN].size == 0);
  2333. assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
  2334. }
  2335. void
  2336. hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
  2337. {
  2338. size_t i = 0, mark;
  2339. hoedown_buffer *text = hoedown_buffer_new(64);
  2340. /* reset the references table */
  2341. memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
  2342. /* first pass: expand tabs and process newlines */
  2343. hoedown_buffer_grow(text, size);
  2344. while (1) {
  2345. mark = i;
  2346. while (i < size && data[i] != '\n' && data[i] != '\r')
  2347. i++;
  2348. expand_tabs(text, data + mark, i - mark);
  2349. if (i >= size)
  2350. break;
  2351. while (i < size && (data[i] == '\n' || data[i] == '\r')) {
  2352. /* add one \n per newline */
  2353. if (data[i] == '\n' || (i + 1 < size && data[i + 1] != '\n'))
  2354. hoedown_buffer_putc(text, '\n');
  2355. i++;
  2356. }
  2357. }
  2358. /* second pass: actual rendering */
  2359. hoedown_buffer_grow(ob, text->size + (text->size >> 1));
  2360. if (doc->md.doc_header)
  2361. doc->md.doc_header(ob, 1, &doc->data);
  2362. parse_inline(ob, doc, text->data, text->size);
  2363. if (doc->md.doc_footer)
  2364. doc->md.doc_footer(ob, 1, &doc->data);
  2365. /* clean-up */
  2366. hoedown_buffer_free(text);
  2367. assert(doc->work_bufs[BUFFER_SPAN].size == 0);
  2368. assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
  2369. }
  2370. void
  2371. hoedown_document_free(hoedown_document *doc)
  2372. {
  2373. size_t i;
  2374. for (i = 0; i < (size_t)doc->work_bufs[BUFFER_SPAN].asize; ++i)
  2375. hoedown_buffer_free(doc->work_bufs[BUFFER_SPAN].item[i]);
  2376. for (i = 0; i < (size_t)doc->work_bufs[BUFFER_BLOCK].asize; ++i)
  2377. hoedown_buffer_free(doc->work_bufs[BUFFER_BLOCK].item[i]);
  2378. hoedown_stack_uninit(&doc->work_bufs[BUFFER_SPAN]);
  2379. hoedown_stack_uninit(&doc->work_bufs[BUFFER_BLOCK]);
  2380. free(doc);
  2381. }