PageRenderTime 53ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/misaka/hoedown/document.c

https://github.com/FSX/misaka
C | 3012 lines | 2784 code | 159 blank | 69 comment | 235 complexity | 5e80b52d9651aed69699268bc9bb5f6c MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. #include "document.h"
  2. #include <assert.h>
  3. #include <string.h>
  4. #include <ctype.h>
  5. #include <stdio.h>
  6. #include "stack.h"
  7. #ifndef _MSC_VER
  8. #include <strings.h>
  9. #else
  10. #define strncasecmp _strnicmp
  11. #endif
  12. #define REF_TABLE_SIZE 8
  13. #define BUFFER_BLOCK 0
  14. #define BUFFER_SPAN 1
  15. #define HOEDOWN_LI_END 8 /* internal list flag */
  16. const char *hoedown_find_block_tag(const char *str, unsigned int len);
  17. /***************
  18. * LOCAL TYPES *
  19. ***************/
  20. /* link_ref: one link reference, kept as a node in a bucket chain of a reference table */
  21. struct link_ref {
  22. unsigned int id; /* hash of the reference name, computed by hash_link_ref() */
  23. hoedown_buffer *link; /* released by free_link_refs() */
  24. hoedown_buffer *title; /* released by free_link_refs() */
  25. struct link_ref *next; /* next node in the same bucket; NULL ends the chain */
  26. };
  27. /* footnote_ref: reference to a footnote */
  28. struct footnote_ref {
  29. unsigned int id; /* hash of the footnote name, computed by hash_link_ref() */
  30. int is_used; /* set to 1 by char_link() the first time the footnote is referenced */
  31. unsigned int num; /* footnotes_used.count at the moment the footnote was first referenced */
  32. hoedown_buffer *contents; /* released by free_footnote_ref() */
  33. };
  34. /* footnote_item: an item in a footnote_list */
  35. struct footnote_item {
  36. struct footnote_ref *ref; /* freed together with the item only when free_footnote_list() is told to free refs */
  37. struct footnote_item *next; /* following item; NULL for the last one */
  38. };
  39. /* footnote_list: singly linked list of footnote_item, grown at the tail */
  40. struct footnote_list {
  41. unsigned int count; /* number of items; incremented by add_footnote_ref() */
  42. struct footnote_item *head; /* first item; NULL while the list is empty */
  43. struct footnote_item *tail; /* last item; add_footnote_ref() appends after it */
  44. };
  45. /* char_trigger: pointer to a handler that renders the span started by an active char */
  46. /* returns the number of chars consumed, or 0 when the handler took no action */
  47. /* data points at the active char that begins the span */
  48. /* offset is the number of valid chars before data */
  49. typedef size_t
  50. (*char_trigger)(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  51. static size_t char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); /* forward declarations: the handlers are referenced by markdown_char_ptrs before they are defined */
  52. static size_t char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  53. static size_t char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  54. static size_t char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  55. static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  56. static size_t char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  57. static size_t char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  58. static size_t char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  59. static size_t char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  60. static size_t char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  61. static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  62. static size_t char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  63. static size_t char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  64. static size_t char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
  65. enum markdown_char_t { /* handler ids; the order must match the entries of markdown_char_ptrs */
  66. MD_CHAR_NONE = 0, /* slot 0 of the handler table is NULL */
  67. MD_CHAR_EMPHASIS,
  68. MD_CHAR_CODESPAN,
  69. MD_CHAR_LINEBREAK,
  70. MD_CHAR_LINK,
  71. MD_CHAR_IMAGE,
  72. MD_CHAR_LANGLE,
  73. MD_CHAR_ESCAPE,
  74. MD_CHAR_ENTITY,
  75. MD_CHAR_AUTOLINK_URL,
  76. MD_CHAR_AUTOLINK_EMAIL,
  77. MD_CHAR_AUTOLINK_WWW,
  78. MD_CHAR_SUPERSCRIPT,
  79. MD_CHAR_QUOTE,
  80. MD_CHAR_MATH
  81. };
  82. static char_trigger markdown_char_ptrs[] = { /* handler table; parse_inline() indexes it with doc->active_char[byte] */
  83. NULL, /* position of MD_CHAR_NONE */
  84. &char_emphasis, /* MD_CHAR_EMPHASIS */
  85. &char_codespan, /* MD_CHAR_CODESPAN */
  86. &char_linebreak, /* MD_CHAR_LINEBREAK */
  87. &char_link, /* MD_CHAR_LINK */
  88. &char_image, /* MD_CHAR_IMAGE */
  89. &char_langle_tag, /* MD_CHAR_LANGLE */
  90. &char_escape, /* MD_CHAR_ESCAPE */
  91. &char_entity, /* MD_CHAR_ENTITY */
  92. &char_autolink_url, /* MD_CHAR_AUTOLINK_URL */
  93. &char_autolink_email, /* MD_CHAR_AUTOLINK_EMAIL */
  94. &char_autolink_www, /* MD_CHAR_AUTOLINK_WWW */
  95. &char_superscript, /* MD_CHAR_SUPERSCRIPT */
  96. &char_quote, /* MD_CHAR_QUOTE */
  97. &char_math /* MD_CHAR_MATH */
  98. };
  99. struct hoedown_document { /* parser state handed to every parsing function in this file */
  100. hoedown_renderer md; /* renderer callbacks; several are tested against NULL before being called */
  101. hoedown_renderer_data data; /* its address is the last argument of every renderer callback */
  102. struct link_ref *refs[REF_TABLE_SIZE]; /* presumably the bucket table used by add_link_ref()/find_link_ref() - confirm against callers */
  103. struct footnote_list footnotes_found; /* searched by char_link() to resolve a footnote reference */
  104. struct footnote_list footnotes_used; /* footnotes appended by char_link() on first reference */
  105. uint8_t active_char[256]; /* per-byte index into markdown_char_ptrs; 0 means the byte is plain text */
  106. hoedown_stack work_bufs[2]; /* scratch buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
  107. hoedown_extensions ext_flags; /* tested with the HOEDOWN_EXT_* bit flags */
  108. size_t max_nesting; /* parse_inline() emits nothing once more work buffers than this are in use */
  109. int in_link_body; /* when non-zero the char_autolink_* handlers return without acting */
  110. };
  111. /***************************
  112. * HELPER FUNCTIONS *
  113. ***************************/
  114. static hoedown_buffer *
  115. newbuf(hoedown_document *doc, int type)
  116. {
  117. static const size_t buf_size[2] = {256, 64};
  118. hoedown_buffer *work = NULL;
  119. hoedown_stack *pool = &doc->work_bufs[type];
  120. if (pool->size < pool->asize &&
  121. pool->item[pool->size] != NULL) {
  122. work = pool->item[pool->size++];
  123. work->size = 0;
  124. } else {
  125. work = hoedown_buffer_new(buf_size[type]);
  126. hoedown_stack_push(pool, work);
  127. }
  128. return work;
  129. }
  130. static void
  131. popbuf(hoedown_document *doc, int type)
  132. {
  133. doc->work_bufs[type].size--;
  134. }
  135. static void
  136. unscape_text(hoedown_buffer *ob, hoedown_buffer *src)
  137. {
  138. size_t i = 0, org;
  139. while (i < src->size) {
  140. org = i;
  141. while (i < src->size && src->data[i] != '\\')
  142. i++;
  143. if (i > org)
  144. hoedown_buffer_put(ob, src->data + org, i - org);
  145. if (i + 1 >= src->size)
  146. break;
  147. hoedown_buffer_putc(ob, src->data[i + 1]);
  148. i += 2;
  149. }
  150. }
  /* hash_link_ref: case-insensitive hash of a reference name.
   * Every byte is lower-cased with tolower() before being mixed in, so names
   * that differ only in ASCII case hash to the same value. An empty name
   * hashes to 0. */
  static unsigned int
  hash_link_ref(const uint8_t *link_ref, size_t length)
  {
      unsigned int acc = 0;
      const uint8_t *cursor = link_ref;
      size_t remaining = length;

      while (remaining > 0) {
          acc = tolower(*cursor) + (acc << 6) + (acc << 16) - acc;
          cursor++;
          remaining--;
      }

      return acc;
  }
  160. static struct link_ref *
  161. add_link_ref(
  162. struct link_ref **references,
  163. const uint8_t *name, size_t name_size)
  164. {
  165. struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref));
  166. ref->id = hash_link_ref(name, name_size);
  167. ref->next = references[ref->id % REF_TABLE_SIZE];
  168. references[ref->id % REF_TABLE_SIZE] = ref;
  169. return ref;
  170. }
  171. static struct link_ref *
  172. find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
  173. {
  174. unsigned int hash = hash_link_ref(name, length);
  175. struct link_ref *ref = NULL;
  176. ref = references[hash % REF_TABLE_SIZE];
  177. while (ref != NULL) {
  178. if (ref->id == hash)
  179. return ref;
  180. ref = ref->next;
  181. }
  182. return NULL;
  183. }
  184. static void
  185. free_link_refs(struct link_ref **references)
  186. {
  187. size_t i;
  188. for (i = 0; i < REF_TABLE_SIZE; ++i) {
  189. struct link_ref *r = references[i];
  190. struct link_ref *next;
  191. while (r) {
  192. next = r->next;
  193. hoedown_buffer_free(r->link);
  194. hoedown_buffer_free(r->title);
  195. free(r);
  196. r = next;
  197. }
  198. }
  199. }
  200. static struct footnote_ref *
  201. create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
  202. {
  203. struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref));
  204. ref->id = hash_link_ref(name, name_size);
  205. return ref;
  206. }
  207. static int
  208. add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
  209. {
  210. struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item));
  211. if (!item)
  212. return 0;
  213. item->ref = ref;
  214. if (list->head == NULL) {
  215. list->head = list->tail = item;
  216. } else {
  217. list->tail->next = item;
  218. list->tail = item;
  219. }
  220. list->count++;
  221. return 1;
  222. }
  223. static struct footnote_ref *
  224. find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
  225. {
  226. unsigned int hash = hash_link_ref(name, length);
  227. struct footnote_item *item = NULL;
  228. item = list->head;
  229. while (item != NULL) {
  230. if (item->ref->id == hash)
  231. return item->ref;
  232. item = item->next;
  233. }
  234. return NULL;
  235. }
  236. static void
  237. free_footnote_ref(struct footnote_ref *ref)
  238. {
  239. hoedown_buffer_free(ref->contents);
  240. free(ref);
  241. }
  242. static void
  243. free_footnote_list(struct footnote_list *list, int free_refs)
  244. {
  245. struct footnote_item *item = list->head;
  246. struct footnote_item *next;
  247. while (item) {
  248. next = item->next;
  249. if (free_refs)
  250. free_footnote_ref(item->ref);
  251. free(item);
  252. item = next;
  253. }
  254. }
  /*
   * Tell whether `c` is a Markdown spacing character.
   * Only the plain space and the newline qualify: according to the original
   * note, tabs and carriage returns are filtered out during preprocessing.
   *
   * A UTF-8 aware version would decode a Unicode codepoint and test its
   * space property instead of looking at a single byte.
   *
   * Returns 1 for ' ' and '\n', 0 for anything else.
   */
  static int
  _isspace(int c)
  {
      switch (c) {
      case ' ':
      case '\n':
          return 1;
      default:
          return 0;
      }
  }
  /* is_empty_all: return 1 when every byte of data[0..size) is a spacing
   * character (see _isspace), 0 otherwise. An empty range yields 1. */
  static int
  is_empty_all(const uint8_t *data, size_t size)
  {
      size_t k;

      for (k = 0; k < size; k++)
          if (!_isspace(data[k]))
              return 0;

      return 1;
  }
  278. /*
  279. * Replace all spacing characters in data with spaces. As a special
  280. * case, this collapses a newline with the previous space, if possible.
  281. */
  282. static void
  283. replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size)
  284. {
  285. size_t i = 0, mark;
  286. hoedown_buffer_grow(ob, size);
  287. while (1) {
  288. mark = i;
  289. while (i < size && data[i] != '\n') i++;
  290. hoedown_buffer_put(ob, data + mark, i - mark);
  291. if (i >= size) break;
  292. if (!(i > 0 && data[i-1] == ' '))
  293. hoedown_buffer_putc(ob, ' ');
  294. i++;
  295. }
  296. }
  297. /****************************
  298. * INLINE PARSING FUNCTIONS *
  299. ****************************/
  /* is_mail_autolink: scan the address part of a mail autolink up to '>'.
   * Accepted characters are [-@._a-zA-Z0-9]; the address must contain exactly
   * one '@'. This is less strict than the original markdown e-mail matching.
   * Returns the number of bytes up to and including '>', or 0 when the text
   * is not a mail autolink (bad character, wrong '@' count, or no '>'). */
  static size_t
  is_mail_autolink(uint8_t *data, size_t size)
  {
      size_t pos, at_signs = 0;

      for (pos = 0; pos < size; pos++) {
          const uint8_t ch = data[pos];

          if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
              continue;

          if (ch == '@') {
              at_signs++;
              continue;
          }

          if (ch == '>')
              return (at_signs == 1) ? pos + 1 : 0;

          /* any other character cannot be part of the address */
          return 0;
      }

      return 0;
  }
  325. /* tag_length • returns the length of the given tag, or 0 is it's not valid */
  326. static size_t
  327. tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink)
  328. {
  329. size_t i, j;
  330. /* a valid tag can't be shorter than 3 chars */
  331. if (size < 3) return 0;
  332. if (data[0] != '<') return 0;
  333. /* HTML comment, laxist form */
  334. if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
  335. i = 5;
  336. while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
  337. i++;
  338. i++;
  339. if (i <= size)
  340. return i;
  341. }
  342. /* begins with a '<' optionally followed by '/', followed by letter or number */
  343. i = (data[1] == '/') ? 2 : 1;
  344. if (!isalnum(data[i]))
  345. return 0;
  346. /* scheme test */
  347. *autolink = HOEDOWN_AUTOLINK_NONE;
  348. /* try to find the beginning of an URI */
  349. while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
  350. i++;
  351. if (i > 1 && data[i] == '@') {
  352. if ((j = is_mail_autolink(data + i, size - i)) != 0) {
  353. *autolink = HOEDOWN_AUTOLINK_EMAIL;
  354. return i + j;
  355. }
  356. }
  357. if (i > 2 && data[i] == ':') {
  358. *autolink = HOEDOWN_AUTOLINK_NORMAL;
  359. i++;
  360. }
  361. /* completing autolink test: no spacing or ' or " */
  362. if (i >= size)
  363. *autolink = HOEDOWN_AUTOLINK_NONE;
  364. else if (*autolink) {
  365. j = i;
  366. while (i < size) {
  367. if (data[i] == '\\') i += 2;
  368. else if (data[i] == '>' || data[i] == '\'' ||
  369. data[i] == '"' || data[i] == ' ' || data[i] == '\n')
  370. break;
  371. else i++;
  372. }
  373. if (i >= size) return 0;
  374. if (i > j && data[i] == '>') return i + 1;
  375. /* one of the forbidden chars has been found */
  376. *autolink = HOEDOWN_AUTOLINK_NONE;
  377. }
  378. /* looking for something looking like a tag end */
  379. while (i < size && data[i] != '>') i++;
  380. if (i >= size) return 0;
  381. return i + 1;
  382. }
  383. /* parse_inline • parses the inline markdown elements of data[0..size) and renders them into ob */
  384. static void
  385. parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  386. {
  387. size_t i = 0, end = 0, consumed = 0; /* i: start of the text not yet emitted; end: scan position; consumed: end of the last span a handler accepted */
  388. hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; /* non-owning view handed to normal_text */
  389. uint8_t *active_char = doc->active_char;
  390. if (doc->work_bufs[BUFFER_SPAN].size +
  391. doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
  392. return; /* too many work buffers in use: nothing is emitted for this span */
  393. while (i < size) {
  394. /* advance end to the next active char (or to the end of the input) */
  395. while (end < size && active_char[data[end]] == 0)
  396. end++;
  397. if (doc->md.normal_text) { /* emit data[i..end) as plain text, through the callback when there is one */
  398. work.data = data + i;
  399. work.size = end - i;
  400. doc->md.normal_text(ob, &work, &doc->data);
  401. }
  402. else
  403. hoedown_buffer_put(ob, data + i, end - i);
  404. if (end >= size) break;
  405. i = end;
  406. end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i); /* offset argument = chars between the last accepted span and this active char */
  407. if (!end) /* no action from the callback: the active char stays pending and is emitted with the next plain-text run */
  408. end = i + 1;
  409. else { /* the handler consumed `end` chars: continue right after them */
  410. i += end;
  411. end = i;
  412. consumed = i;
  413. }
  414. }
  415. }
  /* is_escaped: tell whether the char at data[loc] is escaped by '\\'.
   * Counts the unbroken run of backslashes that ends right before loc; the
   * char is escaped when that count is odd. Returns 1 if escaped, else 0. */
  static int
  is_escaped(uint8_t *data, size_t loc)
  {
      size_t backslashes = 0;

      while (backslashes < loc && data[loc - backslashes - 1] == '\\')
          backslashes++;

      /* an even run is made only of escaped backslashes */
      return (int)(backslashes & 1);
  }
  426. /* find_emph_char • returns the index of the next unescaped c in data, skipping code spans and links; returns 0 when there is none (so a hit at index 0 also reads as "none") */
  427. static size_t
  428. find_emph_char(uint8_t *data, size_t size, uint8_t c)
  429. {
  430. size_t i = 0;
  431. while (i < size) {
  432. while (i < size && data[i] != c && data[i] != '[' && data[i] != '`') /* stop only on c or on the start of a construct to skip */
  433. i++;
  434. if (i == size)
  435. return 0;
  436. /* not counting escaped chars */
  437. if (is_escaped(data, i)) {
  438. i++; continue;
  439. }
  440. if (data[i] == c)
  441. return i;
  442. /* skipping a codespan */
  443. if (data[i] == '`') {
  444. size_t span_nb = 0, bt; /* span_nb: length of the opening backtick run; bt: length of the current backtick run */
  445. size_t tmp_i = 0; /* first c seen inside the span, used if the span never closes */
  446. /* counting the number of opening backticks */
  447. while (i < size && data[i] == '`') {
  448. i++; span_nb++;
  449. }
  450. if (i >= size) return 0;
  451. /* finding the matching closing sequence */
  452. bt = 0;
  453. while (i < size && bt < span_nb) {
  454. if (!tmp_i && data[i] == c) tmp_i = i;
  455. if (data[i] == '`') bt++;
  456. else bt = 0;
  457. i++;
  458. }
  459. /* not a well-formed codespan; use found matching emph char */
  460. if (bt < span_nb && i >= size) return tmp_i;
  461. }
  462. /* skipping a link */
  463. else if (data[i] == '[') {
  464. size_t tmp_i = 0; /* first c seen inside the link, used if the link turns out to be malformed */
  465. uint8_t cc; /* closing char of the second link part: ']' or ')' */
  466. i++;
  467. while (i < size && data[i] != ']') {
  468. if (!tmp_i && data[i] == c) tmp_i = i;
  469. i++;
  470. }
  471. i++; /* step over the closing ']' */
  472. while (i < size && _isspace(data[i]))
  473. i++;
  474. if (i >= size)
  475. return tmp_i;
  476. switch (data[i]) {
  477. case '[':
  478. cc = ']'; break;
  479. case '(':
  480. cc = ')'; break;
  481. default: /* no second part: report a c found in the brackets, otherwise resume the outer scan here */
  482. if (tmp_i)
  483. return tmp_i;
  484. else
  485. continue;
  486. }
  487. i++;
  488. while (i < size && data[i] != cc) {
  489. if (!tmp_i && data[i] == c) tmp_i = i;
  490. i++;
  491. }
  492. if (i >= size)
  493. return tmp_i;
  494. i++; /* step over the closing cc */
  495. }
  496. }
  497. return 0;
  498. }
  499. /* parse_emph1 • parsing single emphase */
  500. /* closed by a symbol not preceded by spacing and not followed by symbol */
  501. static size_t
  502. parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
  503. {
  504. size_t i = 0, len;
  505. hoedown_buffer *work = 0;
  506. int r;
  507. /* skipping one symbol if coming from emph3 */
  508. if (size > 1 && data[0] == c && data[1] == c) i = 1;
  509. while (i < size) {
  510. len = find_emph_char(data + i, size - i, c);
  511. if (!len) return 0;
  512. i += len;
  513. if (i >= size) return 0;
  514. if (data[i] == c && !_isspace(data[i - 1])) {
  515. if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
  516. if (i + 1 < size && isalnum(data[i + 1]))
  517. continue;
  518. }
  519. work = newbuf(doc, BUFFER_SPAN);
  520. parse_inline(work, doc, data, i);
  521. if (doc->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_')
  522. r = doc->md.underline(ob, work, &doc->data);
  523. else
  524. r = doc->md.emphasis(ob, work, &doc->data);
  525. popbuf(doc, BUFFER_SPAN);
  526. return r ? i + 1 : 0;
  527. }
  528. }
  529. return 0;
  530. }
  531. /* parse_emph2 • parsing single emphase */
  532. static size_t
  533. parse_emph2(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
  534. {
  535. size_t i = 0, len;
  536. hoedown_buffer *work = 0;
  537. int r;
  538. while (i < size) {
  539. len = find_emph_char(data + i, size - i, c);
  540. if (!len) return 0;
  541. i += len;
  542. if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
  543. work = newbuf(doc, BUFFER_SPAN);
  544. parse_inline(work, doc, data, i);
  545. if (c == '~')
  546. r = doc->md.strikethrough(ob, work, &doc->data);
  547. else if (c == '=')
  548. r = doc->md.highlight(ob, work, &doc->data);
  549. else
  550. r = doc->md.double_emphasis(ob, work, &doc->data);
  551. popbuf(doc, BUFFER_SPAN);
  552. return r ? i + 2 : 0;
  553. }
  554. i++;
  555. }
  556. return 0;
  557. }
  558. /* parse_emph3 • parsing single emphase */
  559. /* finds the first closing tag, and delegates to the other emph */
  560. static size_t
  561. parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
  562. {
  563. size_t i = 0, len;
  564. int r;
  565. while (i < size) {
  566. len = find_emph_char(data + i, size - i, c);
  567. if (!len) return 0;
  568. i += len;
  569. /* skip spacing preceded symbols */
  570. if (data[i] != c || _isspace(data[i - 1]))
  571. continue;
  572. if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && doc->md.triple_emphasis) {
  573. /* triple symbol found */
  574. hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
  575. parse_inline(work, doc, data, i);
  576. r = doc->md.triple_emphasis(ob, work, &doc->data);
  577. popbuf(doc, BUFFER_SPAN);
  578. return r ? i + 3 : 0;
  579. } else if (i + 1 < size && data[i + 1] == c) {
  580. /* double symbol found, handing over to emph1 */
  581. len = parse_emph1(ob, doc, data - 2, size + 2, c);
  582. if (!len) return 0;
  583. else return len - 2;
  584. } else {
  585. /* single symbol found, handing over to emph2 */
  586. len = parse_emph2(ob, doc, data - 1, size + 1, c);
  587. if (!len) return 0;
  588. else return len - 1;
  589. }
  590. }
  591. return 0;
  592. }
  593. /* parse_math • parses a math span until the given ending delimiter */
  594. static size_t
  595. parse_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size, const char *end, size_t delimsz, int displaymode)
  596. {
  597. hoedown_buffer text = { NULL, 0, 0, 0, NULL, NULL, NULL };
  598. size_t i = delimsz;
  599. if (!doc->md.math)
  600. return 0;
  601. /* find ending delimiter */
  602. while (1) {
  603. while (i < size && data[i] != (uint8_t)end[0])
  604. i++;
  605. if (i >= size)
  606. return 0;
  607. if (!is_escaped(data, i) && !(i + delimsz > size)
  608. && memcmp(data + i, end, delimsz) == 0)
  609. break;
  610. i++;
  611. }
  612. /* prepare buffers */
  613. text.data = data + delimsz;
  614. text.size = i - delimsz;
  615. /* if this is a $$ and MATH_EXPLICIT is not active,
  616. * guess whether displaymode should be enabled from the context */
  617. i += delimsz;
  618. if (delimsz == 2 && !(doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT))
  619. displaymode = is_empty_all(data - offset, offset) && is_empty_all(data + i, size - i);
  620. /* call callback */
  621. if (doc->md.math(ob, &text, displaymode, &doc->data))
  622. return i;
  623. return 0;
  624. }
  625. /* char_emphasis • single and double emphasis parsing */
  626. static size_t
  627. char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  628. {
  629. uint8_t c = data[0];
  630. size_t ret;
  631. if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
  632. if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(')
  633. return 0;
  634. }
  635. if (size > 2 && data[1] != c) {
  636. /* spacing cannot follow an opening emphasis;
  637. * strikethrough and highlight only takes two characters '~~' */
  638. if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0)
  639. return 0;
  640. return ret + 1;
  641. }
  642. if (size > 3 && data[1] == c && data[2] != c) {
  643. if (_isspace(data[2]) || (ret = parse_emph2(ob, doc, data + 2, size - 2, c)) == 0)
  644. return 0;
  645. return ret + 2;
  646. }
  647. if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
  648. if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, doc, data + 3, size - 3, c)) == 0)
  649. return 0;
  650. return ret + 3;
  651. }
  652. return 0;
  653. }
  654. /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
  655. static size_t
  656. char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  657. {
  658. if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
  659. return 0;
  660. /* removing the last space from ob and rendering */
  661. while (ob->size && ob->data[ob->size - 1] == ' ')
  662. ob->size--;
  663. return doc->md.linebreak(ob, &doc->data) ? 1 : 0;
  664. }
  665. /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
  666. static size_t
  667. char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  668. {
  669. hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
  670. size_t end, nb = 0, i, f_begin, f_end;
  671. /* counting the number of backticks in the delimiter */
  672. while (nb < size && data[nb] == '`')
  673. nb++;
  674. /* finding the next delimiter */
  675. i = 0;
  676. for (end = nb; end < size && i < nb; end++) {
  677. if (data[end] == '`') i++;
  678. else i = 0;
  679. }
  680. if (i < nb && end >= size)
  681. return 0; /* no matching delimiter */
  682. /* trimming outside spaces */
  683. f_begin = nb;
  684. while (f_begin < end && data[f_begin] == ' ')
  685. f_begin++;
  686. f_end = end - nb;
  687. while (f_end > nb && data[f_end-1] == ' ')
  688. f_end--;
  689. /* real code span */
  690. if (f_begin < f_end) {
  691. work.data = data + f_begin;
  692. work.size = f_end - f_begin;
  693. if (!doc->md.codespan(ob, &work, &doc->data))
  694. end = 0;
  695. } else {
  696. if (!doc->md.codespan(ob, 0, &doc->data))
  697. end = 0;
  698. }
  699. return end;
  700. }
  701. /* char_quote • '"' parsing a quote */
  702. static size_t
  703. char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  704. {
  705. size_t end, nq = 0, i, f_begin, f_end;
  706. /* counting the number of quotes in the delimiter */
  707. while (nq < size && data[nq] == '"')
  708. nq++;
  709. /* finding the next delimiter */
  710. end = nq;
  711. while (1) {
  712. i = end;
  713. end += find_emph_char(data + end, size - end, '"');
  714. if (end == i) return 0; /* no matching delimiter */
  715. i = end;
  716. while (end < size && data[end] == '"' && end - i < nq) end++;
  717. if (end - i >= nq) break;
  718. }
  719. /* trimming outside spaces */
  720. f_begin = nq;
  721. while (f_begin < end && data[f_begin] == ' ')
  722. f_begin++;
  723. f_end = end - nq;
  724. while (f_end > nq && data[f_end-1] == ' ')
  725. f_end--;
  726. /* real quote */
  727. if (f_begin < f_end) {
  728. hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
  729. parse_inline(work, doc, data + f_begin, f_end - f_begin);
  730. if (!doc->md.quote(ob, work, &doc->data))
  731. end = 0;
  732. popbuf(doc, BUFFER_SPAN);
  733. } else {
  734. if (!doc->md.quote(ob, 0, &doc->data))
  735. end = 0;
  736. }
  737. return end;
  738. }
  739. /* char_escape • '\\' backslash escape */
  740. static size_t
  741. char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  742. {
  743. static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"$";
  744. hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
  745. size_t w;
  746. if (size > 1) {
  747. if (data[1] == '\\' && (doc->ext_flags & HOEDOWN_EXT_MATH) &&
  748. size > 2 && (data[2] == '(' || data[2] == '[')) {
  749. const char *end = (data[2] == '[') ? "\\\\]" : "\\\\)";
  750. w = parse_math(ob, doc, data, offset, size, end, 3, data[2] == '[');
  751. if (w) return w;
  752. }
  753. if (strchr(escape_chars, data[1]) == NULL)
  754. return 0;
  755. if (doc->md.normal_text) {
  756. work.data = data + 1;
  757. work.size = 1;
  758. doc->md.normal_text(ob, &work, &doc->data);
  759. }
  760. else hoedown_buffer_putc(ob, data[1]);
  761. } else if (size == 1) {
  762. if (doc->md.normal_text) {
  763. work.data = data;
  764. work.size = 1;
  765. doc->md.normal_text(ob, &work, &doc->data);
  766. }
  767. else hoedown_buffer_putc(ob, data[0]);
  768. }
  769. return 2;
  770. }
  771. /* char_entity • '&' escaped when it doesn't belong to an entity */
  772. /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
  773. static size_t
  774. char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  775. {
  776. size_t end = 1;
  777. hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
  778. if (end < size && data[end] == '#')
  779. end++;
  780. while (end < size && isalnum(data[end]))
  781. end++;
  782. if (end < size && data[end] == ';')
  783. end++; /* real entity */
  784. else
  785. return 0; /* lone '&' */
  786. if (doc->md.entity) {
  787. work.data = data;
  788. work.size = end;
  789. doc->md.entity(ob, &work, &doc->data);
  790. }
  791. else hoedown_buffer_put(ob, data, end);
  792. return end;
  793. }
  794. /* char_langle_tag • '<' when tags or autolinks are allowed */
  795. static size_t
  796. char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  797. {
  798. hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
  799. hoedown_autolink_type altype = HOEDOWN_AUTOLINK_NONE;
  800. size_t end = tag_length(data, size, &altype);
  801. int ret = 0;
  802. work.data = data;
  803. work.size = end;
  804. if (end > 2) {
  805. if (doc->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) {
  806. hoedown_buffer *u_link = newbuf(doc, BUFFER_SPAN);
  807. work.data = data + 1;
  808. work.size = end - 2;
  809. unscape_text(u_link, &work);
  810. ret = doc->md.autolink(ob, u_link, altype, &doc->data);
  811. popbuf(doc, BUFFER_SPAN);
  812. }
  813. else if (doc->md.raw_html)
  814. ret = doc->md.raw_html(ob, &work, &doc->data);
  815. }
  816. if (!ret) return 0;
  817. else return end;
  818. }
  819. static size_t
  820. char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  821. {
  822. hoedown_buffer *link, *link_url, *link_text;
  823. size_t link_len, rewind;
  824. if (!doc->md.link || doc->in_link_body)
  825. return 0;
  826. link = newbuf(doc, BUFFER_SPAN);
  827. if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) {
  828. link_url = newbuf(doc, BUFFER_SPAN);
  829. HOEDOWN_BUFPUTSL(link_url, "http://");
  830. hoedown_buffer_put(link_url, link->data, link->size);
  831. if (ob->size > rewind)
  832. ob->size -= rewind;
  833. else
  834. ob->size = 0;
  835. if (doc->md.normal_text) {
  836. link_text = newbuf(doc, BUFFER_SPAN);
  837. doc->md.normal_text(link_text, link, &doc->data);
  838. doc->md.link(ob, link_text, link_url, NULL, &doc->data);
  839. popbuf(doc, BUFFER_SPAN);
  840. } else {
  841. doc->md.link(ob, link, link_url, NULL, &doc->data);
  842. }
  843. popbuf(doc, BUFFER_SPAN);
  844. }
  845. popbuf(doc, BUFFER_SPAN);
  846. return link_len;
  847. }
  848. static size_t
  849. char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  850. {
  851. hoedown_buffer *link;
  852. size_t link_len, rewind;
  853. if (!doc->md.autolink || doc->in_link_body)
  854. return 0;
  855. link = newbuf(doc, BUFFER_SPAN);
  856. if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
  857. if (ob->size > rewind)
  858. ob->size -= rewind;
  859. else
  860. ob->size = 0;
  861. doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, &doc->data);
  862. }
  863. popbuf(doc, BUFFER_SPAN);
  864. return link_len;
  865. }
  866. static size_t
  867. char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  868. {
  869. hoedown_buffer *link;
  870. size_t link_len, rewind;
  871. if (!doc->md.autolink || doc->in_link_body)
  872. return 0;
  873. link = newbuf(doc, BUFFER_SPAN);
  874. if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
  875. if (ob->size > rewind)
  876. ob->size -= rewind;
  877. else
  878. ob->size = 0;
  879. doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, &doc->data);
  880. }
  881. popbuf(doc, BUFFER_SPAN);
  882. return link_len;
  883. }
  884. static size_t
  885. char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) {
  886. size_t ret;
  887. if (size < 2 || data[1] != '[') return 0;
  888. ret = char_link(ob, doc, data + 1, offset + 1, size - 1);
  889. if (!ret) return 0;
  890. return ret + 1;
  891. }
  892. /* char_link • '[': parsing a link, a footnote or an image */
  893. static size_t
  894. char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  895. {
  896. int is_img = (offset && data[-1] == '!' && !is_escaped(data - offset, offset - 1));
  897. int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^');
  898. size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
  899. hoedown_buffer *content = NULL;
  900. hoedown_buffer *link = NULL;
  901. hoedown_buffer *title = NULL;
  902. hoedown_buffer *u_link = NULL;
  903. size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size;
  904. int ret = 0, in_title = 0, qtype = 0;
  905. /* checking whether the correct renderer exists */
  906. if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image)
  907. || (!is_img && !is_footnote && !doc->md.link))
  908. goto cleanup;
  909. /* looking for the matching closing bracket */
  910. i += find_emph_char(data + i, size - i, ']');
  911. txt_e = i;
  912. if (i < size && data[i] == ']') i++;
  913. else goto cleanup;
  914. /* footnote link */
  915. if (is_footnote) {
  916. hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL };
  917. struct footnote_ref *fr;
  918. if (txt_e < 3)
  919. goto cleanup;
  920. id.data = data + 2;
  921. id.size = txt_e - 2;
  922. fr = find_footnote_ref(&doc->footnotes_found, id.data, id.size);
  923. /* mark footnote used */
  924. if (fr && !fr->is_used) {
  925. if(!add_footnote_ref(&doc->footnotes_used, fr))
  926. goto cleanup;
  927. fr->is_used = 1;
  928. fr->num = doc->footnotes_used.count;
  929. /* render */
  930. if (doc->md.footnote_ref)
  931. ret = doc->md.footnote_ref(ob, fr->num, &doc->data);
  932. }
  933. goto cleanup;
  934. }
  935. /* skip any amount of spacing */
  936. /* (this is much more laxist than original markdown syntax) */
  937. while (i < size && _isspace(data[i]))
  938. i++;
  939. /* inline style link */
  940. if (i < size && data[i] == '(') {
  941. size_t nb_p;
  942. /* skipping initial spacing */
  943. i++;
  944. while (i < size && _isspace(data[i]))
  945. i++;
  946. link_b = i;
  947. /* looking for link end: ' " ) */
  948. /* Count the number of open parenthesis */
  949. nb_p = 0;
  950. while (i < size) {
  951. if (data[i] == '\\') i += 2;
  952. else if (data[i] == '(' && i != 0) {
  953. nb_p++; i++;
  954. }
  955. else if (data[i] == ')') {
  956. if (nb_p == 0) break;
  957. else nb_p--; i++;
  958. } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
  959. else i++;
  960. }
  961. if (i >= size) goto cleanup;
  962. link_e = i;
  963. /* looking for title end if present */
  964. if (data[i] == '\'' || data[i] == '"') {
  965. qtype = data[i];
  966. in_title = 1;
  967. i++;
  968. title_b = i;
  969. while (i < size) {
  970. if (data[i] == '\\') i += 2;
  971. else if (data[i] == qtype) {in_title = 0; i++;}
  972. else if ((data[i] == ')') && !in_title) break;
  973. else i++;
  974. }
  975. if (i >= size) goto cleanup;
  976. /* skipping spacing after title */
  977. title_e = i - 1;
  978. while (title_e > title_b && _isspace(data[title_e]))
  979. title_e--;
  980. /* checking for closing quote presence */
  981. if (data[title_e] != '\'' && data[title_e] != '"') {
  982. title_b = title_e = 0;
  983. link_e = i;
  984. }
  985. }
  986. /* remove spacing at the end of the link */
  987. while (link_e > link_b && _isspace(data[link_e - 1]))
  988. link_e--;
  989. /* remove optional angle brackets around the link */
  990. if (data[link_b] == '<' && data[link_e - 1] == '>') {
  991. link_b++;
  992. link_e--;
  993. }
  994. /* building escaped link and title */
  995. if (link_e > link_b) {
  996. link = newbuf(doc, BUFFER_SPAN);
  997. hoedown_buffer_put(link, data + link_b, link_e - link_b);
  998. }
  999. if (title_e > title_b) {
  1000. title = newbuf(doc, BUFFER_SPAN);
  1001. hoedown_buffer_put(title, data + title_b, title_e - title_b);
  1002. }
  1003. i++;
  1004. }
  1005. /* reference style link */
  1006. else if (i < size && data[i] == '[') {
  1007. hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
  1008. struct link_ref *lr;
  1009. /* looking for the id */
  1010. i++;
  1011. link_b = i;
  1012. while (i < size && data[i] != ']') i++;
  1013. if (i >= size) goto cleanup;
  1014. link_e = i;
  1015. /* finding the link_ref */
  1016. if (link_b == link_e)
  1017. replace_spacing(id, data + 1, txt_e - 1);
  1018. else
  1019. hoedown_buffer_put(id, data + link_b, link_e - link_b);
  1020. lr = find_link_ref(doc->refs, id->data, id->size);
  1021. if (!lr)
  1022. goto cleanup;
  1023. /* keeping link and title from link_ref */
  1024. link = lr->link;
  1025. title = lr->title;
  1026. i++;
  1027. }
  1028. /* shortcut reference style link */
  1029. else {
  1030. hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
  1031. struct link_ref *lr;
  1032. /* crafting the id */
  1033. replace_spacing(id, data + 1, txt_e - 1);
  1034. /* finding the link_ref */
  1035. lr = find_link_ref(doc->refs, id->data, id->size);
  1036. if (!lr)
  1037. goto cleanup;
  1038. /* keeping link and title from link_ref */
  1039. link = lr->link;
  1040. title = lr->title;
  1041. /* rewinding the spacing */
  1042. i = txt_e + 1;
  1043. }
  1044. /* building content: img alt is kept, only link content is parsed */
  1045. if (txt_e > 1) {
  1046. content = newbuf(doc, BUFFER_SPAN);
  1047. if (is_img) {
  1048. hoedown_buffer_put(content, data + 1, txt_e - 1);
  1049. } else {
  1050. /* disable autolinking when parsing inline the
  1051. * content of a link */
  1052. doc->in_link_body = 1;
  1053. parse_inline(content, doc, data + 1, txt_e - 1);
  1054. doc->in_link_body = 0;
  1055. }
  1056. }
  1057. if (link) {
  1058. u_link = newbuf(doc, BUFFER_SPAN);
  1059. unscape_text(u_link, link);
  1060. }
  1061. /* calling the relevant rendering function */
  1062. if (is_img) {
  1063. ret = doc->md.image(ob, u_link, title, content, &doc->data);
  1064. } else {
  1065. ret = doc->md.link(ob, content, u_link, title, &doc->data);
  1066. }
  1067. /* cleanup */
  1068. cleanup:
  1069. doc->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
  1070. return ret ? i : 0;
  1071. }
  1072. static size_t
  1073. char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  1074. {
  1075. size_t sup_start, sup_len;
  1076. hoedown_buffer *sup;
  1077. if (!doc->md.superscript)
  1078. return 0;
  1079. if (size < 2)
  1080. return 0;
  1081. if (data[1] == '(') {
  1082. sup_start = 2;
  1083. sup_len = find_emph_char(data + 2, size - 2, ')') + 2;
  1084. if (sup_len == size)
  1085. return 0;
  1086. } else {
  1087. sup_start = sup_len = 1;
  1088. while (sup_len < size && !_isspace(data[sup_len]))
  1089. sup_len++;
  1090. }
  1091. if (sup_len - sup_start == 0)
  1092. return (sup_start == 2) ? 3 : 0;
  1093. sup = newbuf(doc, BUFFER_SPAN);
  1094. parse_inline(sup, doc, data + sup_start, sup_len - sup_start);
  1095. doc->md.superscript(ob, sup, &doc->data);
  1096. popbuf(doc, BUFFER_SPAN);
  1097. return (sup_start == 2) ? sup_len + 1 : sup_len;
  1098. }
  1099. static size_t
  1100. char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
  1101. {
  1102. /* double dollar */
  1103. if (size > 1 && data[1] == '$')
  1104. return parse_math(ob, doc, data, offset, size, "$$", 2, 1);
  1105. /* single dollar allowed only with MATH_EXPLICIT flag */
  1106. if (doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT)
  1107. return parse_math(ob, doc, data, offset, size, "$", 1, 0);
  1108. return 0;
  1109. }
/*********************************
 * BLOCK-LEVEL PARSING FUNCTIONS *
 *********************************/
  1113. /* is_empty • returns the line length when it is empty, 0 otherwise */
  1114. static size_t
  1115. is_empty(const uint8_t *data, size_t size)
  1116. {
  1117. size_t i;
  1118. for (i = 0; i < size && data[i] != '\n'; i++)
  1119. if (data[i] != ' ')
  1120. return 0;
  1121. return i + 1;
  1122. }
  1123. /* is_hrule • returns whether a line is a horizontal rule */
  1124. static int
  1125. is_hrule(uint8_t *data, size_t size)
  1126. {
  1127. size_t i = 0, n = 0;
  1128. uint8_t c;
  1129. /* skipping initial spaces */
  1130. if (size < 3) return 0;
  1131. if (data[0] == ' ') { i++;
  1132. if (data[1] == ' ') { i++;
  1133. if (data[2] == ' ') { i++; } } }
  1134. /* looking at the hrule uint8_t */
  1135. if (i + 2 >= size
  1136. || (data[i] != '*' && data[i] != '-' && data[i] != '_'))
  1137. return 0;
  1138. c = data[i];
  1139. /* the whole line must be the char or space */
  1140. while (i < size && data[i] != '\n') {
  1141. if (data[i] == c) n++;
  1142. else if (data[i] != ' ')
  1143. return 0;
  1144. i++;
  1145. }
  1146. return n >= 3;
  1147. }
  1148. /* check if a line is a code fence; return the
  1149. * end of the code fence. if passed, width of
  1150. * the fence rule and character will be returned */
  1151. static size_t
  1152. is_codefence(uint8_t *data, size_t size, size_t *width, uint8_t *chr)
  1153. {
  1154. size_t i = 0, n = 1;
  1155. uint8_t c;
  1156. /* skipping initial spaces */
  1157. if (size < 3)
  1158. return 0;
  1159. if (data[0] == ' ') { i++;
  1160. if (data[1] == ' ') { i++;
  1161. if (data[2] == ' ') { i++; } } }
  1162. /* looking at the hrule uint8_t */
  1163. c = data[i];
  1164. if (i + 2 >= size || !(c=='~' || c=='`'))
  1165. return 0;
  1166. /* the fence must be that same character */
  1167. while (++i < size && data[i] == c)
  1168. ++n;
  1169. if (n < 3)
  1170. return 0;
  1171. if (width) *width = n;
  1172. if (chr) *chr = c;
  1173. return i;
  1174. }
  1175. /* expects single line, checks if it's a codefence and extracts language */
  1176. static size_t
  1177. parse_codefence(uint8_t *data, size_t size, hoedown_buffer *lang, size_t *width, uint8_t *chr)
  1178. {
  1179. size_t i, w, lang_start;
  1180. i = w = is_codefence(data, size, width, chr);
  1181. if (i == 0)
  1182. return 0;
  1183. while (i < size && _isspace(data[i]))
  1184. i++;
  1185. lang_start = i;
  1186. while (i < size && !_isspace(data[i]))
  1187. i++;
  1188. lang->data = data + lang_start;
  1189. lang->size = i - lang_start;
  1190. /* Avoid parsing a codespan as a fence */
  1191. i = lang_start + 2;
  1192. while (i < size && !(data[i] == *chr && data[i-1] == *chr && data[i-2] == *chr)) i++;
  1193. if (i < size) return 0;
  1194. return w;
  1195. }
  1196. /* is_atxheader • returns whether the line is a hash-prefixed header */
  1197. static int
  1198. is_atxheader(hoedown_document *doc, uint8_t *data, size_t size)
  1199. {
  1200. if (data[0] != '#')
  1201. return 0;
  1202. if (doc->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) {
  1203. size_t level = 0;
  1204. while (level < size && level < 6 && data[level] == '#')
  1205. level++;
  1206. if (level < size && data[level] != ' ')
  1207. return 0;
  1208. }
  1209. return 1;
  1210. }
  1211. /* is_headerline • returns whether the line is a setext-style hdr underline */
  1212. static int
  1213. is_headerline(uint8_t *data, size_t size)
  1214. {
  1215. size_t i = 0;
  1216. /* test of level 1 header */
  1217. if (data[i] == '=') {
  1218. for (i = 1; i < size && data[i] == '='; i++);
  1219. while (i < size && data[i] == ' ') i++;
  1220. return (i >= size || data[i] == '\n') ? 1 : 0; }
  1221. /* test of level 2 header */
  1222. if (data[i] == '-') {
  1223. for (i = 1; i < size && data[i] == '-'; i++);
  1224. while (i < size && data[i] == ' ') i++;
  1225. return (i >= size || data[i] == '\n') ? 2 : 0; }
  1226. return 0;
  1227. }
  1228. static int
  1229. is_next_headerline(uint8_t *data, size_t size)
  1230. {
  1231. size_t i = 0;
  1232. while (i < size && data[i] != '\n')
  1233. i++;
  1234. if (++i >= size)
  1235. return 0;
  1236. return is_headerline(data + i, size - i);
  1237. }
  1238. /* prefix_quote • returns blockquote prefix length */
  1239. static size_t
  1240. prefix_quote(uint8_t *data, size_t size)
  1241. {
  1242. size_t i = 0;
  1243. if (i < size && data[i] == ' ') i++;
  1244. if (i < size && data[i] == ' ') i++;
  1245. if (i < size && data[i] == ' ') i++;
  1246. if (i < size && data[i] == '>') {
  1247. if (i + 1 < size && data[i + 1] == ' ')
  1248. return i + 2;
  1249. return i + 1;
  1250. }
  1251. return 0;
  1252. }
  1253. /* prefix_code • returns prefix length for block code*/
  1254. static size_t
  1255. prefix_code(uint8_t *data, size_t size)
  1256. {
  1257. if (size > 3 && data[0] == ' ' && data[1] == ' '
  1258. && data[2] == ' ' && data[3] == ' ') return 4;
  1259. return 0;
  1260. }
  1261. /* prefix_oli • returns ordered list item prefix */
  1262. static size_t
  1263. prefix_oli(uint8_t *data, size_t size)
  1264. {
  1265. size_t i = 0;
  1266. if (i < size && data[i] == ' ') i++;
  1267. if (i < size && data[i] == ' ') i++;
  1268. if (i < size && data[i] == ' ') i++;
  1269. if (i >= size || data[i] < '0' || data[i] > '9')
  1270. return 0;
  1271. while (i < size && data[i] >= '0' && data[i] <= '9')
  1272. i++;
  1273. if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ')
  1274. return 0;
  1275. if (is_next_headerline(data + i, size - i))
  1276. return 0;
  1277. return i + 2;
  1278. }
  1279. /* prefix_uli • returns ordered list item prefix */
  1280. static size_t
  1281. prefix_uli(uint8_t *data, size_t size)
  1282. {
  1283. size_t i = 0;
  1284. if (i < size && data[i] == ' ') i++;
  1285. if (i < size && data[i] == ' ') i++;
  1286. if (i < size && data[i] == ' ') i++;
  1287. if (i + 1 >= size ||
  1288. (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
  1289. data[i + 1] != ' ')
  1290. return 0;
  1291. if (is_next_headerline(data + i, size - i))
  1292. return 0;
  1293. return i + 2;
  1294. }
/* parse_block • parses the block-level content of data into ob (forward declaration; returns nothing) */
  1296. static void parse_block(hoedown_buffer *ob, hoedown_document *doc,
  1297. uint8_t *data, size_t size);
  1298. /* parse_blockquote • handles parsing of a blockquote fragment */
  1299. static size_t
  1300. parse_blockquote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1301. {
  1302. size_t beg, end = 0, pre, work_size = 0;
  1303. uint8_t *work_data = 0;
  1304. hoedown_buffer *out = 0;
  1305. out = newbuf(doc, BUFFER_BLOCK);
  1306. beg = 0;
  1307. while (beg < size) {
  1308. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
  1309. pre = prefix_quote(data + beg, end - beg);
  1310. if (pre)
  1311. beg += pre; /* skipping prefix */
  1312. /* empty line followed by non-quote line */
  1313. else if (is_empty(data + beg, end - beg) &&
  1314. (end >= size || (prefix_quote(data + end, size - end) == 0 &&
  1315. !is_empty(data + end, size - end))))
  1316. break;
  1317. if (beg < end) { /* copy into the in-place working buffer */
  1318. /* hoedown_buffer_put(work, data + beg, end - beg); */
  1319. if (!work_data)
  1320. work_data = data + beg;
  1321. else if (data + beg != work_data + work_size)
  1322. memmove(work_data + work_size, data + beg, end - beg);
  1323. work_size += end - beg;
  1324. }
  1325. beg = end;
  1326. }
  1327. parse_block(out, doc, work_data, work_size);
  1328. if (doc->md.blockquote)
  1329. doc->md.blockquote(ob, out, &doc->data);
  1330. popbuf(doc, BUFFER_BLOCK);
  1331. return end;
  1332. }
  1333. static size_t
  1334. parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render);
  1335. /* parse_blockquote • handles parsing of a regular paragraph */
  1336. static size_t
  1337. parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1338. {
  1339. hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
  1340. size_t i = 0, end = 0;
  1341. int level = 0;
  1342. work.data = data;
  1343. while (i < size) {
  1344. for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
  1345. if (is_empty(data + i, size - i))
  1346. break;
  1347. if ((level = is_headerline(data + i, size - i)) != 0)
  1348. break;
  1349. if (is_atxheader(doc, data + i, size - i) ||
  1350. is_hrule(data + i, size - i) ||
  1351. prefix_quote(data + i, size - i)) {
  1352. end = i;
  1353. break;
  1354. }
  1355. i = end;
  1356. }
  1357. work.size = i;
  1358. while (work.size && data[work.size - 1] == '\n')
  1359. work.size--;
  1360. if (!level) {
  1361. hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
  1362. parse_inline(tmp, doc, work.data, work.size);
  1363. if (doc->md.paragraph)
  1364. doc->md.paragraph(ob, tmp, &doc->data);
  1365. popbuf(doc, BUFFER_BLOCK);
  1366. } else {
  1367. hoedown_buffer *header_work;
  1368. if (work.size) {
  1369. size_t beg;
  1370. i = work.size;
  1371. work.size -= 1;
  1372. while (work.size && data[work.size] != '\n')
  1373. work.size -= 1;
  1374. beg = work.size + 1;
  1375. while (work.size && data[work.size - 1] == '\n')
  1376. work.size -= 1;
  1377. if (work.size > 0) {
  1378. hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
  1379. parse_inline(tmp, doc, work.data, work.size);
  1380. if (doc->md.paragraph)
  1381. doc->md.paragraph(ob, tmp, &doc->data);
  1382. popbuf(doc, BUFFER_BLOCK);
  1383. work.data += beg;
  1384. work.size = i - beg;
  1385. }
  1386. else work.size = i;
  1387. }
  1388. header_work = newbuf(doc, BUFFER_SPAN);
  1389. parse_inline(header_work, doc, work.data, work.size);
  1390. if (doc->md.header)
  1391. doc->md.header(ob, header_work, (int)level, &doc->data);
  1392. popbuf(doc, BUFFER_SPAN);
  1393. }
  1394. return end;
  1395. }
  1396. /* parse_fencedcode • handles parsing of a block-level code fragment */
  1397. static size_t
  1398. parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1399. {
  1400. hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL };
  1401. hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL };
  1402. size_t i = 0, text_start, line_start;
  1403. size_t w, w2;
  1404. size_t width, width2;
  1405. uint8_t chr, chr2;
  1406. /* parse codefence line */
  1407. while (i < size && data[i] != '\n')
  1408. i++;
  1409. w = parse_codefence(data, i, &lang, &width, &chr);
  1410. if (!w)
  1411. return 0;
  1412. /* search for end */
  1413. i++;
  1414. text_start = i;
  1415. while ((line_start = i) < size) {
  1416. while (i < size && data[i] != '\n')
  1417. i++;
  1418. w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2);
  1419. if (w == w2 && width == width2 && chr == chr2 &&
  1420. is_empty(data + (line_start+w), i - (line_start+w)))
  1421. break;
  1422. i++;
  1423. }
  1424. text.data = data + text_start;
  1425. text.size = line_start - text_start;
  1426. if (doc->md.blockcode)
  1427. doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, &doc->data);
  1428. return i;
  1429. }
  1430. static size_t
  1431. parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1432. {
  1433. size_t beg, end, pre;
  1434. hoedown_buffer *work = 0;
  1435. work = newbuf(doc, BUFFER_BLOCK);
  1436. beg = 0;
  1437. while (beg < size) {
  1438. for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
  1439. pre = prefix_code(data + beg, end - beg);
  1440. if (pre)
  1441. beg += pre; /* skipping prefix */
  1442. else if (!is_empty(data + beg, end - beg))
  1443. /* non-empty non-prefixed line breaks the pre */
  1444. break;
  1445. if (beg < end) {
  1446. /* verbatim copy to the working buffer,
  1447. escaping entities */
  1448. if (is_empty(data + beg, end - beg))
  1449. hoedown_buffer_putc(work, '\n');
  1450. else hoedown_buffer_put(work, data + beg, end - beg);
  1451. }
  1452. beg = end;
  1453. }
  1454. while (work->size && work->data[work->size - 1] == '\n')
  1455. work->size -= 1;
  1456. hoedown_buffer_putc(work, '\n');
  1457. if (doc->md.blockcode)
  1458. doc->md.blockcode(ob, work, NULL, &doc->data);
  1459. popbuf(doc, BUFFER_BLOCK);
  1460. return beg;
  1461. }
  1462. /* parse_listitem • parsing of a single list item */
  1463. /* assuming initial prefix is already removed */
  1464. static size_t
  1465. parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags)
  1466. {
  1467. hoedown_buffer *work = 0, *inter = 0;
  1468. size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
  1469. int in_empty = 0, has_inside_empty = 0, in_fence = 0;
  1470. /* keeping track of the first indentation prefix */
  1471. while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
  1472. orgpre++;
  1473. beg = prefix_uli(data, size);
  1474. if (!beg)
  1475. beg = prefix_oli(data, size);
  1476. if (!beg)
  1477. return 0;
  1478. /* skipping to the beginning of the following line */
  1479. end = beg;
  1480. while (end < size && data[end - 1] != '\n')
  1481. end++;
  1482. /* getting working buffers */
  1483. work = newbuf(doc, BUFFER_SPAN);
  1484. inter = newbuf(doc, BUFFER_SPAN);
  1485. /* putting the first line into the working buffer */
  1486. hoedown_buffer_put(work, data + beg, end - beg);
  1487. beg = end;
  1488. /* process the following lines */
  1489. while (beg < size) {
  1490. size_t has_next_uli = 0, has_next_oli = 0;
  1491. end++;
  1492. while (end < size && data[end - 1] != '\n')
  1493. end++;
  1494. /* process an empty line */
  1495. if (is_empty(data + beg, end - beg)) {
  1496. in_empty = 1;
  1497. beg = end;
  1498. continue;
  1499. }
  1500. /* calculating the indentation */
  1501. i = 0;
  1502. while (i < 4 && beg + i < end && data[beg + i] == ' ')
  1503. i++;
  1504. pre = i;
  1505. if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
  1506. if (is_codefence(data + beg + i, end - beg - i, NULL, NULL))
  1507. in_fence = !in_fence;
  1508. }
  1509. /* Only check for new list items if we are **not** inside
  1510. * a fenced code block */
  1511. if (!in_fence) {
  1512. has_next_uli = prefix_uli(data + beg + i, end - beg - i);
  1513. has_next_oli = prefix_oli(data + beg + i, end - beg - i);
  1514. }
  1515. /* checking for a new item */
  1516. if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
  1517. if (in_empty)
  1518. has_inside_empty = 1;
  1519. /* the following item must have the same (or less) indentation */
  1520. if (pre <= orgpre) {
  1521. /* if the following item has different list type, we end this list */
  1522. if (in_empty && (
  1523. ((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
  1524. (!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli)))
  1525. *flags |= HOEDOWN_LI_END;
  1526. break;
  1527. }
  1528. if (!sublist)
  1529. sublist = work->size;
  1530. }
  1531. /* joining only indented stuff after empty lines;
  1532. * note that now we only require 1 space of indentation
  1533. * to continue a list */
  1534. else if (in_empty && pre == 0) {
  1535. *flags |= HOEDOWN_LI_END;
  1536. break;
  1537. }
  1538. if (in_empty) {
  1539. hoedown_buffer_putc(work, '\n');
  1540. has_inside_empty = 1;
  1541. in_empty = 0;
  1542. }
  1543. /* adding the line without prefix into the working buffer */
  1544. hoedown_buffer_put(work, data + beg + i, end - beg - i);
  1545. beg = end;
  1546. }
  1547. /* render of li contents */
  1548. if (has_inside_empty)
  1549. *flags |= HOEDOWN_LI_BLOCK;
  1550. if (*flags & HOEDOWN_LI_BLOCK) {
  1551. /* intermediate render of block li */
  1552. if (sublist && sublist < work->size) {
  1553. parse_block(inter, doc, work->data, sublist);
  1554. parse_block(inter, doc, work->data + sublist, work->size - sublist);
  1555. }
  1556. else
  1557. parse_block(inter, doc, work->data, work->size);
  1558. } else {
  1559. /* intermediate render of inline li */
  1560. if (sublist && sublist < work->size) {
  1561. parse_inline(inter, doc, work->data, sublist);
  1562. parse_block(inter, doc, work->data + sublist, work->size - sublist);
  1563. }
  1564. else
  1565. parse_inline(inter, doc, work->data, work->size);
  1566. }
  1567. /* render of li itself */
  1568. if (doc->md.listitem)
  1569. doc->md.listitem(ob, inter, *flags, &doc->data);
  1570. popbuf(doc, BUFFER_SPAN);
  1571. popbuf(doc, BUFFER_SPAN);
  1572. return beg;
  1573. }
  1574. /* parse_list • parsing ordered or unordered list block */
  1575. static size_t
  1576. parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags flags)
  1577. {
  1578. hoedown_buffer *work = 0;
  1579. size_t i = 0, j;
  1580. work = newbuf(doc, BUFFER_BLOCK);
  1581. while (i < size) {
  1582. j = parse_listitem(work, doc, data + i, size - i, &flags);
  1583. i += j;
  1584. if (!j || (flags & HOEDOWN_LI_END))
  1585. break;
  1586. }
  1587. if (doc->md.list)
  1588. doc->md.list(ob, work, flags, &doc->data);
  1589. popbuf(doc, BUFFER_BLOCK);
  1590. return i;
  1591. }
  1592. /* parse_atxheader • parsing of atx-style headers */
  1593. static size_t
  1594. parse_atxheader(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
  1595. {
  1596. size_t level = 0;
  1597. size_t i, end, skip;
  1598. while (level < size && level < 6 && data[level] == '#')
  1599. level++;
  1600. for (i = level; i < size && data[i] == ' '; i++);
  1601. for (end = i; end < size && data[end] != '\n'; end++);
  1602. skip = end;
  1603. while (end && data[end - 1] == '#')
  1604. end--;
  1605. while (end && data[end - 1] == ' ')
  1606. end--;
  1607. if (end > i) {
  1608. hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
  1609. parse_inline(work, doc, data + i, end - i);
  1610. if (doc->md.header)
  1611. doc->md.header(ob, work, (int)level, &doc->data);
  1612. popbuf(doc, BUFFER_SPAN);
  1613. }
  1614. return skip;
  1615. }
  1616. /* parse_footnote_def • parse a single footnote definition */
  1617. static void
  1618. parse_footnote_def(hoedown_buffer *ob, hoedown_document *doc, unsigned int num, uint8_t *data, size_t size)
  1619. {
  1620. hoedown_buffer *work = 0;
  1621. work = newbuf(doc, BUFFER_SPAN);
  1622. parse_block(work, doc, data, size);
  1623. if (doc->md.footnote_def)
  1624. doc->md.footnote_def(ob, work, num, &doc->data);
  1625. popbuf(doc, BUFFER_SPAN);
  1626. }
  1627. /* parse_footnote_list • render the contents of the footnotes */
  1628. static void
  1629. parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_list *footnotes)
  1630. {
  1631. hoedown_buffer *work = 0;
  1632. struct footnote_item *item;
  1633. struct footnote_ref *ref;
  1634. if (footnotes->count == 0)
  1635. return;
  1636. work = newbuf(doc, BUFFER_BLOCK);
  1637. item = footnotes->head;
  1638. while (item) {
  1639. ref = item->ref;
  1640. parse_footnote_def(work, doc, ref->num, ref->contents->data, ref->contents->size);
  1641. item = item->next;
  1642. }
  1643. if (doc->md.footnotes)
  1644. doc->md.footnotes(ob, work, &doc->data);
  1645. popbuf(doc, BUFFER_BLOCK);
  1646. }
  1647. /* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */
  1648. /* returns tag length on match, 0 otherwise */
  1649. /* assumes data starts with "<" */
  1650. static size_t
  1651. htmlblock_is_end(
  1652. const char *tag,
  1653. size_t tag_len,
  1654. hoedown_document *doc,
  1655. uint8_t *data,
  1656. size_t size)
  1657. {
  1658. size_t i = tag_len + 3, w;
  1659. /* try to match the end tag */
  1660. /* note: we're not considering tags like "</tag >" which are still valid */
  1661. if (i > size ||
  1662. data[1] != '/' ||
  1663. strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
  1664. data[tag_len + 2] != '>')
  1665. return 0;
  1666. /* rest of the line must be empty */
  1667. if ((w = is_empty(data + i, size - i)) == 0 && i < size)
  1668. return 0;
  1669. return i + w;
  1670. }
  1671. /* htmlblock_find_end • try to find HTML block ending tag */
  1672. /* returns the length on match, 0 otherwise */
  1673. static size_t
  1674. htmlblock_find_end(
  1675. const char *tag,
  1676. size_t tag_len,
  1677. hoedown_document *doc,
  1678. uint8_t *data,
  1679. size_t size)
  1680. {
  1681. size_t i = 0, w;
  1682. while (1) {
  1683. while (i < size && data[

Large files files are truncated, but you can click here to view the full file