PageRenderTime 303ms CodeModel.GetById 3ms app.highlight 270ms RepoModel.GetById 1ms app.codeStats 1ms

/writeily/libs/anddown/jni/src/markdown.c

https://github.com/jpsullivan/writeily
C | 2884 lines | 2648 code | 172 blank | 64 comment | 243 complexity | afb653adb6a6a64b20987109ab8261be MD5 | raw file
   1/* markdown.c - generic markdown parser */
   2
   3#include "markdown.h"
   4
   5#include <assert.h>
   6#include <string.h>
   7#include <ctype.h>
   8#include <stdio.h>
   9
  10#include "stack.h"
  11
/* MSVC names the bounded case-insensitive compare _strnicmp */
#ifdef _MSC_VER
#define strncasecmp	_strnicmp
#endif

/* number of buckets in the link reference hash table (hoedown_markdown.refs) */
#define REF_TABLE_SIZE 8

/* indices into hoedown_markdown.work_bufs: one pool of work buffers per kind */
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1

#define HOEDOWN_LI_END 8	/* internal list flag */

/* prototype only; no definition is visible in this part of the file */
const char *hoedown_find_block_tag(const char *str, unsigned int len);
  24
  25/***************
  26 * LOCAL TYPES *
  27 ***************/
  28
/* link_ref: reference to a link */
/*   entries are chained per bucket of the table indexed by id % REF_TABLE_SIZE */
struct link_ref {
	unsigned int id;	/* hash_link_ref() of the reference name */

	hoedown_buffer *link;	/* released by free_link_refs() */
	hoedown_buffer *title;	/* released by free_link_refs() */

	struct link_ref *next;	/* next entry in the same bucket, or NULL */
};
  38
/* footnote_ref: reference to a footnote */
struct footnote_ref {
	unsigned int id;	/* hash_link_ref() of the footnote name */

	int is_used;		/* set to 1 by char_link() the first time the footnote is referenced */
	unsigned int num;	/* count of used footnotes at the moment this one was first referenced */
	
	hoedown_buffer *contents;	/* released by free_footnote_ref() */
};
  48
/* footnote_item: an item in a footnote_list */
struct footnote_item {
	struct footnote_ref *ref;	/* the footnote this node points at */
	struct footnote_item *next;	/* following node, or NULL at the tail */
};
  54
/* footnote_list: linked list of footnote_item */
/*   add_footnote_ref() appends at the tail and bumps count */
struct footnote_list {
	unsigned int count;		/* number of items appended so far */
	struct footnote_item *head;	/* first item, NULL while the list is empty */
	struct footnote_item *tail;	/* last item; only meaningful when head is not NULL */
};
  61
/* char_trigger: function pointer to render active chars */
/*   returns the number of chars taken care of (0 means "not handled") */
/*   data is the pointer of the beginning of the span */
/*   offset is the number of valid chars before data */
/*   size is the number of chars available starting at data */
typedef size_t
(*char_trigger)(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);

/* forward declarations: the handlers are referenced by markdown_char_ptrs */
/* before their definitions appear */
static size_t char_emphasis(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_quote(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_linebreak(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_codespan(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_escape(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_entity(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_langle_tag(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_url(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_email(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_www(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_link(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
static size_t char_superscript(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size);
  81
/* markdown_char_t: handler identifiers */
/*   the enumerators are listed in the same order as the entries of */
/*   markdown_char_ptrs, so each value doubles as an index into that table; */
/*   MD_CHAR_NONE (0) lines up with the table's NULL entry */
/*   NOTE(review): presumably these are the values stored in */
/*   hoedown_markdown.active_char; the code filling that table is not visible here */
enum markdown_char_t {
	MD_CHAR_NONE = 0,
	MD_CHAR_EMPHASIS,
	MD_CHAR_CODESPAN,
	MD_CHAR_LINEBREAK,
	MD_CHAR_LINK,
	MD_CHAR_LANGLE,
	MD_CHAR_ESCAPE,
	MD_CHAR_ENTITITY,
	MD_CHAR_AUTOLINK_URL,
	MD_CHAR_AUTOLINK_EMAIL,
	MD_CHAR_AUTOLINK_WWW,
	MD_CHAR_SUPERSCRIPT,
	MD_CHAR_QUOTE
};
  97
/* markdown_char_ptrs: handler table indexed by the action read from active_char */
/*   parse_inline() only dispatches on a non-zero action, so the NULL at */
/*   index 0 is never called; entry order mirrors enum markdown_char_t */
static char_trigger markdown_char_ptrs[] = {
	NULL,
	&char_emphasis,
	&char_codespan,
	&char_linebreak,
	&char_link,
	&char_langle_tag,
	&char_escape,
	&char_entity,
	&char_autolink_url,
	&char_autolink_email,
	&char_autolink_www,
	&char_superscript,
	&char_quote
};
 113
/* hoedown_markdown • structure containing state for a parser instance */
struct hoedown_markdown {
	hoedown_renderer md;	/* renderer callbacks plus the opaque pointer handed back to them */

	struct link_ref *refs[REF_TABLE_SIZE];	/* link reference hash table, see add_link_ref() */
	struct footnote_list footnotes_found;	/* list searched by char_link() when resolving [^name] */
	struct footnote_list footnotes_used;	/* footnotes referenced so far, in order of first use */
	uint8_t active_char[256];	/* per-byte index into markdown_char_ptrs, 0 = plain text */
	hoedown_stack work_bufs[2];	/* work buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
	unsigned int ext_flags;		/* HOEDOWN_EXT_* bits tested by the parsing functions */
	size_t max_nesting;		/* parse_inline() bails out once the pools hold more buffers than this */
	int in_link_body;		/* non-zero while link text is parsed; turns autolinking off */
};
 127
 128/***************************
 129 * HELPER FUNCTIONS *
 130 ***************************/
 131
 132static inline hoedown_buffer *
 133newbuf(hoedown_markdown *md, int type)
 134{
 135	static const size_t buf_size[2] = {256, 64};
 136	hoedown_buffer *work = NULL;
 137	hoedown_stack *pool = &md->work_bufs[type];
 138
 139	if (pool->size < pool->asize &&
 140		pool->item[pool->size] != NULL) {
 141		work = pool->item[pool->size++];
 142		work->size = 0;
 143	} else {
 144		work = hoedown_buffer_new(buf_size[type]);
 145		hoedown_stack_push(pool, work);
 146	}
 147
 148	return work;
 149}
 150
 151static inline void
 152popbuf(hoedown_markdown *md, int type)
 153{
 154	md->work_bufs[type].size--;
 155}
 156
 157static void
 158unscape_text(hoedown_buffer *ob, hoedown_buffer *src)
 159{
 160	size_t i = 0, org;
 161	while (i < src->size) {
 162		org = i;
 163		while (i < src->size && src->data[i] != '\\')
 164			i++;
 165
 166		if (i > org)
 167			hoedown_buffer_put(ob, src->data + org, i - org);
 168
 169		if (i + 1 >= src->size)
 170			break;
 171
 172		hoedown_buffer_putc(ob, src->data[i + 1]);
 173		i += 2;
 174	}
 175}
 176
 177static unsigned int
 178hash_link_ref(const uint8_t *link_ref, size_t length)
 179{
 180	size_t i;
 181	unsigned int hash = 0;
 182
 183	for (i = 0; i < length; ++i)
 184		hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
 185
 186	return hash;
 187}
 188
 189static struct link_ref *
 190add_link_ref(
 191	struct link_ref **references,
 192	const uint8_t *name, size_t name_size)
 193{
 194	struct link_ref *ref = calloc(1, sizeof(struct link_ref));
 195
 196	if (!ref)
 197		return NULL;
 198
 199	ref->id = hash_link_ref(name, name_size);
 200	ref->next = references[ref->id % REF_TABLE_SIZE];
 201
 202	references[ref->id % REF_TABLE_SIZE] = ref;
 203	return ref;
 204}
 205
 206static struct link_ref *
 207find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
 208{
 209	unsigned int hash = hash_link_ref(name, length);
 210	struct link_ref *ref = NULL;
 211
 212	ref = references[hash % REF_TABLE_SIZE];
 213
 214	while (ref != NULL) {
 215		if (ref->id == hash)
 216			return ref;
 217
 218		ref = ref->next;
 219	}
 220
 221	return NULL;
 222}
 223
 224static void
 225free_link_refs(struct link_ref **references)
 226{
 227	size_t i;
 228
 229	for (i = 0; i < REF_TABLE_SIZE; ++i) {
 230		struct link_ref *r = references[i];
 231		struct link_ref *next;
 232
 233		while (r) {
 234			next = r->next;
 235			hoedown_buffer_free(r->link);
 236			hoedown_buffer_free(r->title);
 237			free(r);
 238			r = next;
 239		}
 240	}
 241}
 242
 243static struct footnote_ref *
 244create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
 245{
 246	struct footnote_ref *ref = calloc(1, sizeof(struct footnote_ref));
 247	if (!ref)
 248		return NULL;
 249	
 250	ref->id = hash_link_ref(name, name_size);
 251	
 252	return ref;
 253}
 254
 255static int
 256add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
 257{
 258	struct footnote_item *item = calloc(1, sizeof(struct footnote_item));
 259	if (!item)
 260		return 0;
 261	item->ref = ref;
 262	
 263	if (list->head == NULL) {
 264		list->head = list->tail = item;
 265	} else {
 266		list->tail->next = item;
 267		list->tail = item;
 268	}
 269	list->count++;
 270	
 271	return 1;
 272}
 273
 274static struct footnote_ref *
 275find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
 276{
 277	unsigned int hash = hash_link_ref(name, length);
 278	struct footnote_item *item = NULL;
 279	
 280	item = list->head;
 281	
 282	while (item != NULL) {
 283		if (item->ref->id == hash)
 284			return item->ref;
 285		item = item->next;
 286	}
 287	
 288	return NULL;
 289}
 290
 291static void
 292free_footnote_ref(struct footnote_ref *ref)
 293{
 294	hoedown_buffer_free(ref->contents);
 295	free(ref);
 296}
 297
 298static void
 299free_footnote_list(struct footnote_list *list, int free_refs)
 300{
 301	struct footnote_item *item = list->head;
 302	struct footnote_item *next;
 303	
 304	while (item) {
 305		next = item->next;
 306		if (free_refs)
 307			free_footnote_ref(item->ref);
 308		free(item);
 309		item = next;
 310	}
 311}
 312
 313
 314/*
 315 * Check whether a char is a Markdown space.
 316
 317 * Right now we only consider spaces the actual
 318 * space and a newline: tabs and carriage returns
 319 * are filtered out during the preprocessing phase.
 320 *
 321 * If we wanted to actually be UTF-8 compliant, we
 322 * should instead extract an Unicode codepoint from
 323 * this character and check for space properties.
 324 */
 325static inline int
 326_isspace(int c)
 327{
 328	return c == ' ' || c == '\n';
 329}
 330
 331/****************************
 332 * INLINE PARSING FUNCTIONS *
 333 ****************************/
 334
 335/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
 336/* this is less strict than the original markdown e-mail address matching */
 337static size_t
 338is_mail_autolink(uint8_t *data, size_t size)
 339{
 340	size_t i = 0, nb = 0;
 341
 342	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
 343	for (i = 0; i < size; ++i) {
 344		if (isalnum(data[i]))
 345			continue;
 346
 347		switch (data[i]) {
 348			case '@':
 349				nb++;
 350
 351			case '-':
 352			case '.':
 353			case '_':
 354				break;
 355
 356			case '>':
 357				return (nb == 1) ? i + 1 : 0;
 358
 359			default:
 360				return 0;
 361		}
 362	}
 363
 364	return 0;
 365}
 366
 367/* tag_length • returns the length of the given tag, or 0 is it's not valid */
 368static size_t
 369tag_length(uint8_t *data, size_t size, enum hoedown_autolink *autolink)
 370{
 371	size_t i, j;
 372
 373	/* a valid tag can't be shorter than 3 chars */
 374	if (size < 3) return 0;
 375
 376	/* begins with a '<' optionally followed by '/', followed by letter or number */
 377	if (data[0] != '<') return 0;
 378	i = (data[1] == '/') ? 2 : 1;
 379
 380	if (!isalnum(data[i]))
 381		return 0;
 382
 383	/* scheme test */
 384	*autolink = HOEDOWN_AUTOLINK_NONE;
 385
 386	/* try to find the beginning of an URI */
 387	while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
 388		i++;
 389
 390	if (i > 1 && data[i] == '@') {
 391		if ((j = is_mail_autolink(data + i, size - i)) != 0) {
 392			*autolink = HOEDOWN_AUTOLINK_EMAIL;
 393			return i + j;
 394		}
 395	}
 396
 397	if (i > 2 && data[i] == ':') {
 398		*autolink = HOEDOWN_AUTOLINK_NORMAL;
 399		i++;
 400	}
 401
 402	/* completing autolink test: no whitespace or ' or " */
 403	if (i >= size)
 404		*autolink = HOEDOWN_AUTOLINK_NONE;
 405
 406	else if (*autolink) {
 407		j = i;
 408
 409		while (i < size) {
 410			if (data[i] == '\\') i += 2;
 411			else if (data[i] == '>' || data[i] == '\'' ||
 412					data[i] == '"' || data[i] == ' ' || data[i] == '\n')
 413					break;
 414			else i++;
 415		}
 416
 417		if (i >= size) return 0;
 418		if (i > j && data[i] == '>') return i + 1;
 419		/* one of the forbidden chars has been found */
 420		*autolink = HOEDOWN_AUTOLINK_NONE;
 421	}
 422
 423	/* looking for sometinhg looking like a tag end */
 424	while (i < size && data[i] != '>') i++;
 425	if (i >= size) return 0;
 426	return i + 1;
 427}
 428
 429/* parse_inline • parses inline markdown elements */
 430static void
 431parse_inline(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
 432{
 433	size_t i = 0, end = 0;
 434	uint8_t action = 0;
 435	hoedown_buffer work = { 0, 0, 0, 0 };
 436
 437	if (md->work_bufs[BUFFER_SPAN].size +
 438		md->work_bufs[BUFFER_BLOCK].size > md->max_nesting)
 439		return;
 440
 441	while (i < size) {
 442		/* copying inactive chars into the output */
 443		while (end < size && (action = md->active_char[data[end]]) == 0) {
 444			end++;
 445		}
 446
 447		if (md->md.normal_text) {
 448			work.data = data + i;
 449			work.size = end - i;
 450			md->md.normal_text(ob, &work, md->md.opaque);
 451		}
 452		else
 453			hoedown_buffer_put(ob, data + i, end - i);
 454
 455		if (end >= size) break;
 456		i = end;
 457
 458		end = markdown_char_ptrs[(int)action](ob, md, data + i, i, size - i);
 459		if (!end) /* no action from the callback */
 460			end = i + 1;
 461		else {
 462			i += end;
 463			end = i;
 464		}
 465	}
 466}
 467
 468/* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
 469static size_t
 470find_emph_char(uint8_t *data, size_t size, uint8_t c)
 471{
 472	size_t i = 1;
 473
 474	while (i < size) {
 475		while (i < size && data[i] != c && data[i] != '[')
 476			i++;
 477
 478		if (i == size)
 479			return 0;
 480
 481		if (data[i] == c)
 482			return i;
 483
 484		/* not counting escaped chars */
 485		if (i && data[i - 1] == '\\') {
 486			i++; continue;
 487		}
 488
 489		if (data[i] == '`') {
 490			size_t span_nb = 0, bt;
 491			size_t tmp_i = 0;
 492
 493			/* counting the number of opening backticks */
 494			while (i < size && data[i] == '`') {
 495				i++; span_nb++;
 496			}
 497
 498			if (i >= size) return 0;
 499
 500			/* finding the matching closing sequence */
 501			bt = 0;
 502			while (i < size && bt < span_nb) {
 503				if (!tmp_i && data[i] == c) tmp_i = i;
 504				if (data[i] == '`') bt++;
 505				else bt = 0;
 506				i++;
 507			}
 508
 509			if (i >= size) return tmp_i;
 510		}
 511		/* skipping a link */
 512		else if (data[i] == '[') {
 513			size_t tmp_i = 0;
 514			uint8_t cc;
 515
 516			i++;
 517			while (i < size && data[i] != ']') {
 518				if (!tmp_i && data[i] == c) tmp_i = i;
 519				i++;
 520			}
 521
 522			i++;
 523			while (i < size && (data[i] == ' ' || data[i] == '\n'))
 524				i++;
 525
 526			if (i >= size)
 527				return tmp_i;
 528
 529			switch (data[i]) {
 530			case '[':
 531				cc = ']'; break;
 532
 533			case '(':
 534				cc = ')'; break;
 535
 536			default:
 537				if (tmp_i)
 538					return tmp_i;
 539				else
 540					continue;
 541			}
 542
 543			i++;
 544			while (i < size && data[i] != cc) {
 545				if (!tmp_i && data[i] == c) tmp_i = i;
 546				i++;
 547			}
 548
 549			if (i >= size)
 550				return tmp_i;
 551
 552			i++;
 553		}
 554	}
 555
 556	return 0;
 557}
 558
 559/* parse_emph1 • parsing single emphase */
 560/* closed by a symbol not preceded by whitespace and not followed by symbol */
 561static size_t
 562parse_emph1(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, uint8_t c)
 563{
 564	size_t i = 0, len;
 565	hoedown_buffer *work = 0;
 566	int r;
 567
 568	/* skipping one symbol if coming from emph3 */
 569	if (size > 1 && data[0] == c && data[1] == c) i = 1;
 570
 571	while (i < size) {
 572		len = find_emph_char(data + i, size - i, c);
 573		if (!len) return 0;
 574		i += len;
 575		if (i >= size) return 0;
 576
 577		if (data[i] == c && !_isspace(data[i - 1])) {
 578
 579			if (md->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
 580				if (i + 1 < size && isalnum(data[i + 1]))
 581					continue;
 582			}
 583
 584			work = newbuf(md, BUFFER_SPAN);
 585			parse_inline(work, md, data, i);
 586
 587			if (md->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_')
 588				r = md->md.underline(ob, work, md->md.opaque);
 589			else
 590				r = md->md.emphasis(ob, work, md->md.opaque);
 591
 592			popbuf(md, BUFFER_SPAN);
 593			return r ? i + 1 : 0;
 594		}
 595	}
 596
 597	return 0;
 598}
 599
 600/* parse_emph2 • parsing single emphase */
 601static size_t
 602parse_emph2(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, uint8_t c)
 603{
 604	size_t i = 0, len;
 605	hoedown_buffer *work = 0;
 606	int r;
 607
 608	while (i < size) {
 609		len = find_emph_char(data + i, size - i, c);
 610		if (!len) return 0;
 611		i += len;
 612
 613		if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
 614			work = newbuf(md, BUFFER_SPAN);
 615			parse_inline(work, md, data, i);
 616
 617			if (c == '~')
 618				r = md->md.strikethrough(ob, work, md->md.opaque);
 619			else if (c == '=')
 620				r = md->md.highlight(ob, work, md->md.opaque);
 621			else
 622				r = md->md.double_emphasis(ob, work, md->md.opaque);
 623
 624			popbuf(md, BUFFER_SPAN);
 625			return r ? i + 2 : 0;
 626		}
 627		i++;
 628	}
 629	return 0;
 630}
 631
 632/* parse_emph3 • parsing single emphase */
 633/* finds the first closing tag, and delegates to the other emph */
 634static size_t
 635parse_emph3(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, uint8_t c)
 636{
 637	size_t i = 0, len;
 638	int r;
 639
 640	while (i < size) {
 641		len = find_emph_char(data + i, size - i, c);
 642		if (!len) return 0;
 643		i += len;
 644
 645		/* skip whitespace preceded symbols */
 646		if (data[i] != c || _isspace(data[i - 1]))
 647			continue;
 648
 649		if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && md->md.triple_emphasis) {
 650			/* triple symbol found */
 651			hoedown_buffer *work = newbuf(md, BUFFER_SPAN);
 652
 653			parse_inline(work, md, data, i);
 654			r = md->md.triple_emphasis(ob, work, md->md.opaque);
 655			popbuf(md, BUFFER_SPAN);
 656			return r ? i + 3 : 0;
 657
 658		} else if (i + 1 < size && data[i + 1] == c) {
 659			/* double symbol found, handing over to emph1 */
 660			len = parse_emph1(ob, md, data - 2, size + 2, c);
 661			if (!len) return 0;
 662			else return len - 2;
 663
 664		} else {
 665			/* single symbol found, handing over to emph2 */
 666			len = parse_emph2(ob, md, data - 1, size + 1, c);
 667			if (!len) return 0;
 668			else return len - 1;
 669		}
 670	}
 671	return 0;
 672}
 673
 674/* char_emphasis • single and double emphasis parsing */
 675static size_t
 676char_emphasis(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 677{
 678	uint8_t c = data[0];
 679	size_t ret;
 680
 681	if (md->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
 682		if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(')
 683			return 0;
 684	}
 685
 686	if (size > 2 && data[1] != c) {
 687		/* whitespace cannot follow an opening emphasis;
 688		 * strikethrough only takes two characters '~~' */
 689		if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, md, data + 1, size - 1, c)) == 0)
 690			return 0;
 691
 692		return ret + 1;
 693	}
 694
 695	if (size > 3 && data[1] == c && data[2] != c) {
 696		if (_isspace(data[2]) || (ret = parse_emph2(ob, md, data + 2, size - 2, c)) == 0)
 697			return 0;
 698
 699		return ret + 2;
 700	}
 701
 702	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
 703		if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, md, data + 3, size - 3, c)) == 0)
 704			return 0;
 705
 706		return ret + 3;
 707	}
 708
 709	return 0;
 710}
 711
 712
 713/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
 714static size_t
 715char_linebreak(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 716{
 717	if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
 718		return 0;
 719
 720	/* removing the last space from ob and rendering */
 721	while (ob->size && ob->data[ob->size - 1] == ' ')
 722		ob->size--;
 723
 724	return md->md.linebreak(ob, md->md.opaque) ? 1 : 0;
 725}
 726
 727
 728/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
 729static size_t
 730char_codespan(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 731{
 732	size_t end, nb = 0, i, f_begin, f_end;
 733
 734	/* counting the number of backticks in the delimiter */
 735	while (nb < size && data[nb] == '`')
 736		nb++;
 737
 738	/* finding the next delimiter */
 739	i = 0;
 740	for (end = nb; end < size && i < nb; end++) {
 741		if (data[end] == '`') i++;
 742		else i = 0;
 743	}
 744
 745	if (i < nb && end >= size)
 746		return 0; /* no matching delimiter */
 747
 748	/* trimming outside whitespaces */
 749	f_begin = nb;
 750	while (f_begin < end && data[f_begin] == ' ')
 751		f_begin++;
 752
 753	f_end = end - nb;
 754	while (f_end > nb && data[f_end-1] == ' ')
 755		f_end--;
 756
 757	/* real code span */
 758	if (f_begin < f_end) {
 759		hoedown_buffer work = { data + f_begin, f_end - f_begin, 0, 0 };
 760		if (!md->md.codespan(ob, &work, md->md.opaque))
 761			end = 0;
 762	} else {
 763		if (!md->md.codespan(ob, 0, md->md.opaque))
 764			end = 0;
 765	}
 766
 767	return end;
 768}
 769
 770/* char_quote • '"' parsing a quote */
 771static size_t
 772char_quote(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 773{    
 774	size_t end, nq = 0, i, f_begin, f_end;
 775
 776	/* counting the number of quotes in the delimiter */
 777	while (nq < size && data[nq] == '"')
 778		nq++;
 779
 780	/* finding the next delimiter */
 781	i = 0;
 782	for (end = nq; end < size && i < nq; end++) {
 783		if (data[end] == '"') i++;
 784		else i = 0;
 785	}
 786
 787	if (i < nq && end >= size)
 788		return 0; /* no matching delimiter */
 789
 790	/* trimming outside whitespaces */
 791	f_begin = nq;
 792	while (f_begin < end && data[f_begin] == ' ')
 793		f_begin++;
 794
 795	f_end = end - nq;
 796	while (f_end > nq && data[f_end-1] == ' ')
 797		f_end--;
 798
 799	/* real quote */
 800	if (f_begin < f_end) {
 801		hoedown_buffer work = { data + f_begin, f_end - f_begin, 0, 0 };
 802		if (!md->md.quote(ob, &work, md->md.opaque))
 803			end = 0;
 804	} else {
 805		if (!md->md.quote(ob, 0, md->md.opaque))
 806			end = 0;
 807	}
 808
 809	return end;
 810}
 811
 812
 813/* char_escape • '\\' backslash escape */
 814static size_t
 815char_escape(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 816{
 817	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
 818	hoedown_buffer work = { 0, 0, 0, 0 };
 819
 820	if (size > 1) {
 821		if (strchr(escape_chars, data[1]) == NULL)
 822			return 0;
 823
 824		if (md->md.normal_text) {
 825			work.data = data + 1;
 826			work.size = 1;
 827			md->md.normal_text(ob, &work, md->md.opaque);
 828		}
 829		else hoedown_buffer_putc(ob, data[1]);
 830	} else if (size == 1) {
 831		hoedown_buffer_putc(ob, data[0]);
 832	}
 833
 834	return 2;
 835}
 836
 837/* char_entity • '&' escaped when it doesn't belong to an entity */
 838/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
 839static size_t
 840char_entity(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 841{
 842	size_t end = 1;
 843	hoedown_buffer work = { 0, 0, 0, 0 };
 844
 845	if (end < size && data[end] == '#')
 846		end++;
 847
 848	while (end < size && isalnum(data[end]))
 849		end++;
 850
 851	if (end < size && data[end] == ';')
 852		end++; /* real entity */
 853	else
 854		return 0; /* lone '&' */
 855
 856	if (md->md.entity) {
 857		work.data = data;
 858		work.size = end;
 859		md->md.entity(ob, &work, md->md.opaque);
 860	}
 861	else hoedown_buffer_put(ob, data, end);
 862
 863	return end;
 864}
 865
 866/* char_langle_tag • '<' when tags or autolinks are allowed */
 867static size_t
 868char_langle_tag(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 869{
 870	enum hoedown_autolink altype = HOEDOWN_AUTOLINK_NONE;
 871	size_t end = tag_length(data, size, &altype);
 872	hoedown_buffer work = { data, end, 0, 0 };
 873	int ret = 0;
 874
 875	if (end > 2) {
 876		if (md->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) {
 877			hoedown_buffer *u_link = newbuf(md, BUFFER_SPAN);
 878			work.data = data + 1;
 879			work.size = end - 2;
 880			unscape_text(u_link, &work);
 881			ret = md->md.autolink(ob, u_link, altype, md->md.opaque);
 882			popbuf(md, BUFFER_SPAN);
 883		}
 884		else if (md->md.raw_html_tag)
 885			ret = md->md.raw_html_tag(ob, &work, md->md.opaque);
 886	}
 887
 888	if (!ret) return 0;
 889	else return end;
 890}
 891
 892static size_t
 893char_autolink_www(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 894{
 895	hoedown_buffer *link, *link_url, *link_text;
 896	size_t link_len, rewind;
 897
 898	if (!md->md.link || md->in_link_body)
 899		return 0;
 900
 901	link = newbuf(md, BUFFER_SPAN);
 902
 903	if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) {
 904		link_url = newbuf(md, BUFFER_SPAN);
 905		HOEDOWN_BUFPUTSL(link_url, "http://");
 906		hoedown_buffer_put(link_url, link->data, link->size);
 907
 908		ob->size -= rewind;
 909		if (md->md.normal_text) {
 910			link_text = newbuf(md, BUFFER_SPAN);
 911			md->md.normal_text(link_text, link, md->md.opaque);
 912			md->md.link(ob, link_url, NULL, link_text, md->md.opaque);
 913			popbuf(md, BUFFER_SPAN);
 914		} else {
 915			md->md.link(ob, link_url, NULL, link, md->md.opaque);
 916		}
 917		popbuf(md, BUFFER_SPAN);
 918	}
 919
 920	popbuf(md, BUFFER_SPAN);
 921	return link_len;
 922}
 923
 924static size_t
 925char_autolink_email(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 926{
 927	hoedown_buffer *link;
 928	size_t link_len, rewind;
 929
 930	if (!md->md.autolink || md->in_link_body)
 931		return 0;
 932
 933	link = newbuf(md, BUFFER_SPAN);
 934
 935	if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
 936		ob->size -= rewind;
 937		md->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, md->md.opaque);
 938	}
 939
 940	popbuf(md, BUFFER_SPAN);
 941	return link_len;
 942}
 943
 944static size_t
 945char_autolink_url(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
 946{
 947	hoedown_buffer *link;
 948	size_t link_len, rewind;
 949
 950	if (!md->md.autolink || md->in_link_body)
 951		return 0;
 952
 953	link = newbuf(md, BUFFER_SPAN);
 954
 955	if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
 956		ob->size -= rewind;
 957		md->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, md->md.opaque);
 958	}
 959
 960	popbuf(md, BUFFER_SPAN);
 961	return link_len;
 962}
 963
/* char_link • '[': parsing a link or an image */
/*   data[0] is the '['; a '!' right before it turns the link into an image */
/*   handles, in this order: footnote references ([^id], when the footnotes */
/*   extension is on), inline links [text](url "title"), reference links */
/*   [text][id] and shortcut references [text] */
/*   returns the number of bytes consumed when the renderer callback returned */
/*   non-zero, 0 otherwise; every span buffer taken along the way is given */
/*   back at the cleanup label by restoring the pool size saved on entry */
static size_t
char_link(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
{
	int is_img = (offset && data[-1] == '!'), level;
	size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
	hoedown_buffer *content = 0;
	hoedown_buffer *link = 0;
	hoedown_buffer *title = 0;
	hoedown_buffer *u_link = 0;
	/* remembered so that cleanup can release all buffers at once */
	size_t org_work_size = md->work_bufs[BUFFER_SPAN].size;
	int text_has_nl = 0, ret = 0;
	int in_title = 0, qtype = 0;

	/* checking whether the correct renderer exists */
	if ((is_img && !md->md.image) || (!is_img && !md->md.link))
		goto cleanup;

	/* looking for the matching closing bracket */
	/* (brackets right after a backslash do not change the nesting level) */
	for (level = 1; i < size; i++) {
		if (data[i] == '\n')
			text_has_nl = 1;

		else if (data[i - 1] == '\\')
			continue;

		else if (data[i] == '[')
			level++;

		else if (data[i] == ']') {
			level--;
			if (level <= 0)
				break;
		}
	}

	if (i >= size)
		goto cleanup;

	/* txt_e is the index of the closing ']' of the link text */
	txt_e = i;
	i++;
	
	/* footnote link */
	if (md->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^') {
		hoedown_buffer id = { 0, 0, 0, 0 };
		struct footnote_ref *fr;

		/* "[^]" carries no name */
		if (txt_e < 3)
			goto cleanup;
		
		id.data = data + 2;
		id.size = txt_e - 2;
		
		fr = find_footnote_ref(&md->footnotes_found, id.data, id.size);
		
		/* mark footnote used */
		/* (its number is its position in the list of used footnotes) */
		if (fr && !fr->is_used) {
			if(!add_footnote_ref(&md->footnotes_used, fr))
				goto cleanup;
			fr->is_used = 1;
			fr->num = md->footnotes_used.count;
		}
		
		/* render */
		if (fr && md->md.footnote_ref)
				ret = md->md.footnote_ref(ob, fr->num, md->md.opaque);
		
		goto cleanup;
	}

	/* skip any amount of whitespace or newline */
	/* (this is much more laxist than original markdown syntax) */
	while (i < size && _isspace(data[i]))
		i++;

	/* inline style link */
	if (i < size && data[i] == '(') {
		size_t nb_p;

		/* skipping initial whitespace */
		i++;

		while (i < size && _isspace(data[i]))
			i++;

		link_b = i;

		/* looking for link end: ' " ) */
		/* Count the number of open parenthesis */
		nb_p = 0;

		while (i < size) {
			if (data[i] == '\\') i += 2;
			else if (data[i] == '(' && i != 0) {
				nb_p++; i++;
			}
			else if (data[i] == ')') {
				if (nb_p == 0) break;
				/* the i++ below is not part of the else branch: it runs */
				/* whenever the break above was not taken */
				else nb_p--; i++;
			} else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
			else i++;
		}

		if (i >= size) goto cleanup;
		link_e = i;

		/* looking for title end if present */
		if (data[i] == '\'' || data[i] == '"') {
			qtype = data[i];
			in_title = 1;
			i++;
			title_b = i;

			/* a ')' only ends the link once the title quote has been closed */
			while (i < size) {
				if (data[i] == '\\') i += 2;
				else if (data[i] == qtype) {in_title = 0; i++;}
				else if ((data[i] == ')') && !in_title) break;
				else i++;
			}

			if (i >= size) goto cleanup;

			/* skipping whitespaces after title */
			title_e = i - 1;
			while (title_e > title_b && _isspace(data[title_e]))
				title_e--;

			/* checking for closing quote presence */
			/* (without one, the whole parenthesised text is the link) */
			if (data[title_e] != '\'' &&  data[title_e] != '"') {
				title_b = title_e = 0;
				link_e = i;
			}
		}

		/* remove whitespace at the end of the link */
		while (link_e > link_b && _isspace(data[link_e - 1]))
			link_e--;

		/* remove optional angle brackets around the link */
		if (data[link_b] == '<') link_b++;
		if (data[link_e - 1] == '>') link_e--;

		/* building escaped link and title */
		if (link_e > link_b) {
			link = newbuf(md, BUFFER_SPAN);
			hoedown_buffer_put(link, data + link_b, link_e - link_b);
		}

		if (title_e > title_b) {
			title = newbuf(md, BUFFER_SPAN);
			hoedown_buffer_put(title, data + title_b, title_e - title_b);
		}

		/* step over the closing ')' */
		i++;
	}

	/* reference style link */
	else if (i < size && data[i] == '[') {
		hoedown_buffer id = { 0, 0, 0, 0 };
		struct link_ref *lr;

		/* looking for the id */
		i++;
		link_b = i;
		while (i < size && data[i] != ']') i++;
		if (i >= size) goto cleanup;
		link_e = i;

		/* finding the link_ref */
		/* (an empty id, "[text][]", means the link text is the id) */
		if (link_b == link_e) {
			if (text_has_nl) {
				hoedown_buffer *b = newbuf(md, BUFFER_SPAN);
				size_t j;

				/* each newline becomes a space unless one precedes it already */
				for (j = 1; j < txt_e; j++) {
					if (data[j] != '\n')
						hoedown_buffer_putc(b, data[j]);
					else if (data[j - 1] != ' ')
						hoedown_buffer_putc(b, ' ');
				}

				id.data = b->data;
				id.size = b->size;
			} else {
				id.data = data + 1;
				id.size = txt_e - 1;
			}
		} else {
			id.data = data + link_b;
			id.size = link_e - link_b;
		}

		lr = find_link_ref(md->refs, id.data, id.size);
		if (!lr)
			goto cleanup;

		/* keeping link and title from link_ref */
		link = lr->link;
		title = lr->title;
		i++;
	}

	/* shortcut reference style link */
	else {
		hoedown_buffer id = { 0, 0, 0, 0 };
		struct link_ref *lr;

		/* crafting the id */
		if (text_has_nl) {
			hoedown_buffer *b = newbuf(md, BUFFER_SPAN);
			size_t j;

			/* each newline becomes a space unless one precedes it already */
			for (j = 1; j < txt_e; j++) {
				if (data[j] != '\n')
					hoedown_buffer_putc(b, data[j]);
				else if (data[j - 1] != ' ')
					hoedown_buffer_putc(b, ' ');
			}

			id.data = b->data;
			id.size = b->size;
		} else {
			id.data = data + 1;
			id.size = txt_e - 1;
		}

		/* finding the link_ref */
		lr = find_link_ref(md->refs, id.data, id.size);
		if (!lr)
			goto cleanup;

		/* keeping link and title from link_ref */
		link = lr->link;
		title = lr->title;

		/* rewinding the whitespace */
		i = txt_e + 1;
	}

	/* building content: img alt is escaped, link content is parsed */
	if (txt_e > 1) {
		content = newbuf(md, BUFFER_SPAN);
		if (is_img) {
			hoedown_buffer_put(content, data + 1, txt_e - 1);
		} else {
			/* disable autolinking when parsing inline the
			 * content of a link */
			md->in_link_body = 1;
			parse_inline(content, md, data + 1, txt_e - 1);
			md->in_link_body = 0;
		}
	}

	/* the renderer receives the link with its backslash escapes removed */
	if (link) {
		u_link = newbuf(md, BUFFER_SPAN);
		unscape_text(u_link, link);
	}

	/* calling the relevant rendering function */
	if (is_img) {
		/* the '!' was already copied to ob as plain text: take it back */
		if (ob->size && ob->data[ob->size - 1] == '!')
			ob->size -= 1;

		ret = md->md.image(ob, u_link, title, content, md->md.opaque);
	} else {
		ret = md->md.link(ob, u_link, title, content, md->md.opaque);
	}

	/* cleanup */
cleanup:
	md->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
	return ret ? i : 0;
}
1237
1238static size_t
1239char_superscript(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t offset, size_t size)
1240{
1241	size_t sup_start, sup_len;
1242	hoedown_buffer *sup;
1243
1244	if (!md->md.superscript)
1245		return 0;
1246
1247	if (size < 2)
1248		return 0;
1249
1250	if (data[1] == '(') {
1251		sup_start = sup_len = 2;
1252
1253		while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1254			sup_len++;
1255
1256		if (sup_len == size)
1257			return 0;
1258	} else {
1259		sup_start = sup_len = 1;
1260
1261		while (sup_len < size && !_isspace(data[sup_len]))
1262			sup_len++;
1263	}
1264
1265	if (sup_len - sup_start == 0)
1266		return (sup_start == 2) ? 3 : 0;
1267
1268	sup = newbuf(md, BUFFER_SPAN);
1269	parse_inline(sup, md, data + sup_start, sup_len - sup_start);
1270	md->md.superscript(ob, sup, md->md.opaque);
1271	popbuf(md, BUFFER_SPAN);
1272
1273	return (sup_start == 2) ? sup_len + 1 : sup_len;
1274}
1275
1276/*********************************
1277 * BLOCK-LEVEL PARSING FUNCTIONS *
1278 *********************************/
1279
/* is_empty • tells whether the first line of data holds only spaces */
/*	returns 0 as soon as a non-space char is met before the newline;
 *	otherwise returns the number of chars scanned plus one (i.e. the
 *	line length with its newline counted) */
static size_t
is_empty(const uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;

		pos++;
	}

	return pos + 1;
}
1292
/* is_hrule • returns whether a line is a horizontal rule */
/*	a rule is a line made of at least three '*', '-' or '_' (all the same
 *	char), optionally separated by spaces, after up to three spaces of
 *	indentation; returns 1 on match, 0 otherwise */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, marks = 0;
	uint8_t rule_char;

	if (size < 3)
		return 0;

	/* up to three leading spaces are tolerated */
	if (data[0] == ' ') {
		pos = 1;
		if (data[1] == ' ') {
			pos = 2;
			if (data[2] == ' ')
				pos = 3;
		}
	}

	/* at least two more chars must follow the first marker */
	if (pos + 2 >= size)
		return 0;

	rule_char = data[pos];
	if (rule_char != '*' && rule_char != '-' && rule_char != '_')
		return 0;

	/* the rest of the line may only hold the marker or spaces */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == rule_char)
			marks++;
		else if (data[pos] != ' ')
			return 0;
	}

	return marks >= 3;
}
1323
/* prefix_codefence • checks whether a line begins with a code fence */
/*	a fence is a run of three or more '~' or '`' after up to three spaces
 *	of indentation; returns the offset just past the fence run, or 0 when
 *	the line does not start with a fence */
static size_t
prefix_codefence(uint8_t *data, size_t size)
{
	size_t pos = 0, run = 0;
	uint8_t fence_char;

	if (size < 3)
		return 0;

	/* up to three leading spaces are tolerated */
	if (data[0] == ' ') {
		pos = 1;
		if (data[1] == ' ') {
			pos = 2;
			if (data[2] == ' ')
				pos = 3;
		}
	}

	/* at least two more chars must follow the first fence char */
	if (pos + 2 >= size)
		return 0;

	fence_char = data[pos];
	if (fence_char != '~' && fence_char != '`')
		return 0;

	/* measure the run of identical fence chars */
	while (pos < size && data[pos] == fence_char) {
		run++;
		pos++;
	}

	return (run >= 3) ? pos : 0;
}
1354
1355/* check if a line is a code fence; return its size if it is */
1356static size_t
1357is_codefence(uint8_t *data, size_t size, hoedown_buffer *syntax)
1358{
1359	size_t i = 0, syn_len = 0;
1360	uint8_t *syn_start;
1361
1362	i = prefix_codefence(data, size);
1363	if (i == 0)
1364		return 0;
1365
1366	while (i < size && data[i] == ' ')
1367		i++;
1368
1369	syn_start = data + i;
1370
1371	if (i < size && data[i] == '{') {
1372		i++; syn_start++;
1373
1374		while (i < size && data[i] != '}' && data[i] != '\n') {
1375			syn_len++; i++;
1376		}
1377
1378		if (i == size || data[i] != '}')
1379			return 0;
1380
1381		/* strip all whitespace at the beginning and the end
1382		 * of the {} block */
1383		while (syn_len > 0 && _isspace(syn_start[0])) {
1384			syn_start++; syn_len--;
1385		}
1386
1387		while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1388			syn_len--;
1389
1390		i++;
1391	} else {
1392		while (i < size && !_isspace(data[i])) {
1393			syn_len++; i++;
1394		}
1395	}
1396
1397	if (syntax) {
1398		syntax->data = syn_start;
1399		syntax->size = syn_len;
1400	}
1401
1402	while (i < size && data[i] != '\n') {
1403		if (!_isspace(data[i]))
1404			return 0;
1405
1406		i++;
1407	}
1408
1409	return i + 1;
1410}
1411
1412/* is_atxheader • returns whether the line is a hash-prefixed header */
1413static int
1414is_atxheader(hoedown_markdown *md, uint8_t *data, size_t size)
1415{
1416	if (data[0] != '#')
1417		return 0;
1418
1419	if (md->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) {
1420		size_t level = 0;
1421
1422		while (level < size && level < 6 && data[level] == '#')
1423			level++;
1424
1425		if (level < size && data[level] != ' ')
1426			return 0;
1427	}
1428
1429	return 1;
1430}
1431
/* is_headerline • returns whether the line is a setext-style hdr underline */
/*	an underline is a run of '=' (level 1) or '-' (level 2), optionally
 *	followed by spaces, up to the end of the line; returns the header
 *	level (1 or 2), or 0 when the line is not an underline */
static int
is_headerline(uint8_t *data, size_t size)
{
	size_t i;
	int level;

	/* nothing to look at: never read data[0] on an empty line */
	if (size == 0)
		return 0;

	if (data[0] == '=')
		level = 1;
	else if (data[0] == '-')
		level = 2;
	else
		return 0;

	/* run of the underline char, then optional trailing spaces */
	for (i = 1; i < size && data[i] == data[0]; i++)
		/* skipping the run */;

	while (i < size && data[i] == ' ')
		i++;

	/* anything else before the end of the line disqualifies it */
	return (i >= size || data[i] == '\n') ? level : 0;
}
1452
/* is_next_headerline • returns whether the line following the first one */
/*	is a setext-style header underline (result of is_headerline on it) */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next = 0;

	/* find the newline ending the first line, then step past it */
	while (next < size && data[next] != '\n')
		next++;

	next++;

	/* no second line available */
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1466
/* prefix_quote • returns blockquote prefix length */
/*	the prefix is up to three spaces, a '>' and one optional space;
 *	returns 0 when the line is not quoted */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three leading spaces are tolerated */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	pos++;

	/* one space after the marker belongs to the prefix */
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
1485
/* prefix_code • returns prefix length for block code */
/*	i.e. 4 when the line starts with four spaces, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1495
/* prefix_oli • returns ordered list item prefix length */
/*	the prefix is up to three spaces, one or more digits, a '.' and a
 *	space; returns 0 when the line is not an ordered item, or when the
 *	next line is a setext underline (the line is then a header) */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three leading spaces are tolerated */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	while (pos < size && data[pos] >= '0' && data[pos] <= '9')
		pos++;

	/* the number must be followed by ". " */
	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1520
/* prefix_uli • returns unordered list item prefix length */
/*	the prefix is up to three spaces, one of '*', '+' or '-' and a space;
 *	returns 0 when the line is not an unordered item, or when the next
 *	line is a setext underline (the line is then a header) */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three leading spaces are tolerated */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* a bullet and the space after it must both be present */
	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '*' && data[pos] != '+' && data[pos] != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}

1541
1542
/* parse_block • parsing of a run of blocks, rendered into ob (returns nothing) */
1544static void parse_block(hoedown_buffer *ob, hoedown_markdown *md,
1545			uint8_t *data, size_t size);
1546
1547
1548/* parse_blockquote • handles parsing of a blockquote fragment */
1549static size_t
1550parse_blockquote(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
1551{
1552	size_t beg, end = 0, pre, work_size = 0;
1553	uint8_t *work_data = 0;
1554	hoedown_buffer *out = 0;
1555
1556	out = newbuf(md, BUFFER_BLOCK);
1557	beg = 0;
1558	while (beg < size) {
1559		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1560
1561		pre = prefix_quote(data + beg, end - beg);
1562
1563		if (pre)
1564			beg += pre; /* skipping prefix */
1565
1566		/* empty line followed by non-quote line */
1567		else if (is_empty(data + beg, end - beg) &&
1568				(end >= size || (prefix_quote(data + end, size - end) == 0 &&
1569				!is_empty(data + end, size - end))))
1570			break;
1571
1572		if (beg < end) { /* copy into the in-place working buffer */
1573			/* hoedown_buffer_put(work, data + beg, end - beg); */
1574			if (!work_data)
1575				work_data = data + beg;
1576			else if (data + beg != work_data + work_size)
1577				memmove(work_data + work_size, data + beg, end - beg);
1578			work_size += end - beg;
1579		}
1580		beg = end;
1581	}
1582
1583	parse_block(out, md, work_data, work_size);
1584	if (md->md.blockquote)
1585		md->md.blockquote(ob, out, md->md.opaque);
1586	popbuf(md, BUFFER_BLOCK);
1587	return end;
1588}
1589
1590static size_t
1591parse_htmlblock(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, int do_render);
1592
1593/* parse_blockquote • handles parsing of a regular paragraph */
1594static size_t
1595parse_paragraph(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
1596{
1597	size_t i = 0, end = 0;
1598	int level = 0;
1599	hoedown_buffer work = { data, 0, 0, 0 };
1600
1601	while (i < size) {
1602		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1603
1604		if (is_empty(data + i, size - i))
1605			break;
1606
1607		if ((level = is_headerline(data + i, size - i)) != 0)
1608			break;
1609
1610		if (is_atxheader(md, data + i, size - i) ||
1611			is_hrule(data + i, size - i) ||
1612			prefix_quote(data + i, size - i)) {
1613			end = i;
1614			break;
1615		}
1616
1617		/*
1618		 * Early termination of a paragraph with the same logic
1619		 * as Markdown 1.0.0. If this logic is applied, the
1620		 * Markdown 1.0.3 test suite won't pass cleanly
1621		 *
1622		 * :: If the first character in a new line is not a letter,
1623		 * let's check to see if there's some kind of block starting
1624		 * here
1625		 */
1626		if ((md->ext_flags & HOEDOWN_EXT_LAX_SPACING) && !isalnum(data[i])) {
1627			if (prefix_oli(data + i, size - i) ||
1628				prefix_uli(data + i, size - i)) {
1629				end = i;
1630				break;
1631			}
1632
1633			/* see if an html block starts here */
1634			if (data[i] == '<' && md->md.blockhtml &&
1635				parse_htmlblock(ob, md, data + i, size - i, 0)) {
1636				end = i;
1637				break;
1638			}
1639
1640			/* see if a code fence starts here */
1641			if ((md->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 &&
1642				is_codefence(data + i, size - i, NULL) != 0) {
1643				end = i;
1644				break;
1645			}
1646		}
1647
1648		i = end;
1649	}
1650
1651	work.size = i;
1652	while (work.size && data[work.size - 1] == '\n')
1653		work.size--;
1654
1655	if (!level) {
1656		hoedown_buffer *tmp = newbuf(md, BUFFER_BLOCK);
1657		parse_inline(tmp, md, work.data, work.size);
1658		if (md->md.paragraph)
1659			md->md.paragraph(ob, tmp, md->md.opaque);
1660		popbuf(md, BUFFER_BLOCK);
1661	} else {
1662		hoedown_buffer *header_work;
1663
1664		if (work.size) {
1665			size_t beg;
1666			i = work.size;
1667			work.size -= 1;
1668
1669			while (work.size && data[work.size] != '\n')
1670				work.size -= 1;
1671
1672			beg = work.size + 1;
1673			while (work.size && data[work.size - 1] == '\n')
1674				work.size -= 1;
1675
1676			if (work.size > 0) {
1677				hoedown_buffer *tmp = newbuf(md, BUFFER_BLOCK);
1678				parse_inline(tmp, md, work.data, work.size);
1679
1680				if (md->md.paragraph)
1681					md->md.paragraph(ob, tmp, md->md.opaque);
1682
1683				popbuf(md, BUFFER_BLOCK);
1684				work.data += beg;
1685				work.size = i - beg;
1686			}
1687			else work.size = i;
1688		}
1689
1690		header_work = newbuf(md, BUFFER_SPAN);
1691		parse_inline(header_work, md, work.data, work.size);
1692
1693		if (md->md.header)
1694			md->md.header(ob, header_work, (int)level, md->md.opaque);
1695
1696		popbuf(md, BUFFER_SPAN);
1697	}
1698
1699	return end;
1700}
1701
1702/* parse_fencedcode • handles parsing of a block-level code fragment */
1703static size_t
1704parse_fencedcode(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
1705{
1706	size_t beg, end;
1707	hoedown_buffer *work = 0;
1708	hoedown_buffer lang = { 0, 0, 0, 0 };
1709
1710	beg = is_codefence(data, size, &lang);
1711	if (beg == 0) return 0;
1712
1713	work = newbuf(md, BUFFER_BLOCK);
1714
1715	while (beg < size) {
1716		size_t fence_end;
1717		hoedown_buffer fence_trail = { 0, 0, 0, 0 };
1718
1719		fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1720		if (fence_end != 0 && fence_trail.size == 0) {
1721			beg += fence_end;
1722			break;
1723		}
1724
1725		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1726
1727		if (beg < end) {
1728			/* verbatim copy to the working buffer,
1729				escaping entities */
1730			if (is_empty(data + beg, end - beg))
1731				hoedown_buffer_putc(work, '\n');
1732			else hoedown_buffer_put(work, data + beg, end - beg);
1733		}
1734		beg = end;
1735	}
1736
1737	if (work->size && work->data[work->size - 1] != '\n')
1738		hoedown_buffer_putc(work, '\n');
1739
1740	if (md->md.blockcode)
1741		md->md.blockcode(ob, work, lang.size ? &lang : NULL, md->md.opaque);
1742
1743	popbuf(md, BUFFER_BLOCK);
1744	return beg;
1745}
1746
1747static size_t
1748parse_blockcode(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
1749{
1750	size_t beg, end, pre;
1751	hoedown_buffer *work = 0;
1752
1753	work = newbuf(md, BUFFER_BLOCK);
1754
1755	beg = 0;
1756	while (beg < size) {
1757		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1758		pre = prefix_code(data + beg, end - beg);
1759
1760		if (pre)
1761			beg += pre; /* skipping prefix */
1762		else if (!is_empty(data + beg, end - beg))
1763			/* non-empty non-prefixed line breaks the pre */
1764			break;
1765
1766		if (beg < end) {
1767			/* verbatim copy to the working buffer,
1768				escaping entities */
1769			if (is_empty(data + beg, end - beg))
1770				hoedown_buffer_putc(work, '\n');
1771			else hoedown_buffer_put(work, data + beg, end - beg);
1772		}
1773		beg = end;
1774	}
1775
1776	while (work->size && work->data[work->size - 1] == '\n')
1777		work->size -= 1;
1778
1779	hoedown_buffer_putc(work, '\n');
1780
1781	if (md->md.blockcode)
1782		md->md.blockcode(ob, work, NULL, md->md.opaque);
1783
1784	popbuf(md, BUFFER_BLOCK);
1785	return beg;
1786}
1787
1788/* parse_listitem • parsing of a single list item */
1789/*	assuming initial prefix is already removed */
1790static size_t
1791parse_listitem(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, int *flags)
1792{
1793	hoedown_buffer *work = 0, *inter = 0;
1794	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1795	int in_empty = 0, has_inside_empty = 0, in_fence = 0;
1796
1797	/* keeping track of the first indentation prefix */
1798	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1799		orgpre++;
1800
1801	beg = prefix_uli(data, size);
1802	if (!beg)
1803		beg = prefix_oli(data, size);
1804
1805	if (!beg)
1806		return 0;
1807
1808	/* skipping to the beginning of the following line */
1809	end = beg;
1810	while (end < size && data[end - 1] != '\n')
1811		end++;
1812
1813	/* getting working buffers */
1814	work = newbuf(md, BUFFER_SPAN);
1815	inter = newbuf(md, BUFFER_SPAN);
1816
1817	/* putting the first line into the working buffer */
1818	hoedown_buffer_put(work, data + beg, end - beg);
1819	beg = end;
1820
1821	/* process the following lines */
1822	while (beg < size) {
1823		size_t has_next_uli = 0, has_next_oli = 0;
1824
1825		end++;
1826
1827		while (end < size && data[end - 1] != '\n')
1828			end++;
1829
1830		/* process an empty line */
1831		if (is_empty(data + beg, end - beg)) {
1832			in_empty = 1;
1833			beg = end;
1834			continue;
1835		}
1836
1837		/* calculating the indentation */
1838		i = 0;
1839		while (i < 4 && beg + i < end && data[beg + i] == ' ')
1840			i++;
1841
1842		pre = i;
1843
1844		if (md->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
1845			if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
1846				in_fence = !in_fence;
1847		}
1848
1849		/* Only check for new list items if we are **not** inside
1850		 * a fenced code block */
1851		if (!in_fence) {
1852			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1853			has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1854		}
1855
1856		/* checking for ul/ol switch */
1857		if (in_empty && (
1858			((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
1859			(!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli))){
1860			*flags |= HOEDOWN_LI_END;
1861			break; /* the following item must have same list type */
1862		}
1863
1864		/* checking for a new item */
1865		if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1866			if (in_empty)
1867				has_inside_empty = 1;
1868
1869			if (pre == orgpre) /* the following item must have */
1870				break;             /* the same indentation */
1871
1872			if (!sublist)
1873				sublist = work->size;
1874		}
1875		/* joining only indented stuff after empty lines;
1876		 * note that now we only require 1 space of indentation
1877		 * to continue a list */
1878		else if (in_empty && pre == 0) {
1879			*flags |= HOEDOWN_LI_END;
1880			break;
1881		}
1882		else if (in_empty) {
1883			hoedown_buffer_putc(work, '\n');
1884			has_inside_empty = 1;
1885		}
1886
1887		in_empty = 0;
1888
1889		/* adding the line without prefix into the working buffer */
1890		hoedown_buffer_put(work, data + beg + i, end - beg - i);
1891		beg = end;
1892	}
1893
1894	/* render of li contents */
1895	if (has_inside_empty)
1896		*flags |= HOEDOWN_LI_BLOCK;
1897
1898	if (*flags & HOEDOWN_LI_BLOCK) {
1899		/* intermediate render of block li */
1900		if (sublist && sublist < work->size) {
1901			parse_block(inter, md, work->data, sublist);
1902			parse_block(inter, md, work->data + sublist, work->size - sublist);
1903		}
1904		else
1905			parse_block(inter, md, work->data, work->size);
1906	} else {
1907		/* intermediate render of inline li */
1908		if (sublist && sublist < work->size) {
1909			parse_inline(inter, md, work->data, sublist);
1910			parse_block(inter, md, work->data + sublist, work->size - sublist);
1911		}
1912		else
1913			parse_inline(inter, md, work->data, work->size);
1914	}
1915
1916	/* render of li itself */
1917	if (md->md.listitem)
1918		md->md.listitem(ob, inter, *flags, md->md.opaque);
1919
1920	popbuf(md, BUFFER_SPAN);
1921	popbuf(md, BUFFER_SPAN);
1922	return beg;
1923}
1924
1925
1926/* parse_list • parsing ordered or unordered list block */
1927static size_t
1928parse_list(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, int flags)
1929{
1930	hoedown_buffer *work = 0;
1931	size_t i = 0, j;
1932
1933	work = newbuf(md, BUFFER_BLOCK);
1934
1935	while (i < size) {
1936		j = parse_listitem(work, md, data + i, size - i, &flags);
1937		i += j;
1938
1939		if (!j || (flags & HOEDOWN_LI_END))
1940			break;
1941	}
1942
1943	if (md->md.list)
1944		md->md.list(ob, work, flags, md->md.opaque);
1945	popbuf(md, BUFFER_BLOCK);
1946	return i;
1947}
1948
1949/* parse_atxheader • parsing of atx-style headers */
1950static size_t
1951parse_atxheader(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
1952{
1953	size_t level = 0;
1954	size_t i, end, skip;
1955
1956	while (level < size && level < 6 && data[level] == '#')
1957		level++;
1958
1959	for (i = level; i < size && data[i] == ' '; i++);
1960
1961	for (end = i; end < size && data[end] != '\n'; end++);
1962	skip = end;
1963
1964	while (end && data[end - 1] == '#')
1965		end--;
1966
1967	while (end && data[end - 1] == ' ')
1968		end--;
1969
1970	if (end > i) {
1971		hoedown_buffer *work = newbuf(md, BUFFER_SPAN);
1972
1973		parse_inline(work, md, data + i, end - i);
1974
1975		if (md->md.header)
1976			md->md.header(ob, work, (int)level, md->md.opaque);
1977
1978		popbuf(md, BUFFER_SPAN);
1979	}
1980
1981	return skip;
1982}
1983
1984/* parse_footnote_def • parse a single footnote definition */
1985static void
1986parse_footnote_def(hoedown_buffer *ob, hoedown_markdown *md, unsigned int num, uint8_t *data, size_t size)
1987{
1988	hoedown_buffer *work = 0;
1989	work = newbuf(md, BUFFER_SPAN);
1990	
1991	parse_block(work, md, data, size);
1992	
1993	if (md->md.footnote_def)
1994	md->md.footnote_def(ob, work, num, md->md.opaque);
1995	popbuf(md, BUFFER_SPAN);
1996}
1997
1998/* parse_footnote_list • render the contents of the footnotes */
1999static void
2000parse_footnote_list(hoedown_buffer *ob, hoedown_markdown *md, struct footnote_list *footnotes)
2001{
2002	hoedown_buffer *work = 0;
2003	struct footnote_item *item;
2004	struct footnote_ref *ref;
2005	
2006	if (footnotes->count == 0)
2007		return;
2008	
2009	work = newbuf(md, BUFFER_BLOCK);
2010	
2011	item = footnotes->head;
2012	while (item) {
2013		ref = item->ref;
2014		parse_footnote_def(work, md, ref->num, ref->contents->data, ref->contents->size);
2015		item = item->next;
2016	}
2017	
2018	if (md->md.footnotes)
2019		md->md.footnotes(ob, work, md->md.opaque);
2020	popbuf(md, BUFFER_BLOCK);
2021}
2022
2023/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
2024/*	returns the length on match, 0 otherwise */
2025static size_t
2026htmlblock_end_tag(
2027	const char *tag,
2028	size_t tag_len,
2029	hoedown_markdown *md,
2030	uint8_t *data,
2031	size_t size)
2032{
2033	size_t i, w;
2034
2035	/* checking if tag is a match */
2036	if (tag_len + 3 >= size ||
2037		strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
2038		data[tag_len + 2] != '>')
2039		return 0;
2040
2041	/* checking white lines */
2042	i = tag_len + 3;
2043	w = 0;
2044	if (i < size && (w = is_empty(data + i, size - i)) == 0)
2045		return 0; /* non-blank after tag */
2046	i += w;
2047	w = 0;
2048
2049	if (i < size)
2050		w = is_empty(data + i, size - i);
2051
2052	return i + w;
2053}
2054
2055static size_t
2056htmlblock_end(const char *curtag,
2057	hoedown_markdown *md,
2058	uint8_t *data,
2059	size_t size,
2060	int start_of_line)
2061{
2062	size_t tag_size = strlen(curtag);
2063	size_t i = 1, end_tag;
2064	int block_lines = 0;
2065
2066	while (i < size) {
2067		i++;
2068		while (i < size && !(data[i - 1] == '<' && data[i] == '/')) {
2069			if (data[i] == '\n')
2070				block_lines++;
2071
2072			i++;
2073		}
2074
2075		/* If we are only looking for unindented tags, skip the tag
2076		 * if it doesn't follow a newline.
2077		 *
2078		 * The only exception to this is if the tag is still on the
2079		 * initial line; in that case it still counts as a closing
2080		 * tag
2081		 */
2082		if (start_of_line && block_lines > 0 && data[i - 2] != '\n')
2083			continue;
2084
2085		if (i + 2 + tag_size >= size)
2086			break;
2087
2088		end_tag = htmlblock_end_tag(curtag, tag_size, md, data + i - 1, size - i + 1);
2089		if (end_tag)
2090			return i + end_tag - 1;
2091	}
2092
2093	return 0;
2094}
2095
2096
2097/* parse_htmlblock • parsing of inline HTML block */
2098static size_t
2099parse_htmlblock(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size, int do_render)
2100{
2101	size_t i, j = 0, tag_end;
2102	const char *curtag = NULL;
2103	hoedown_buffer work = { data, 0, 0, 0 };
2104
2105	/* identification of the opening tag */
2106	if (size < 2 || data[0] != '<')
2107		return 0;
2108
2109	i = 1;
2110	while (i < size && data[i] != '>' && data[i] != ' ')
2111		i++;
2112
2113	if (i < size)
2114		curtag = hoedown_find_block_tag((char *)data + 1, (int)i - 1);
2115
2116	/* handling of special cases */
2117	if (!curtag) {
2118
2119		/* HTML comment, laxist form */
2120		if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
2121			i = 5;
2122
2123			while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
2124				i++;
2125
2126			i++;
2127
2128			if (i < size)
2129				j = is_empty(data + i, size - i);
2130
2131			if (j) {
2132				work.size = i + j;
2133				if (do_render && md->md.blockhtml)
2134					md->md.blockhtml(ob, &work, md->md.opaque);
2135				return work.size;
2136			}
2137		}
2138
2139		/* HR, which is the only self-closing block tag considered */
2140		if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
2141			i = 3;
2142			while (i < size && data[i] != '>')
2143				i++;
2144
2145			if (i + 1 < size) {
2146				i++;
2147				j = is_empty(data + i, size - i);
2148				if (j) {
2149					work.size = i + j;
2150					if (do_render && md->md.blockhtml)
2151						md->md.blockhtml(ob, &work, md->md.opaque);
2152					return work.size;
2153				}
2154			}
2155		}
2156
2157		/* no special case recognised */
2158		return 0;
2159	}
2160
2161	/* looking for an unindented matching closing tag */
2162	/*	followed by a blank line */
2163	tag_end = htmlblock_end(curtag, md, data, size, 1);
2164
2165	/* if not found, trying a second pass looking for indented match */
2166	/* but not if tag is "ins" or "del" (following original Markdown.pl) */
2167	if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
2168		tag_end = htmlblock_end(curtag, md, data, size, 0);
2169	}
2170
2171	if (!tag_end)
2172		return 0;
2173
2174	/* the end of the block has been found */
2175	work.size = tag_end;
2176	if (do_render && md->md.blockhtml)
2177		md->md.blockhtml(ob, &work, md->md.opaque);
2178
2179	return tag_end;
2180}
2181
2182static void
2183parse_table_row(
2184	hoedown_buffer *ob,
2185	hoedown_markdown *md,
2186	uint8_t *data,
2187	size_t size,
2188	size_t columns,
2189	int *col_data,
2190	int header_flag)
2191{
2192	size_t i = 0, col;
2193	hoedown_buffer *row_work = 0;
2194
2195	if (!md->md.table_cell || !md->md.table_row)
2196		return;
2197
2198	row_work = newbuf(md, BUFFER_SPAN);
2199
2200	if (i < size && data[i] == '|')
2201		i++;
2202
2203	for (col = 0; col < columns && i < size; ++col) {
2204		size_t cell_start, cell_end;
2205		hoedown_buffer *cell_work;
2206
2207		cell_work = newbuf(md, BUFFER_SPAN);
2208
2209		while (i < size && _isspace(data[i]))
2210			i++;
2211
2212		cell_start = i;
2213
2214		while (i < size && data[i] != '|')
2215			i++;
2216
2217		cell_end = i - 1;
2218
2219		while (cell_end > cell_start && _isspace(data[cell_end]))
2220			cell_end--;
2221
2222		parse_inline(cell_work, md, data + cell_start, 1 + cell_end - cell_start);
2223		md->md.table_cell(row_work, cell_work, col_data[col] | header_flag, md->md.opaque);
2224
2225		popbuf(md, BUFFER_SPAN);
2226		i++;
2227	}
2228
2229	for (; col < columns; ++col) {
2230		hoedown_buffer empty_cell = { 0, 0, 0, 0 };
2231		md->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, md->md.opaque);
2232	}
2233
2234	md->md.table_row(ob, row_work, md->md.opaque);
2235
2236	popbuf(md, BUFFER_SPAN);
2237}
2238
2239static size_t
2240parse_table_header(
2241	hoedown_buffer *ob,
2242	hoedown_markdown *md,
2243	uint8_t *data,
2244	size_t size,
2245	size_t *columns,
2246	int **column_data)
2247{
2248	int pipes;
2249	size_t i = 0, col, header_end, under_end;
2250
2251	pipes = 0;
2252	while (i < size && data[i] != '\n')
2253		if (data[i++] == '|')
2254			pipes++;
2255
2256	if (i == size || pipes == 0)
2257		return 0;
2258
2259	header_end = i;
2260
2261	while (header_end > 0 && _isspace(data[header_end - 1]))
2262		header_end--;
2263
2264	if (data[0] == '|')
2265		pipes--;
2266
2267	if (header_end && data[header_end - 1] == '|')
2268		pipes--;
2269
2270	if (pipes < 0)
2271		return 0;
2272
2273	*columns = pipes + 1;
2274	*column_data = calloc(*columns, sizeof(int));
2275
2276	/* Parse the header underline */
2277	i++;
2278	if (i < size && data[i] == '|')
2279		i++;
2280
2281	under_end = i;
2282	while (under_end < size && data[under_end] != '\n')
2283		under_end++;
2284
2285	for (col = 0; col < *columns && i < under_end; ++col) {
2286		size_t dashes = 0;
2287
2288		while (i < under_end && data[i] == ' ')
2289			i++;
2290
2291		if (data[i] == ':') {
2292			i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_L;
2293			dashes++;
2294		}
2295
2296		while (i < under_end && data[i] == '-') {
2297			i++; dashes++;
2298		}
2299
2300		if (i < under_end && data[i] == ':') {
2301			i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_R;
2302			dashes++;
2303		}
2304
2305		while (i < under_end && data[i] == ' ')
2306			i++;
2307
2308		if (i < under_end && data[i] != '|' && data[i] != '+')
2309			break;
2310
2311		if (dashes < 3)
2312			break;
2313
2314		i++;
2315	}
2316
2317	if (col < *columns)
2318		return 0;
2319
2320	parse_table_row(
2321		ob, md, data,
2322		header_end,
2323		*columns,
2324		*column_data,
2325		HOEDOWN_TABLE_HEADER
2326	);
2327
2328	return under_end + 1;
2329}
2330
2331static size_t
2332parse_table(
2333	hoedown_buffer *ob,
2334	hoedown_markdown *md,
2335	uint8_t *data,
2336	size_t size)
2337{
2338	size_t i;
2339
2340	hoedown_buffer *header_work = 0;
2341	hoedown_buffer *body_work = 0;
2342
2343	size_t columns;
2344	int *col_data = NULL;
2345
2346	header_work = newbuf(md, BUFFER_SPAN);
2347	body_work = newbuf(md, BUFFER_BLOCK);
2348
2349	i = parse_table_header(header_work, md, data, size, &columns, &col_data);
2350	if (i > 0) {
2351
2352		while (i < size) {
2353			size_t row_start;
2354			int pipes = 0;
2355
2356			row_start = i;
2357
2358			while (i < size && data[i] != '\n')
2359				if (data[i++] == '|')
2360					pipes++;
2361
2362			if (pipes == 0 || i == size) {
2363				i = row_start;
2364				break;
2365			}
2366
2367			parse_table_row(
2368				body_work,
2369				md,
2370				data + row_start,
2371				i - row_start,
2372				columns,
2373				col_data, 0
2374			);
2375
2376			i++;
2377		}
2378
2379		if (md->md.table)
2380			md->md.table(ob, header_work, body_work, md->md.opaque);
2381	}
2382
2383	free(col_data);
2384	popbuf(md, BUFFER_SPAN);
2385	popbuf(md, BUFFER_BLOCK);
2386	return i;
2387}
2388
2389/* parse_block • parsing of one block, returning next uint8_t to parse */
2390static void
2391parse_block(hoedown_buffer *ob, hoedown_markdown *md, uint8_t *data, size_t size)
2392{
2393	size_t beg, end, i;
2394	uint8_t *txt_data;
2395	beg = 0;
2396
2397	if (md->work_bufs[BUFFER_SPAN].size +
2398		md->work_bufs[BUFFER_BLOCK].size > md->max_nesting)
2399		return;
2400
2401	while (beg < size) {
2402		txt_data = data + beg;
2403		end = size - beg;
2404
2405		if (is_atxheader(md, txt_data, end))
2406			beg += parse_atxheader(ob, md, txt_data, end);
2407
2408		else if (data[beg] == '<' && md->md.blockhtml &&
2409				(i = parse_htmlblock(ob, md, txt_data, end, 1)) != 0)
2410			beg += i;
2411
2412		else if ((i = is_empty(txt_data, end)) != 0)
2413			beg += i;
2414
2415		else if (is_hrule(txt_data, end)) {
2416			if (md->md.hrule)
2417				md->md.hrule(ob, md->md.opaque);
2418
2419			while (beg < size && data[beg] != '\n')
2420				beg++;
2421
2422			beg++;
2423		}
2424
2425		else if ((md->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 &&
2426			(i = parse_fencedcode(ob, md, txt_data, end)) != 0)
2427			beg += i;
2428
2429		else if ((md->ext_flags & HOEDOWN_EXT_TABLES) != 0 &&
2430			(i = parse_table(ob, md, txt_data, end)) != 0)
2431			beg += i;
2432
2433		else if (prefix_quote(txt_data, end))
2434			beg += parse_blockquote(ob, md, txt_data, end);
2435
2436		else if (!(md->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end))
2437			beg += parse_blockcode(ob, md, txt_data, end);
2438
2439		else if (prefix_uli(txt_data, end))
2440			beg += parse_list(ob, md, txt_data, end, 0);
2441
2442		else if (prefix_oli(txt_data, end))
2443			beg += parse_list(ob, md, txt_data, end, HOEDOWN_LIST_ORDERED);
2444
2445		else
2446			beg += parse_paragraph(ob, md, txt_data, end);
2447	}
2448}
2449
2450
2451
2452/*********************
2453 * REFERENCE PARSING *
2454 *********************/
2455
2456/* is_footnote • returns whether a line is a footnote definition or not */
2457static int
2458is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct footnote_list *list)
2459{
2460	size_t i = 0;
2461	hoedown_buffer *contents = 0;
2462	size_t ind = 0;
2463	int in_empty = 0;
2464	size_t start = 0;
2465	
2466	size_t id_offset, id_end;
2467	
2468	/* up to 3 optional leading spaces */
2469	if (beg + 3 >= end) return 0;
2470	if (data[beg] == ' ') { i = 1;
2471	if (data[beg + 1] == ' ') { i = 2;
2472	if (data[beg + 2] == ' ') { i = 3;
2473	if (data[beg + 3] == ' ') return 0; } } }
2474	i += beg;
2475	
2476	/* id part: caret followed by anything between brackets */
2477	if (data[i] != '[') return 0;
2478	i++;
2479	if (i >= end || data[i] != '^') return 0;
2480	i++;
2481	id_offset = i;
2482	while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2483		i++;
2484	if (i >= end || data[i] != ']') return 0;
2485	id_end = i;
2486	
2487	/* spacer: colon (space | tab)* newline? (space | tab)* */
2488	i++;
2489	if (i >= end || data[i] != ':') return 0; 
2490	i++;
2491	
2492	/* getting content buffer */
2493	contents = hoedown_buffer_new(64);
2494	
2495	start = i;
2496	
2497	/* process lines similiar to a list item */
2498	while (i < end) {
2499		while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2500		
2501		/* process an empty line */
2502		if (is_empty(data + start, i - start)) {
2503			in_empty = 1;
2504			if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2505				i++;
2506				if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
2507			}
2508			start = i;
2509			continue;
2510		}
2511	
2512		/* calculating the indentation */
2513		ind = 0;
2514		while (ind < 4 && start + ind < end && data[start + ind] == ' ')
2515			ind++;
2516	
2517		/* joining only indented stuff after empty lines;
2518		 * note that now we only require 1 space of indentation
2519		 * to continue, just like lists */
2520		if (ind == 0) {
2521			if (start == id_end + 2 && data[start] == '\t') {}
2522			else break;
2523		}
2524		else if (in_empty) {
2525			hoedown_buffer_putc(contents, '\n');
2526		}
2527	
2528		in_empty = 0;
2529	
2530		/* adding the line into the content buffer */
2531		hoedown_buffer_put(contents, data + start + ind, i - start - ind);
2532		/* add carriage return */
2533		if (i < end) {
2534			hoedown_buffer_put(contents, "\n", 1);
2535			if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2536				i++;
2537				if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
2538			}
2539		}
2540		start = i;
2541	}
2542	
2543	if (last)
2544		*last = start;
2545	
2546	if (list) {
2547		struct footnote_ref *ref;
2548		ref = create_footnote_ref(list, data + id_offset, id_end - id_offset);
2549		if (!ref)
2550			return 0;
2551		if (!add_footnote_ref(list, ref)) {
2552			free_footnote_ref(ref);
2553			return 0;
2554		}
2555		ref->contents = contents;
2556	}
2557	
2558	return 1;
2559}
2560
2561/* is_ref • returns whether a line is a reference or not */
2562static int
2563is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2564{
2565/*	int n; */
2566	size_t i = 0;
2567	size_t id_offset, id_end;
2568	size_t link_offset, link_end;
2569	size_t title_offset, title_end;
2570	size_t line_end;
2571
2572	/* up to 3 optional leading spaces */
2573	if (beg + 3 >= end) return 0;
2574	if (data[beg] == ' ') { i = 1;
2575	if (data[beg + 1] == ' ') { i = 2;
2576	if (data[beg + 2] == ' ') { i = 3;
2577	if (data[beg + 3] == ' ') return 0; } } }
2578	i += beg;
2579
2580	/* id part: anything but a newline between brackets */
2581	if (data[i] != '[') return 0;
2582	i++;
2583	id_offset = i;
2584	while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2585		i++;
2586	if (i >= end || data[i] != ']') return 0;
2587	id_end = i;
2588
2589	/* spacer: colon (space | tab)* newline? (space | tab)* */
2590	i++;
2591	if (i >= end || data[i] != ':') return 0;
2592	i++;
2593	while (i < end && data[i] == ' ') i++;
2594	if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2595		i++;
2596		if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2597	while (i < end && data[i] == ' ') i++;
2598	if (i >= end) return 0;
2599
2600	/* link: whitespace-free sequence, optionally between angle brackets */
2601	if (data[i] == '<')
2602		i++;
2603
2604	link_offset = i;
2605
2606	while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
2607		i++;
2608
2609	if (data[i - 1] == '>') link_end = i - 1;
2610	else link_end = i;
2611
2612	/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2613	while (i < end && data[i] == ' ') i++;
2614	if (i < end && data[i] != '\n' && data[i] != '\r'
2615			&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
2616		return 0;
2617	line_end = 0;
2618	/* computing end-of-line */
2619	if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
2620	if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2621		line_end = i + 1;
2622
2623	/* optional (space|tab)* spacer after a newline */
2624	if (line_end) {
2625		i = line_end + 1;
2626		while (i < end && data[i] == ' ') i++; }
2627
2628	/* optional title: any non-newline sequence enclosed in '"()
2629					alone on its line */
2630	title_offset = title_end = 0;
2631	if (i + 1 < end
2632	&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2633		i++;
2634		title_offset = i;
2635		/* looking for EOL */
2636		while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2637		if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2638			title_end = i + 1;
2639		else	title_end = i;
2640		/* stepping back */
2641		i -= 1;
2642		while (i > title_offset && data[i] == ' ')
2643			i -= 1;
2644		if (i > title_offset
2645		&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2646			line_end = title_end;
2647			title_end = i; } }
2648
2649	if (!line_end || link_end == link_offset)
2650		return 0; /* garbage after the link empty link */
2651
2652	/* a valid ref has been found, filling-in return structures */
2653	if (last)
2654		*last = line_end;
2655
2656	if (refs) {
2657		struct link_ref *ref;
2658
2659		ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
2660		if (!ref)
2661			return 0;
2662
2663		ref->link = hoedown_buffer_new(link_end - link_offset);
2664		hoedown_buffer_put(ref->link, data + link_offset, link_end - link_offset);
2665
2666		if (title_end > title_offset) {
2667			ref->title = hoedown_buffer_new(title_end - title_offset);
2668			hoedown_buffer_put(ref->title, data + title_offset, title_end - title_offset);
2669		}
2670	}
2671
2672	return 1;
2673}
2674
2675static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size)
2676{
2677	size_t  i = 0, tab = 0;
2678
2679	while (i < size) {
2680		size_t org = i;
2681
2682		while (i < size && line[i] != '\t') {
2683			i++; tab++;
2684		}
2685
2686		if (i > org)
2687			hoedown_buffer_put(ob, line + org, i - org);
2688
2689		if (i >= size)
2690			break;
2691
2692		do {
2693			hoedown_buffer_putc(ob, ' '); tab++;
2694		} while (tab % 4);
2695
2696		i++;
2697	}
2698}
2699
2700/**********************
2701 * EXPORTED FUNCTIONS *
2702 **********************/
2703
2704hoedown_markdown *
2705hoedown_markdown_new(
2706	unsigned int extensions,
2707	size_t max_nesting,
2708	const hoedown_renderer *renderer)
2709{
2710	hoedown_markdown *md = NULL;
2711
2712	assert(max_nesting > 0 && renderer);
2713
2714	md = malloc(sizeof(hoedown_markdown));
2715	if (!md)
2716		return NULL;
2717
2718	memcpy(&md->md, renderer, sizeof(hoedown_renderer));
2719
2720	hoedown_stack_new(&md->work_bufs[BUFFER_BLOCK], 4);
2721	hoedown_stack_new(&md->work_bufs[BUFFER_SPAN], 8);
2722
2723	memset(md->active_char, 0x0, 256);
2724
2725	if (md->md.emphasis || md->md.double_emphasis || md->md.triple_emphasis) {
2726		md->active_char['*'] = MD_CHAR_EMPHASIS;
2727		md->active_char['_'] = MD_CHAR_EMPHASIS;
2728		if (extensions & HOEDOWN_EXT_STRIKETHROUGH)
2729			md->active_char['~'] = MD_CHAR_EMPHASIS;
2730		if (extensions & HOEDOWN_EXT_HIGHLIGHT)
2731			md->active_char['='] = MD_CHAR_EMPHASIS;
2732	}
2733
2734	if (md->md.codespan)
2735		md->active_char['`'] = MD_CHAR_CODESPAN;
2736
2737	if (md->md.linebreak)
2738		md->active_char['\n'] = MD_CHAR_LINEBREAK;
2739
2740	if (md->md.image || md->md.link)
2741		md->active_char['['] = MD_CHAR_LINK;
2742
2743	md->active_char['<'] = MD_CHAR_LANGLE;
2744	md->active_char['\\'] = MD_CHAR_ESCAPE;
2745	md->active_char['&'] = MD_CHAR_ENTITITY;
2746
2747	if (extensions & HOEDOWN_EXT_AUTOLINK) {
2748		md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2749		md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2750		md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2751	}
2752
2753	if (extensions & HOEDOWN_EXT_SUPERSCRIPT)
2754		md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2755
2756	if (extensions & HOEDOWN_EXT_QUOTE)
2757		md->active_char['"'] = MD_CHAR_QUOTE;
2758
2759	/* Extension data */
2760	md->ext_flags = extensions;
2761	md->max_nesting = max_nesting;
2762	md->in_link_body = 0;
2763
2764	return md;
2765}
2766
2767void
2768hoedown_markdown_render(hoedown_buffer *ob, const uint8_t *document, size_t doc_size, hoedown_markdown *md)
2769{
2770	static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
2771
2772	hoedown_buffer *text;
2773	size_t beg, end;
2774
2775	int footnotes_enabled;
2776
2777	text = hoedown_buffer_new(64);
2778	if (!text)
2779		return;
2780
2781	/* Preallocate enough space for our buffer to avoid expanding while copying */
2782	hoedown_buffer_grow(text, doc_size);
2783
2784	/* reset the references table */
2785	memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2786	
2787	footnotes_enabled = md->ext_flags & HOEDOWN_EXT_FOOTNOTES;
2788	
2789	/* reset the footnotes lists */
2790	if (footnotes_enabled) {
2791		memset(&md->footnotes_found, 0x0, sizeof(md->footnotes_found));
2792		memset(&md->footnotes_used, 0x0, sizeof(md->footnotes_used));
2793	}
2794
2795	/* first pass: looking for references, copying everything else */
2796	beg = 0;
2797
2798	/* Skip a possible UTF-8 BOM, even though the Unicode standard
2799	 * discourages having these in UTF-8 documents */
2800	if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
2801		beg += 3;
2802
2803	while (beg < doc_size) /* iterating over lines */
2804		if (footnotes_enabled && is_footnote(document, beg, doc_size, &end, &md->footnotes_found))
2805			beg = end;
2806		else if (is_ref(document, beg, doc_size, &end, md->refs))
2807			beg = end;
2808		else { /* skipping to the next line */
2809			end = beg;
2810			while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2811				end++;
2812
2813			/* adding the line body if present */
2814			if (end > beg)
2815				expand_tabs(text, document + beg, end - beg);
2816
2817			while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2818				/* add one \n per newline */
2819				if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2820					hoedown_buffer_putc(text, '\n');
2821				end++;
2822			}
2823
2824			beg = end;
2825		}
2826
2827	/* pre-grow the output buffer to minimize allocations */
2828	hoedown_buffer_grow(ob, text->size + (text->size >> 1));
2829
2830	/* second pass: actual rendering */
2831	if (md->md.doc_header)
2832		md->md.doc_header(ob, md->md.opaque);
2833
2834	if (text->size) {
2835		/* adding a final newline if not already present */
2836		if (text->data[text->size - 1] != '\n' &&  text->data[text->size - 1] != '\r')
2837			hoedown_buffer_putc(text, '\n');
2838
2839		parse_block(ob, md, text->data, text->size);
2840	}
2841	
2842	/* footnotes */
2843	if (footnotes_enabled)
2844		parse_footnote_list(ob, md, &md->footnotes_used);
2845
2846	if (md->md.doc_footer)
2847		md->md.doc_footer(ob, md->md.opaque);
2848
2849	/* clean-up */
2850	hoedown_buffer_free(text);
2851	free_link_refs(md->refs);
2852	if (footnotes_enabled) {
2853		free_footnote_list(&md->footnotes_found, 1);
2854		free_footnote_list(&md->footnotes_used, 0);
2855	}
2856
2857	assert(md->work_bufs[BUFFER_SPAN].size == 0);
2858	assert(md->work_bufs[BUFFER_BLOCK].size == 0);
2859}
2860
2861void
2862hoedown_markdown_free(hoedown_markdown *md)
2863{
2864	size_t i;
2865
2866	for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2867		hoedown_buffer_free(md->work_bufs[BUFFER_SPAN].item[i]);
2868
2869	for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2870		hoedown_buffer_free(md->work_bufs[BUFFER_BLOCK].item[i]);
2871
2872	hoedown_stack_free(&md->work_bufs[BUFFER_SPAN]);
2873	hoedown_stack_free(&md->work_bufs[BUFFER_BLOCK]);
2874
2875	free(md);
2876}
2877
2878void
2879hoedown_version(int *ver_major, int *ver_minor, int *ver_revision)
2880{
2881	*ver_major = HOEDOWN_VERSION_MAJOR;
2882	*ver_minor = HOEDOWN_VERSION_MINOR;
2883	*ver_revision = HOEDOWN_VERSION_REVISION;
2884}