PageRenderTime 13ms CodeModel.GetById 10ms app.highlight 190ms RepoModel.GetById 14ms app.codeStats 0ms

/SnuDom/src/markdown.cpp

https://github.com/hippiehunter/Baconography
C++ | 2586 lines | 2356 code | 150 blank | 80 comment | 207 complexity | 9509e15c85cde3562310fe96d461d9b1 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/* markdown.c - generic markdown parser */
   2
   3/*
   4 * Copyright (c) 2009, Natacha Porté
   5 * Copyright (c) 2011, Vicent Marti
   6 *
   7 * Permission to use, copy, modify, and distribute this software for any
   8 * purpose with or without fee is hereby granted, provided that the above
   9 * copyright notice and this permission notice appear in all copies.
  10 *
  11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18 */
  19
  20#include "markdown.h"
  21#include "stack.h"
  22
  23#include <assert.h>
  24#include <string.h>
  25#include <ctype.h>
  26#include <stdio.h>
  27
/* Windows has no strncasecmp; map it onto the CRT's _strnicmp */
#if defined(_WIN32)
#define strncasecmp	_strnicmp
#endif

/* number of hash buckets in sd_markdown.refs (link references are
 * chained per bucket, indexed by hash % REF_TABLE_SIZE) */
#define REF_TABLE_SIZE 8

/* indices into sd_markdown.work_bufs: one pool of working buffers
 * for block-level work and one for span-level work */
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1

#define MKD_LI_END 8	/* internal list flag */

/* configuration macros defined ahead of html_blocks.h; presumably
 * consumed by the gperf-generated lookup code in that header
 * (NOTE(review): confirm against html_blocks.h) */
#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
#define GPERF_DOWNCASE 1
#define GPERF_CASE_STRNCMP 1
#include "html_blocks.h"
  43
  44/***************
  45 * LOCAL TYPES *
  46 ***************/
  47
/* link_ref: reference to a link */
/*   one node of a singly linked hash-bucket chain (see add_link_ref) */
struct link_ref {
	/* hash of the reference name, as computed by hash_link_ref;
	 * lookups compare this value only */
	unsigned int id;

	/* link target and optional title; both are handed to bufrelease
	 * by free_link_refs (they are filled in outside this chunk) */
	struct buf *link;
	struct buf *title;

	/* next entry in the same bucket, or NULL at the end of the chain */
	struct link_ref *next;
};
  57
/* char_trigger: function pointer to render active chars */
/*   returns the number of chars taken care of; 0 means the handler */
/*   did nothing and parse_inline treats the char as ordinary text */
/*   ob is the output buffer the handler renders into */
/*   data is the pointer of the beginning of the span */
/*   offset is the number of valid chars before data */
/*   size is the number of chars available starting at data */
struct sd_markdown;
typedef size_t
(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);

/* forward declarations of the handlers stored in markdown_char_ptrs */
static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  78
  79enum markdown_char_t {
  80	MD_CHAR_NONE = 0,
  81	MD_CHAR_EMPHASIS,
  82	MD_CHAR_CODESPAN,
  83	MD_CHAR_LINEBREAK,
  84	MD_CHAR_LINK,
  85	MD_CHAR_LANGLE,
  86	MD_CHAR_ESCAPE,
  87	MD_CHAR_ENTITITY,
  88	MD_CHAR_AUTOLINK_URL,
  89	MD_CHAR_AUTOLINK_EMAIL,
  90	MD_CHAR_AUTOLINK_WWW,
  91	MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME,
  92	MD_CHAR_SUPERSCRIPT,
  93};
  94
  95static char_trigger markdown_char_ptrs[] = {
  96	NULL,
  97	&char_emphasis,
  98	&char_codespan,
  99	&char_linebreak,
 100	&char_link,
 101	&char_langle_tag,
 102	&char_escape,
 103	&char_entity,
 104	&char_autolink_url,
 105	&char_autolink_email,
 106	&char_autolink_www,
 107	&char_autolink_subreddit_or_username,
 108	&char_superscript,
 109};
 110
/* render • structure containing one particular render */
struct sd_markdown {
	/* renderer callbacks (emphasis, link, autolink, normal_text, ...)
	 * plus the allocate callback used for every buffer operation */
	struct sd_callbacks	cb;
	/* user context passed back to every callback and to cb.allocate */
	void *opaque;

	/* hash buckets of link references, see add_link_ref/find_link_ref */
	struct link_ref *refs[REF_TABLE_SIZE];
	/* per-byte action code: 0 for plain text, otherwise an index
	 * into markdown_char_ptrs (see parse_inline) */
	uint8_t active_char[256];
	/* pools of reusable working buffers, indexed by BUFFER_BLOCK
	 * and BUFFER_SPAN */
	struct stack work_bufs[2];
	/* MKDEXT_* extension flags */
	unsigned int ext_flags;
	/* parse_inline gives up once the number of working buffers in
	 * use exceeds this value */
	size_t max_nesting;
	/* non-zero while the text of a link is being parsed; the
	 * autolink handlers return 0 while it is set */
	int in_link_body;
};
 123
 124/***************************
 125 * HELPER FUNCTIONS *
 126 ***************************/
 127
 128static inline struct buf *
 129rndr_newbuf(struct sd_markdown *rndr, int type)
 130{
 131	static const size_t buf_size[2] = {256, 64};
 132	struct buf *work = NULL;
 133	struct stack *pool = &rndr->work_bufs[type];
 134
 135	if (pool->size < pool->asize &&
 136		pool->item[pool->size] != NULL) {
 137		work = (buf*)pool->item[pool->size++];
 138		work->size = 0;
 139	} else {
 140		work = bufnew(rndr->opaque, rndr->cb.allocate, buf_size[type]);
 141		stack_push(pool, work);
 142	}
 143
 144	return work;
 145}
 146
 147static inline void
 148rndr_popbuf(struct sd_markdown *rndr, int type)
 149{
 150	rndr->work_bufs[type].size--;
 151}
 152
 153static void
 154unscape_text(void* opaque, void* (*allocate)(void *opaque, size_t size), struct buf *ob, struct buf *src)
 155{
 156	size_t i = 0, org;
 157	while (i < src->size) {
 158		org = i;
 159		while (i < src->size && src->data[i] != '\\')
 160			i++;
 161
 162		if (i > org)
 163			bufput(opaque, allocate, ob, src->data + org, i - org);
 164
 165		if (i + 1 >= src->size)
 166			break;
 167
 168		bufputc(opaque, allocate, ob, src->data[i + 1]);
 169		i += 2;
 170	}
 171}
 172
 173static unsigned int
 174hash_link_ref(const uint8_t *link_ref, size_t length)
 175{
 176	size_t i;
 177	unsigned int hash = 0;
 178
 179	for (i = 0; i < length; ++i)
 180		hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
 181
 182	return hash;
 183}
 184
 185static struct link_ref *
 186add_link_ref(
 187void* opaque, void* (*allocate)(void *opaque, size_t size),
 188	struct link_ref **references,
 189	const uint8_t *name, size_t name_size)
 190{
 191	struct link_ref *ref = (link_ref*)allocate(opaque, sizeof(struct link_ref));
 192	memset(ref, 0, sizeof(struct link_ref));
 193
 194	if (!ref)
 195		return NULL;
 196
 197	ref->id = hash_link_ref(name, name_size);
 198	ref->next = references[ref->id % REF_TABLE_SIZE];
 199
 200	references[ref->id % REF_TABLE_SIZE] = ref;
 201	return ref;
 202}
 203
 204static struct link_ref *
 205find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
 206{
 207	unsigned int hash = hash_link_ref(name, length);
 208	struct link_ref *ref = NULL;
 209
 210	ref = references[hash % REF_TABLE_SIZE];
 211
 212	while (ref != NULL) {
 213		if (ref->id == hash)
 214			return ref;
 215
 216		ref = ref->next;
 217	}
 218
 219	return NULL;
 220}
 221
 222static void
 223free_link_refs(struct link_ref **references)
 224{
 225	size_t i;
 226
 227	for (i = 0; i < REF_TABLE_SIZE; ++i) {
 228		struct link_ref *r = references[i];
 229		struct link_ref *next;
 230
 231		while (r) {
 232			next = r->next;
 233			bufrelease(r->link);
 234			bufrelease(r->title);
 235			//free(r);
 236			r = next;
 237		}
 238	}
 239}
 240
 241/*
 242 * Check whether a char is a Markdown space.
 243
 244 * Right now we only consider spaces the actual
 245 * space and a newline: tabs and carriage returns
 246 * are filtered out during the preprocessing phase.
 247 *
 248 * If we wanted to actually be UTF-8 compliant, we
 249 * should instead extract an Unicode codepoint from
 250 * this character and check for space properties.
 251 */
 252static inline int
 253_isspace(int c)
 254{
 255	return c == ' ' || c == '\n';
 256}
 257
 258/****************************
 259 * INLINE PARSING FUNCTIONS *
 260 ****************************/
 261
 262/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
 263/* this is less strict than the original markdown e-mail address matching */
 264static size_t
 265is_mail_autolink(uint8_t *data, size_t size)
 266{
 267	size_t i = 0, nb = 0;
 268
 269	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
 270	for (i = 0; i < size; ++i) {
 271		if (isalnum(data[i]))
 272			continue;
 273
 274		switch (data[i]) {
 275			case '@':
 276				nb++;
 277
 278			case '-':
 279			case '.':
 280			case '_':
 281				break;
 282
 283			case '>':
 284				return (nb == 1) ? i + 1 : 0;
 285
 286			default:
 287				return 0;
 288		}
 289	}
 290
 291	return 0;
 292}
 293
 294/* tag_length • returns the length of the given tag, or 0 is it's not valid */
 295static size_t
 296tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
 297{
 298	size_t i, j;
 299
 300	/* a valid tag can't be shorter than 3 chars */
 301	if (size < 3) return 0;
 302
 303	/* begins with a '<' optionally followed by '/', followed by letter or number */
 304	if (data[0] != '<') return 0;
 305	i = (data[1] == '/') ? 2 : 1;
 306
 307	if (!isalnum(data[i]))
 308		return 0;
 309
 310	/* scheme test */
 311	*autolink = MKDA_NOT_AUTOLINK;
 312
 313	/* try to find the beginning of an URI */
 314	while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
 315		i++;
 316
 317	if (i > 1 && data[i] == '@') {
 318		if ((j = is_mail_autolink(data + i, size - i)) != 0) {
 319			*autolink = MKDA_EMAIL;
 320			return i + j;
 321		}
 322	}
 323
 324	if (i > 2 && data[i] == ':') {
 325		*autolink = MKDA_NORMAL;
 326		i++;
 327	}
 328
 329	/* completing autolink test: no whitespace or ' or " */
 330	if (i >= size)
 331		*autolink = MKDA_NOT_AUTOLINK;
 332
 333	else if (*autolink) {
 334		j = i;
 335
 336		while (i < size) {
 337			if (data[i] == '\\') i += 2;
 338			else if (data[i] == '>' || data[i] == '\'' ||
 339					data[i] == '"' || data[i] == ' ' || data[i] == '\n')
 340					break;
 341			else i++;
 342		}
 343
 344		if (i >= size) return 0;
 345		if (i > j && data[i] == '>') return i + 1;
 346		/* one of the forbidden chars has been found */
 347		*autolink = MKDA_NOT_AUTOLINK;
 348	}
 349
 350	/* looking for sometinhg looking like a tag end */
 351	while (i < size && data[i] != '>') i++;
 352	if (i >= size) return 0;
 353	return i + 1;
 354}
 355
 356/* parse_inline • parses inline markdown elements */
 357static void
 358parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
 359{
 360	size_t i = 0, end = 0;
 361	uint8_t action = 0;
 362	struct buf work = { 0, 0, 0, 0 };
 363
 364	if (rndr->work_bufs[BUFFER_SPAN].size +
 365		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
 366		return;
 367
 368	while (i < size) {
 369		/* copying inactive chars into the output */
 370		while (end < size && (action = rndr->active_char[data[end]]) == 0) {
 371			end++;
 372		}
 373
 374		if (rndr->cb.normal_text) {
 375			work.data = data + i;
 376			work.size = end - i;
 377			rndr->cb.normal_text(ob, &work, rndr->opaque);
 378		}
 379		else
 380			bufput(rndr->opaque, rndr->cb.allocate, ob, data + i, end - i);
 381
 382		if (end >= size) break;
 383		i = end;
 384
 385		end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
 386		if (!end) /* no action from the callback */
 387			end = i + 1;
 388		else {
 389			i += end;
 390			end = i;
 391		}
 392	}
 393}
 394
 395/* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
 396static size_t
 397find_emph_char(uint8_t *data, size_t size, uint8_t c)
 398{
 399	size_t i = 1;
 400
 401	while (i < size) {
 402		while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
 403			i++;
 404
 405		if (i == size)
 406			return 0;
 407
 408		if (data[i] == c)
 409			return i;
 410
 411		/* not counting escaped chars */
 412		if (i && data[i - 1] == '\\') {
 413			i++; continue;
 414		}
 415
 416		if (data[i] == '`') {
 417			size_t span_nb = 0, bt;
 418			size_t tmp_i = 0;
 419
 420			/* counting the number of opening backticks */
 421			while (i < size && data[i] == '`') {
 422				i++; span_nb++;
 423			}
 424
 425			if (i >= size) return 0;
 426
 427			/* finding the matching closing sequence */
 428			bt = 0;
 429			while (i < size && bt < span_nb) {
 430				if (!tmp_i && data[i] == c) tmp_i = i;
 431				if (data[i] == '`') bt++;
 432				else bt = 0;
 433				i++;
 434			}
 435
 436			if (i >= size) return tmp_i;
 437		}
 438		/* skipping a link */
 439		else if (data[i] == '[') {
 440			size_t tmp_i = 0;
 441			uint8_t cc;
 442
 443			i++;
 444			while (i < size && data[i] != ']') {
 445				if (!tmp_i && data[i] == c) tmp_i = i;
 446				i++;
 447			}
 448
 449			i++;
 450			while (i < size && (data[i] == ' ' || data[i] == '\n'))
 451				i++;
 452
 453			if (i >= size)
 454				return tmp_i;
 455
 456			switch (data[i]) {
 457			case '[':
 458				cc = ']'; break;
 459
 460			case '(':
 461				cc = ')'; break;
 462
 463			default:
 464				if (tmp_i)
 465					return tmp_i;
 466				else
 467					continue;
 468			}
 469
 470			i++;
 471			while (i < size && data[i] != cc) {
 472				if (!tmp_i && data[i] == c) tmp_i = i;
 473				i++;
 474			}
 475
 476			if (i >= size)
 477				return tmp_i;
 478
 479			i++;
 480		}
 481	}
 482
 483	return 0;
 484}
 485
 486/* parse_emph1 • parsing single emphase */
 487/* closed by a symbol not preceded by whitespace and not followed by symbol */
 488static size_t
 489parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 490{
 491	size_t i = 0, len;
 492	struct buf *work = 0;
 493	int r;
 494
 495	if (!rndr->cb.emphasis) return 0;
 496
 497	/* skipping one symbol if coming from emph3 */
 498	if (size > 1 && data[0] == c && data[1] == c) i = 1;
 499
 500	while (i < size) {
 501		len = find_emph_char(data + i, size - i, c);
 502		if (!len) return 0;
 503		i += len;
 504		if (i >= size) return 0;
 505
 506		if (data[i] == c && !_isspace(data[i - 1])) {
 507			if ((rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) && (c == '_')) {
 508				if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
 509					continue;
 510			}
 511
 512			work = rndr_newbuf(rndr, BUFFER_SPAN);
 513			parse_inline(work, rndr, data, i);
 514			r = rndr->cb.emphasis(ob, work, rndr->opaque);
 515			rndr_popbuf(rndr, BUFFER_SPAN);
 516			return r ? i + 1 : 0;
 517		}
 518	}
 519
 520	return 0;
 521}
 522
 523/* parse_emph2 • parsing single emphase */
 524static size_t
 525parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 526{
 527	int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
 528	size_t i = 0, len;
 529	struct buf *work = 0;
 530	int r;
 531
 532	render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
 533
 534	if (!render_method)
 535		return 0;
 536
 537	while (i < size) {
 538		len = find_emph_char(data + i, size - i, c);
 539		if (!len) return 0;
 540		i += len;
 541
 542		if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
 543			work = rndr_newbuf(rndr, BUFFER_SPAN);
 544			parse_inline(work, rndr, data, i);
 545			r = render_method(ob, work, rndr->opaque);
 546			rndr_popbuf(rndr, BUFFER_SPAN);
 547			return r ? i + 2 : 0;
 548		}
 549		i++;
 550	}
 551	return 0;
 552}
 553
 554/* parse_emph3 • parsing single emphase */
 555/* finds the first closing tag, and delegates to the other emph */
 556static size_t
 557parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 558{
 559	size_t i = 0, len;
 560	int r;
 561
 562	while (i < size) {
 563		len = find_emph_char(data + i, size - i, c);
 564		if (!len) return 0;
 565		i += len;
 566
 567		/* skip whitespace preceded symbols */
 568		if (data[i] != c || _isspace(data[i - 1]))
 569			continue;
 570
 571		if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
 572			/* triple symbol found */
 573			struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
 574
 575			parse_inline(work, rndr, data, i);
 576			r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
 577			rndr_popbuf(rndr, BUFFER_SPAN);
 578			return r ? i + 3 : 0;
 579
 580		} else if (i + 1 < size && data[i + 1] == c) {
 581			/* double symbol found, handing over to emph1 */
 582			len = parse_emph1(ob, rndr, data - 2, size + 2, c);
 583			if (!len) return 0;
 584			else return len - 2;
 585
 586		} else {
 587			/* single symbol found, handing over to emph2 */
 588			len = parse_emph2(ob, rndr, data - 1, size + 1, c);
 589			if (!len) return 0;
 590			else return len - 1;
 591		}
 592	}
 593	return 0;
 594}
 595
 596/* char_emphasis • single and double emphasis parsing */
 597static size_t
 598char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 599{
 600	uint8_t c = data[0];
 601	size_t ret;
 602
 603	if (size > 2 && data[1] != c) {
 604		/* whitespace cannot follow an opening emphasis;
 605		 * strikethrough only takes two characters '~~' */
 606		if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
 607			return 0;
 608
 609		return ret + 1;
 610	}
 611
 612	if (size > 3 && data[1] == c && data[2] != c) {
 613		if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
 614			return 0;
 615
 616		return ret + 2;
 617	}
 618
 619	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
 620		if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
 621			return 0;
 622
 623		return ret + 3;
 624	}
 625
 626	return 0;
 627}
 628
 629
 630/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
 631static size_t
 632char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 633{
 634	if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
 635		return 0;
 636
 637	/* removing the last space from ob and rendering */
 638	while (ob->size && ob->data[ob->size - 1] == ' ')
 639		ob->size--;
 640
 641	return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
 642}
 643
 644
 645/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
 646static size_t
 647char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 648{
 649	size_t end, nb = 0, i, f_begin, f_end;
 650
 651	/* counting the number of backticks in the delimiter */
 652	while (nb < size && data[nb] == '`')
 653		nb++;
 654
 655	/* finding the next delimiter */
 656	i = 0;
 657	for (end = nb; end < size && i < nb; end++) {
 658		if (data[end] == '`') i++;
 659		else i = 0;
 660	}
 661
 662	if (i < nb && end >= size)
 663		return 0; /* no matching delimiter */
 664
 665	/* trimming outside whitespaces */
 666	f_begin = nb;
 667	while (f_begin < end && data[f_begin] == ' ')
 668		f_begin++;
 669
 670	f_end = end - nb;
 671	while (f_end > nb && data[f_end-1] == ' ')
 672		f_end--;
 673
 674	/* real code span */
 675	if (f_begin < f_end) {
 676		struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
 677		if (!rndr->cb.codespan(ob, &work, rndr->opaque))
 678			end = 0;
 679	} else {
 680		if (!rndr->cb.codespan(ob, 0, rndr->opaque))
 681			end = 0;
 682	}
 683
 684	return end;
 685}
 686
 687
 688/* char_escape • '\\' backslash escape */
 689static size_t
 690char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 691{
 692	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>/^~";
 693	struct buf work = { 0, 0, 0, 0 };
 694
 695	if (size > 1) {
 696		if (strchr(escape_chars, data[1]) == NULL)
 697			return 0;
 698
 699		if (rndr->cb.normal_text) {
 700			work.data = data + 1;
 701			work.size = 1;
 702			rndr->cb.normal_text(ob, &work, rndr->opaque);
 703		}
 704		else bufputc(rndr->opaque, rndr->cb.allocate, ob, data[1]);
 705	} else if (size == 1) {
 706		bufputc(rndr->opaque, rndr->cb.allocate, ob, data[0]);
 707	}
 708
 709	return 2;
 710}
 711
 712/* char_entity • '&' escaped when it doesn't belong to an entity */
 713/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
 714static size_t
 715char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 716{
 717	size_t end = 1;
 718	struct buf work = { 0, 0, 0, 0 };
 719
 720	if (end < size && data[end] == '#')
 721		end++;
 722
 723	while (end < size && isalnum(data[end]))
 724		end++;
 725
 726	if (end < size && data[end] == ';')
 727		end++; /* real entity */
 728	else
 729		return 0; /* lone '&' */
 730
 731	if (rndr->cb.entity) {
 732		work.data = data;
 733		work.size = end;
 734		rndr->cb.entity(ob, &work, rndr->opaque);
 735	}
 736	else bufput(rndr->opaque, rndr->cb.allocate, ob, data, end);
 737
 738	return end;
 739}
 740
 741/* char_langle_tag • '<' when tags or autolinks are allowed */
 742static size_t
 743char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 744{
 745	enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
 746	size_t end = tag_length(data, size, &altype);
 747	struct buf work = { data, end, 0, 0 };
 748	int ret = 0;
 749
 750	if (end > 2) {
 751		if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
 752			struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
 753			work.data = data + 1;
 754			work.size = end - 2;
 755			unscape_text(rndr->opaque, rndr->cb.allocate, u_link, &work);
 756			ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
 757			rndr_popbuf(rndr, BUFFER_SPAN);
 758		}
 759		else if (rndr->cb.raw_html_tag)
 760			ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
 761	}
 762
 763	if (!ret) return 0;
 764	else return end;
 765}
 766
 767static size_t
 768char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 769{
 770	struct buf *link, *link_url, *link_text;
 771	size_t link_len, rewind;
 772
 773	if (!rndr->cb.link || rndr->in_link_body)
 774		return 0;
 775
 776	link = rndr_newbuf(rndr, BUFFER_SPAN);
 777
 778	if ((link_len = sd_autolink__www(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size, 0)) > 0) {
 779		link_url = rndr_newbuf(rndr, BUFFER_SPAN);
 780		BUFPUTSL(rndr->opaque, rndr->cb.allocate,link_url, "http://");
 781		bufput(rndr->opaque, rndr->cb.allocate, link_url, link->data, link->size);
 782
 783		ob->size -= rewind;
 784		if (rndr->cb.normal_text) {
 785			link_text = rndr_newbuf(rndr, BUFFER_SPAN);
 786			rndr->cb.normal_text(link_text, link, rndr->opaque);
 787			rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
 788			rndr_popbuf(rndr, BUFFER_SPAN);
 789		} else {
 790			rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
 791		}
 792		rndr_popbuf(rndr, BUFFER_SPAN);
 793	}
 794
 795	rndr_popbuf(rndr, BUFFER_SPAN);
 796	return link_len;
 797}
 798
 799static size_t
 800char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 801{
 802	struct buf *link;
 803	size_t link_len, rewind;
 804
 805	if (!rndr->cb.autolink || rndr->in_link_body)
 806		return 0;
 807
 808	link = rndr_newbuf(rndr, BUFFER_SPAN);
 809	if ((link_len = sd_autolink__subreddit(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size)) > 0) {
 810		ob->size -= rewind;
 811		rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
 812	} else if ((link_len = sd_autolink__username(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size)) > 0) {
 813		ob->size -= rewind;
 814		rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
 815	}
 816	rndr_popbuf(rndr, BUFFER_SPAN);
 817
 818	return link_len;
 819}
 820
 821static size_t
 822char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 823{
 824	struct buf *link;
 825	size_t link_len, rewind;
 826
 827	if (!rndr->cb.autolink || rndr->in_link_body)
 828		return 0;
 829
 830	link = rndr_newbuf(rndr, BUFFER_SPAN);
 831
 832	if ((link_len = sd_autolink__email(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size, 0)) > 0) {
 833		ob->size -= rewind;
 834		rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
 835	}
 836
 837	rndr_popbuf(rndr, BUFFER_SPAN);
 838	return link_len;
 839}
 840
 841static size_t
 842char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 843{
 844	struct buf *link;
 845	size_t link_len, rewind;
 846
 847	if (!rndr->cb.autolink || rndr->in_link_body)
 848		return 0;
 849
 850	link = rndr_newbuf(rndr, BUFFER_SPAN);
 851
 852	if ((link_len = sd_autolink__url(rndr->opaque, rndr->cb.allocate, &rewind, link, data, offset, size, 0)) > 0) {
 853		ob->size -= rewind;
 854		rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
 855	}
 856
 857	rndr_popbuf(rndr, BUFFER_SPAN);
 858	return link_len;
 859}
 860
/* char_link • '[': parsing a link or an image */
/*   data points at the '['; it is an image when the char right before */
/*   it is '!' */
/*   three forms are handled: inline "[text](link "title")", reference */
/*   "[text][id]" (an empty id reuses the text) and the shortcut */
/*   reference "[text]" */
/*   returns the number of chars consumed when the image/link callback */
/*   rendered something, 0 otherwise */
static size_t
char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
{
	int is_img = (offset && data[-1] == '!'), level;
	/* txt_e: index of the ']' closing the text; link_b/link_e and
	 * title_b/title_e: half-open ranges of the target and the title */
	size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
	struct buf *content = 0;
	struct buf *link = 0;
	struct buf *title = 0;
	struct buf *u_link = 0;
	/* remembered so that every working buffer taken below can be
	 * given back at once on exit */
	size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
	int text_has_nl = 0, ret = 0;
	int in_title = 0, qtype = 0;

	/* checking whether the correct renderer exists */
	if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
		goto cleanup;

	/* looking for the matching closing bracket */
	/* (nested brackets are counted, chars after a backslash are skipped) */
	for (level = 1; i < size; i++) {
		if (data[i] == '\n')
			text_has_nl = 1;

		else if (data[i - 1] == '\\')
			continue;

		else if (data[i] == '[')
			level++;

		else if (data[i] == ']') {
			level--;
			if (level <= 0)
				break;
		}
	}

	if (i >= size)
		goto cleanup;

	txt_e = i;
	i++;

	/* skip any amount of whitespace or newline */
	/* (this is much more laxist than original markdown syntax) */
	while (i < size && _isspace(data[i]))
		i++;

	/* inline style link */
	if (i < size && data[i] == '(') {
		/* skipping initial whitespace */
		i++;

		while (i < size && _isspace(data[i]))
			i++;

		link_b = i;

		/* looking for link end: ' " ) */
		/* (a quote only ends the link when whitespace precedes it) */
		while (i < size) {
			if (data[i] == '\\') i += 2;
			else if (data[i] == ')') break;
			else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
			else i++;
		}

		if (i >= size) goto cleanup;
		link_e = i;

		/* looking for title end if present */
		if (data[i] == '\'' || data[i] == '"') {
			qtype = data[i];
			in_title = 1;
			i++;
			title_b = i;

			/* a ')' only ends the construct once a quote of the
			 * opening kind has been seen */
			while (i < size) {
				if (data[i] == '\\') i += 2;
				else if (data[i] == qtype) {in_title = 0; i++;}
				else if ((data[i] == ')') && !in_title) break;
				else i++;
			}

			if (i >= size) goto cleanup;

			/* skipping whitespaces after title */
			title_e = i - 1;
			while (title_e > title_b && _isspace(data[title_e]))
				title_e--;

			/* checking for closing quote presence */
			/* (without one there is no title and the link runs up to the ')') */
			if (data[title_e] != '\'' &&  data[title_e] != '"') {
				title_b = title_e = 0;
				link_e = i;
			}
		}

		/* remove whitespace at the end of the link */
		while (link_e > link_b && _isspace(data[link_e - 1]))
			link_e--;

		/* remove optional angle brackets around the link */
		if (data[link_b] == '<') link_b++;
		if (data[link_e - 1] == '>') link_e--;

		/* building escaped link and title */
		if (link_e > link_b) {
			link = rndr_newbuf(rndr, BUFFER_SPAN);
			bufput(rndr->opaque, rndr->cb.allocate, link, data + link_b, link_e - link_b);
		}

		if (title_e > title_b) {
			title = rndr_newbuf(rndr, BUFFER_SPAN);
			bufput(rndr->opaque, rndr->cb.allocate, title, data + title_b, title_e - title_b);
		}

		/* step over the closing ')' */
		i++;
	}

	/* reference style link */
	else if (i < size && data[i] == '[') {
		struct buf id = { 0, 0, 0, 0 };
		struct link_ref *lr;

		/* looking for the id */
		i++;
		link_b = i;
		while (i < size && data[i] != ']') i++;
		if (i >= size) goto cleanup;
		link_e = i;

		/* finding the link_ref */
		if (link_b == link_e) {
			/* empty id: the link text itself is the id */
			if (text_has_nl) {
				struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
				size_t j;

				/* newlines become a single space, or vanish when a
				 * space already precedes them */
				for (j = 1; j < txt_e; j++) {
					if (data[j] != '\n')
						bufputc(rndr->opaque, rndr->cb.allocate, b, data[j]);
					else if (data[j - 1] != ' ')
						bufputc(rndr->opaque, rndr->cb.allocate, b, ' ');
				}

				id.data = b->data;
				id.size = b->size;
			} else {
				id.data = data + 1;
				id.size = txt_e - 1;
			}
		} else {
			id.data = data + link_b;
			id.size = link_e - link_b;
		}

		lr = find_link_ref(rndr->refs, id.data, id.size);
		if (!lr)
			goto cleanup;

		/* keeping link and title from link_ref */
		link = lr->link;
		title = lr->title;
		/* step over the closing ']' of the id */
		i++;
	}

	/* shortcut reference style link */
	else {
		struct buf id = { 0, 0, 0, 0 };
		struct link_ref *lr;

		/* crafting the id */
		/* (same newline folding as for an empty reference id) */
		if (text_has_nl) {
			struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
			size_t j;

			for (j = 1; j < txt_e; j++) {
				if (data[j] != '\n')
					bufputc(rndr->opaque, rndr->cb.allocate, b, data[j]);
				else if (data[j - 1] != ' ')
					bufputc(rndr->opaque, rndr->cb.allocate, b, ' ');
			}

			id.data = b->data;
			id.size = b->size;
		} else {
			id.data = data + 1;
			id.size = txt_e - 1;
		}

		/* finding the link_ref */
		lr = find_link_ref(rndr->refs, id.data, id.size);
		if (!lr)
			goto cleanup;

		/* keeping link and title from link_ref */
		link = lr->link;
		title = lr->title;

		/* rewinding the whitespace */
		i = txt_e + 1;
	}

	/* building content: img alt is escaped, link content is parsed */
	if (txt_e > 1) {
		content = rndr_newbuf(rndr, BUFFER_SPAN);
		if (is_img) {
			bufput(rndr->opaque, rndr->cb.allocate, content, data + 1, txt_e - 1);
		} else {
			/* disable autolinking when parsing inline the
			 * content of a link */
			rndr->in_link_body = 1;
			parse_inline(content, rndr, data + 1, txt_e - 1);
			rndr->in_link_body = 0;
		}
	}

	/* a link without a target is not rendered */
	if (link) {
		u_link = rndr_newbuf(rndr, BUFFER_SPAN);
		unscape_text(rndr->opaque, rndr->cb.allocate, u_link, link);
	} else {
		goto cleanup;
	}

	/* calling the relevant rendering function */
	if (is_img) {
		/* take back the '!' that was already written as plain text */
		if (ob->size && ob->data[ob->size - 1] == '!')
			ob->size -= 1;

		ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
	} else {
		ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
	}

	/* cleanup */
	/* (gives back every working buffer taken by this call) */
cleanup:
	rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
	return ret ? i : 0;
}
1098
1099static size_t
1100char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1101{
1102	size_t sup_start, sup_len;
1103	struct buf *sup;
1104
1105	if (!rndr->cb.superscript)
1106		return 0;
1107
1108	if (size < 2)
1109		return 0;
1110
1111	if (data[1] == '(') {
1112		sup_start = sup_len = 2;
1113
1114		while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1115			sup_len++;
1116
1117		if (sup_len == size)
1118			return 0;
1119	} else {
1120		sup_start = sup_len = 1;
1121
1122		while (sup_len < size && !_isspace(data[sup_len]))
1123			sup_len++;
1124	}
1125
1126	if (sup_len - sup_start == 0)
1127		return (sup_start == 2) ? 3 : 0;
1128
1129	sup = rndr_newbuf(rndr, BUFFER_SPAN);
1130	parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1131	rndr->cb.superscript(ob, sup, rndr->opaque);
1132	rndr_popbuf(rndr, BUFFER_SPAN);
1133
1134	return (sup_start == 2) ? sup_len + 1 : sup_len;
1135}
1136
1137/*********************************
1138 * BLOCK-LEVEL PARSING FUNCTIONS *
1139 *********************************/
1140
/* is_empty • tells whether the line starting at `data` is blank
 *
 * A line is blank when every byte before its '\n' (or before `size`,
 * if there is no newline) is a space.
 * Returns 0 for a non-blank line; otherwise the index just past the
 * newline.  When the buffer ends without a newline the result is
 * size + 1, i.e. one more than the number of bytes examined. */
static size_t
is_empty(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;

		pos++;
	}

	return pos + 1;
}
1153
/* is_hrule • returns non-zero when the line is a horizontal rule
 *
 * A rule is: up to three leading spaces, then a line made only of spaces
 * and one repeated marker ('*', '-' or '_') that appears at least three
 * times.  Only the first line of `data` is examined. */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, count = 0;
	uint8_t mark;

	if (size < 3)
		return 0;

	/* at most three spaces of indentation (size >= 3 keeps this in bounds) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* need room for at least three bytes starting at the marker */
	if (pos + 2 >= size)
		return 0;

	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* everything up to the end of the line must be the marker or a space */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			count++;
		else if (data[pos] != ' ')
			return 0;
	}

	return count >= 3;
}
1184
/* prefix_codefence • checks whether a line opens with a code fence
 *
 * A fence is a run of three or more '~' or three or more '`', preceded
 * by at most three spaces.  Returns the offset just past the run
 * (indentation included), or 0 when the line does not start with one. */
static size_t
prefix_codefence(uint8_t *data, size_t size)
{
	size_t pos = 0, run_start;
	uint8_t fence;

	if (size < 3)
		return 0;

	/* at most three spaces of indentation */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* need room for at least three fence characters */
	if (pos + 2 >= size)
		return 0;

	fence = data[pos];
	if (fence != '~' && fence != '`')
		return 0;

	/* measure the run of identical fence characters */
	run_start = pos;
	while (pos < size && data[pos] == fence)
		pos++;

	if (pos - run_start < 3)
		return 0;

	return pos;
}
1215
1216/* check if a line is a code fence; return its size if it is */
1217static size_t
1218is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1219{
1220	size_t i = 0, syn_len = 0;
1221	uint8_t *syn_start;
1222
1223	i = prefix_codefence(data, size);
1224	if (i == 0)
1225		return 0;
1226
1227	while (i < size && data[i] == ' ')
1228		i++;
1229
1230	syn_start = data + i;
1231
1232	if (i < size && data[i] == '{') {
1233		i++; syn_start++;
1234
1235		while (i < size && data[i] != '}' && data[i] != '\n') {
1236			syn_len++; i++;
1237		}
1238
1239		if (i == size || data[i] != '}')
1240			return 0;
1241
1242		/* strip all whitespace at the beginning and the end
1243		 * of the {} block */
1244		while (syn_len > 0 && _isspace(syn_start[0])) {
1245			syn_start++; syn_len--;
1246		}
1247
1248		while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1249			syn_len--;
1250
1251		i++;
1252	} else {
1253		while (i < size && !_isspace(data[i])) {
1254			syn_len++; i++;
1255		}
1256	}
1257
1258	if (syntax) {
1259		syntax->data = syn_start;
1260		syntax->size = syn_len;
1261	}
1262
1263	while (i < size && data[i] != '\n') {
1264		if (!_isspace(data[i]))
1265			return 0;
1266
1267		i++;
1268	}
1269
1270	return i + 1;
1271}
1272
1273/* is_atxheader • returns whether the line is a hash-prefixed header */
1274static int
1275is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1276{
1277	if (data[0] != '#')
1278		return 0;
1279
1280	if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1281		size_t level = 0;
1282
1283		while (level < size && level < 6 && data[level] == '#')
1284			level++;
1285
1286		if (level < size && data[level] != ' ')
1287			return 0;
1288	}
1289
1290	return 1;
1291}
1292
/* is_headerline • recognises a setext-style header underline
 *
 * The line must consist of a run of '=' (level 1) or a run of '-'
 * (level 2), optionally followed by spaces, up to the newline or the
 * end of the buffer.  Returns the header level (1 or 2), or 0 if the
 * line is not an underline.  data[0] is read unconditionally, so the
 * caller must pass at least one byte. */
static int
is_headerline(uint8_t *data, size_t size)
{
	uint8_t mark = data[0];
	size_t pos = 1;

	if (mark != '=' && mark != '-')
		return 0;

	/* the run of underline characters ... */
	while (pos < size && data[pos] == mark)
		pos++;

	/* ... then optional trailing spaces */
	while (pos < size && data[pos] == ' ')
		pos++;

	/* anything left on the line disqualifies it */
	if (pos < size && data[pos] != '\n')
		return 0;

	return (mark == '=') ? 1 : 2;
}
1313
/* is_next_headerline • applies is_headerline to the line after the current one
 *
 * Skips to the byte following the first '\n' in `data` and returns
 * is_headerline()'s verdict for what starts there (1, 2 or 0).
 * Returns 0 when there is no following line. */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next = 0;

	/* find the end of the current line */
	while (next < size && data[next] != '\n')
		next++;

	/* step over the newline; nothing after it means no next line */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1327
/* prefix_quote • returns the length of a blockquote prefix
 *
 * The prefix is up to three spaces, a '>' and one optional space.
 * Returns 0 when the line does not start with such a prefix. */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* at most three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	pos++;

	/* a single space after '>' belongs to the prefix */
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
1346
/* prefix_code • returns the prefix length for an indented code line
 *
 * A code line starts with four spaces; the result is then 4,
 * otherwise 0. */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1356
/* prefix_oli • returns the length of an ordered list item prefix
 *
 * The prefix is up to three spaces, one or more decimal digits, a '.'
 * and a space.  A line that is directly followed by a setext header
 * underline is not treated as a list item.  Returns 0 on no match. */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* at most three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be followed by ". " */
	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1381
/* prefix_uli • returns the length of an unordered list item prefix
 *
 * The prefix is up to three spaces, a bullet ('*', '+' or '-') and a
 * space.  A line that is directly followed by a setext header underline
 * is not treated as a list item.  Returns 0 on no match. */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* at most three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* need the bullet plus the space after it */
	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '*' && data[pos] != '+' && data[pos] != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1402
1403
1404/* parse_block • parsing of one block, returning next uint8_t to parse */
1405static void parse_block(struct buf *ob, struct sd_markdown *rndr,
1406			uint8_t *data, size_t size);
1407
1408
1409/* parse_blockquote • handles parsing of a blockquote fragment */
1410static size_t
1411parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1412{
1413	size_t beg, end = 0, pre, work_size = 0;
1414	uint8_t *work_data = 0;
1415	struct buf *out = 0;
1416
1417	out = rndr_newbuf(rndr, BUFFER_BLOCK);
1418	beg = 0;
1419	while (beg < size) {
1420		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1421
1422		pre = prefix_quote(data + beg, end - beg);
1423
1424		if (pre)
1425			beg += pre; /* skipping prefix */
1426
1427		/* empty line followed by non-quote line */
1428		else if (is_empty(data + beg, end - beg) &&
1429				(end >= size || (prefix_quote(data + end, size - end) == 0 &&
1430				!is_empty(data + end, size - end))))
1431			break;
1432
1433		if (beg < end) { /* copy into the in-place working buffer */
1434			/* bufput(work, data + beg, end - beg); */
1435			if (!work_data)
1436				work_data = data + beg;
1437			else if (data + beg != work_data + work_size)
1438				memmove(work_data + work_size, data + beg, end - beg);
1439			work_size += end - beg;
1440		}
1441		beg = end;
1442	}
1443
1444	parse_block(out, rndr, work_data, work_size);
1445	if (rndr->cb.blockquote)
1446		rndr->cb.blockquote(ob, out, rndr->opaque);
1447	rndr_popbuf(rndr, BUFFER_BLOCK);
1448	return end;
1449}
1450
1451static size_t
1452parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1453
1454/* parse_blockquote • handles parsing of a regular paragraph */
1455static size_t
1456parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1457{
1458	size_t i = 0, end = 0;
1459	int level = 0;
1460	struct buf work = { data, 0, 0, 0 };
1461
1462	while (i < size) {
1463		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1464
1465		if (prefix_quote(data + i, end - i) != 0) {
1466			end = i;
1467			break;
1468		}
1469
1470		if (is_empty(data + i, size - i))
1471			break;
1472
1473		if ((level = is_headerline(data + i, size - i)) != 0)
1474			break;
1475
1476		if (is_atxheader(rndr, data + i, size - i) ||
1477			is_hrule(data + i, size - i) ||
1478			prefix_quote(data + i, size - i)) {
1479			end = i;
1480			break;
1481		}
1482
1483		/*
1484		 * Early termination of a paragraph with the same logic
1485		 * as Markdown 1.0.0. If this logic is applied, the
1486		 * Markdown 1.0.3 test suite won't pass cleanly
1487		 *
1488		 * :: If the first character in a new line is not a letter,
1489		 * let's check to see if there's some kind of block starting
1490		 * here
1491		 */
1492		if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
1493			if (prefix_oli(data + i, size - i) ||
1494				prefix_uli(data + i, size - i)) {
1495				end = i;
1496				break;
1497			}
1498
1499			/* see if an html block starts here */
1500			if (data[i] == '<' && rndr->cb.blockhtml &&
1501				parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1502				end = i;
1503				break;
1504			}
1505
1506			/* see if a code fence starts here */
1507			if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1508				is_codefence(data + i, size - i, NULL) != 0) {
1509				end = i;
1510				break;
1511			}
1512		}
1513
1514		i = end;
1515	}
1516
1517	work.size = i;
1518	while (work.size && data[work.size - 1] == '\n')
1519		work.size--;
1520
1521	if (!level) {
1522		struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1523		parse_inline(tmp, rndr, work.data, work.size);
1524		if (rndr->cb.paragraph)
1525			rndr->cb.paragraph(ob, tmp, rndr->opaque);
1526		rndr_popbuf(rndr, BUFFER_BLOCK);
1527	} else {
1528		struct buf *header_work;
1529
1530		if (work.size) {
1531			size_t beg;
1532			i = work.size;
1533			work.size -= 1;
1534
1535			while (work.size && data[work.size] != '\n')
1536				work.size -= 1;
1537
1538			beg = work.size + 1;
1539			while (work.size && data[work.size - 1] == '\n')
1540				work.size -= 1;
1541
1542			if (work.size > 0) {
1543				struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1544				parse_inline(tmp, rndr, work.data, work.size);
1545
1546				if (rndr->cb.paragraph)
1547					rndr->cb.paragraph(ob, tmp, rndr->opaque);
1548
1549				rndr_popbuf(rndr, BUFFER_BLOCK);
1550				work.data += beg;
1551				work.size = i - beg;
1552			}
1553			else work.size = i;
1554		}
1555
1556		header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1557		parse_inline(header_work, rndr, work.data, work.size);
1558
1559		if (rndr->cb.header)
1560			rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
1561
1562		rndr_popbuf(rndr, BUFFER_SPAN);
1563	}
1564
1565	return end;
1566}
1567
1568/* parse_fencedcode • handles parsing of a block-level code fragment */
1569static size_t
1570parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1571{
1572	size_t beg, end;
1573	struct buf *work = 0;
1574	struct buf lang = { 0, 0, 0, 0 };
1575
1576	beg = is_codefence(data, size, &lang);
1577	if (beg == 0) return 0;
1578
1579	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1580
1581	while (beg < size) {
1582		size_t fence_end;
1583		struct buf fence_trail = { 0, 0, 0, 0 };
1584
1585		fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1586		if (fence_end != 0 && fence_trail.size == 0) {
1587			beg += fence_end;
1588			break;
1589		}
1590
1591		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1592
1593		if (beg < end) {
1594			/* verbatim copy to the working buffer,
1595				escaping entities */
1596			if (is_empty(data + beg, end - beg))
1597				bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
1598			else bufput(rndr->opaque, rndr->cb.allocate, work, data + beg, end - beg);
1599		}
1600		beg = end;
1601	}
1602
1603	if (work->size && work->data[work->size - 1] != '\n')
1604		bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
1605
1606	if (rndr->cb.blockcode)
1607		rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1608
1609	rndr_popbuf(rndr, BUFFER_BLOCK);
1610	return beg;
1611}
1612
1613static size_t
1614parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1615{
1616	size_t beg, end, pre;
1617	struct buf *work = 0;
1618
1619	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1620
1621	beg = 0;
1622	while (beg < size) {
1623		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1624		pre = prefix_code(data + beg, end - beg);
1625
1626		if (pre)
1627			beg += pre; /* skipping prefix */
1628		else if (!is_empty(data + beg, end - beg))
1629			/* non-empty non-prefixed line breaks the pre */
1630			break;
1631
1632		if (beg < end) {
1633			/* verbatim copy to the working buffer,
1634				escaping entities */
1635			if (is_empty(data + beg, end - beg))
1636				bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
1637			else bufput(rndr->opaque, rndr->cb.allocate, work, data + beg, end - beg);
1638		}
1639		beg = end;
1640	}
1641
1642	while (work->size && work->data[work->size - 1] == '\n')
1643		work->size -= 1;
1644
1645	bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
1646
1647	if (rndr->cb.blockcode)
1648		rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1649
1650	rndr_popbuf(rndr, BUFFER_BLOCK);
1651	return beg;
1652}
1653
1654/* parse_listitem • parsing of a single list item */
1655/*	assuming initial prefix is already removed */
1656static size_t
1657parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1658{
1659	struct buf *work = 0, *inter = 0;
1660	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1661	int in_empty = 0, has_inside_empty = 0, in_fence = 0;
1662
1663	/* keeping track of the first indentation prefix */
1664	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1665		orgpre++;
1666
1667	beg = prefix_uli(data, size);
1668	if (!beg)
1669		beg = prefix_oli(data, size);
1670
1671	if (!beg)
1672		return 0;
1673
1674	/* skipping to the beginning of the following line */
1675	end = beg;
1676	while (end < size && data[end - 1] != '\n')
1677		end++;
1678
1679	/* getting working buffers */
1680	work = rndr_newbuf(rndr, BUFFER_SPAN);
1681	inter = rndr_newbuf(rndr, BUFFER_SPAN);
1682
1683	/* putting the first line into the working buffer */
1684	bufput(rndr->opaque, rndr->cb.allocate, work, data + beg, end - beg);
1685	beg = end;
1686
1687	/* process the following lines */
1688	while (beg < size) {
1689		size_t has_next_uli = 0, has_next_oli = 0;
1690
1691		end++;
1692
1693		while (end < size && data[end - 1] != '\n')
1694			end++;
1695
1696		/* process an empty line */
1697		if (is_empty(data + beg, end - beg)) {
1698			in_empty = 1;
1699			beg = end;
1700			continue;
1701		}
1702
1703		/* calculating the indentation */
1704		i = 0;
1705		while (i < 4 && beg + i < end && data[beg + i] == ' ')
1706			i++;
1707
1708		pre = i;
1709
1710		if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
1711			if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
1712				in_fence = !in_fence;
1713		}
1714
1715		/* Only check for new list items if we are **not** inside
1716		 * a fenced code block */
1717		if (!in_fence) {
1718			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1719			has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1720		}
1721
1722		/* checking for ul/ol switch */
1723		if (in_empty && (
1724			((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
1725			(!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
1726			*flags |= MKD_LI_END;
1727			break; /* the following item must have same list type */
1728		}
1729
1730		/* checking for a new item */
1731		if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1732			if (in_empty)
1733				has_inside_empty = 1;
1734
1735			if (pre == orgpre) /* the following item must have */
1736				break;             /* the same indentation */
1737
1738			if (!sublist)
1739				sublist = work->size;
1740		}
1741		/* joining only indented stuff after empty lines;
1742		 * note that now we only require 1 space of indentation
1743		 * to continue a list */
1744		else if (in_empty && pre == 0) {
1745			*flags |= MKD_LI_END;
1746			break;
1747		}
1748		else if (in_empty) {
1749			bufputc(rndr->opaque, rndr->cb.allocate, work, '\n');
1750			has_inside_empty = 1;
1751		}
1752
1753		in_empty = 0;
1754
1755		/* adding the line without prefix into the working buffer */
1756		bufput(rndr->opaque, rndr->cb.allocate, work, data + beg + i, end - beg - i);
1757		beg = end;
1758	}
1759
1760	/* render of li contents */
1761	if (has_inside_empty)
1762		*flags |= MKD_LI_BLOCK;
1763
1764	if (*flags & MKD_LI_BLOCK) {
1765		/* intermediate render of block li */
1766		if (sublist && sublist < work->size) {
1767			parse_block(inter, rndr, work->data, sublist);
1768			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1769		}
1770		else
1771			parse_block(inter, rndr, work->data, work->size);
1772	} else {
1773		/* intermediate render of inline li */
1774		if (sublist && sublist < work->size) {
1775			parse_inline(inter, rndr, work->data, sublist);
1776			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1777		}
1778		else
1779			parse_inline(inter, rndr, work->data, work->size);
1780	}
1781
1782	/* render of li itself */
1783	if (rndr->cb.listitem)
1784		rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
1785
1786	rndr_popbuf(rndr, BUFFER_SPAN);
1787	rndr_popbuf(rndr, BUFFER_SPAN);
1788	return beg;
1789}
1790
1791
1792/* parse_list • parsing ordered or unordered list block */
1793static size_t
1794parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1795{
1796	struct buf *work = 0;
1797	size_t i = 0, j;
1798
1799	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1800
1801	while (i < size) {
1802		j = parse_listitem(work, rndr, data + i, size - i, &flags);
1803		i += j;
1804
1805		if (!j || (flags & MKD_LI_END))
1806			break;
1807	}
1808
1809	if (rndr->cb.list)
1810		rndr->cb.list(ob, work, flags, rndr->opaque);
1811	rndr_popbuf(rndr, BUFFER_BLOCK);
1812	return i;
1813}
1814
1815/* parse_atxheader • parsing of atx-style headers */
1816static size_t
1817parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1818{
1819	size_t level = 0;
1820	size_t i, end, skip;
1821
1822	while (level < size && level < 6 && data[level] == '#')
1823		level++;
1824
1825	for (i = level; i < size && data[i] == ' '; i++);
1826
1827	for (end = i; end < size && data[end] != '\n'; end++);
1828	skip = end;
1829
1830	while (end && data[end - 1] == '#')
1831		end--;
1832
1833	while (end && data[end - 1] == ' ')
1834		end--;
1835
1836	if (end > i) {
1837		struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
1838
1839		parse_inline(work, rndr, data + i, end - i);
1840
1841		if (rndr->cb.header)
1842			rndr->cb.header(ob, work, (int)level, rndr->opaque);
1843
1844		rndr_popbuf(rndr, BUFFER_SPAN);
1845	}
1846
1847	return skip;
1848}
1849
1850
/* htmlblock_end_tag • checking end of HTML block : </tag>[ ]*\n[ ]*\n
 *
 * `data` is expected to point at the "</" of a candidate closing tag.
 * The tag name is compared case-insensitively against `tag` and must
 * be followed by '>' and a blank remainder of the line.  One further
 * blank line, if present, is included in the match.
 *
 * Returns the length of the match, 0 otherwise.  `rndr` is unused. */
static size_t
htmlblock_end_tag(
	const char *tag,
	size_t tag_len,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size)
{
	size_t pos, blank;

	/* "</" + name + ">" must fit with at least one byte to spare */
	if (tag_len + 3 >= size)
		return 0;

	if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
		return 0;

	if (data[tag_len + 2] != '>')
		return 0;

	/* the rest of the closing tag's line must be blank */
	pos = tag_len + 3;
	blank = 0;
	if (pos < size) {
		blank = is_empty(data + pos, size - pos);
		if (blank == 0)
			return 0; /* non-blank after tag */
	}
	pos += blank;

	/* swallow one following blank line when there is one */
	blank = (pos < size) ? is_empty(data + pos, size - pos) : 0;

	return pos + blank;
}
1882
/* htmlblock_end • looks for the closing tag that ends an HTML block
 *
 * Scans `data` for "</" sequences and tests each one with
 * htmlblock_end_tag() against `curtag`.  When `start_of_line` is set,
 * candidates found after the first line are only accepted if the '<'
 * directly follows a newline.
 *
 * Returns the length of the block up to and including the match,
 * or 0 when no matching closing tag is found. */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	size_t tag_size = strlen(curtag);
	size_t pos = 1;
	int newlines = 0;

	while (pos < size) {
		size_t matched;

		/* advance until data[pos - 1..pos] is "</", counting the
		 * newlines passed on the way */
		for (pos++; pos < size && (data[pos - 1] != '<' || data[pos] != '/'); pos++) {
			if (data[pos] == '\n')
				newlines++;
		}

		/* If we are only looking for unindented tags, skip the tag
		 * if it doesn't follow a newline.
		 *
		 * The only exception to this is if the tag is still on the
		 * initial line; in that case it still counts as a closing
		 * tag
		 */
		if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
			continue;

		/* not enough input left to hold the closing tag */
		if (pos + 2 + tag_size >= size)
			break;

		/* the candidate starts at the '<', one byte before pos */
		matched = htmlblock_end_tag(curtag, tag_size, rndr, data + pos - 1, size - pos + 1);
		if (matched)
			return pos + matched - 1;
	}

	return 0;
}
1923
1924
1925/* parse_htmlblock • parsing of inline HTML block */
1926static size_t
1927parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
1928{
1929	size_t i, j = 0, tag_end;
1930	const char *curtag = NULL;
1931	struct buf work = { data, 0, 0, 0 };
1932
1933	/* identification of the opening tag */
1934	if (size < 2 || data[0] != '<')
1935		return 0;
1936
1937	i = 1;
1938	while (i < size && data[i] != '>' && data[i] != ' ')
1939		i++;
1940
1941	if (i < size)
1942		curtag = find_block_tag((char *)data + 1, (int)i - 1);
1943
1944	/* handling of special cases */
1945	if (!curtag) {
1946
1947		/* HTML comment, laxist form */
1948		if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
1949			i = 5;
1950
1951			while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
1952				i++;
1953
1954			i++;
1955
1956			if (i < size)
1957				j = is_empty(data + i, size - i);
1958
1959			if (j) {
1960				work.size = i + j;
1961				if (do_render && rndr->cb.blockhtml)
1962					rndr->cb.blockhtml(ob, &work, rndr->opaque);
1963				return work.size;
1964			}
1965		}
1966
1967		/* HR, which is the only self-closing block tag considered */
1968		if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
1969			i = 3;
1970			while (i < size && data[i] != '>')
1971				i++;
1972
1973			if (i + 1 < size) {
1974				i++;
1975				j = is_empty(data + i, size - i);
1976				if (j) {
1977					work.size = i + j;
1978					if (do_render && rndr->cb.blockhtml)
1979						rndr->cb.blockhtml(ob, &work, rndr->opaque);
1980					return work.size;
1981				}
1982			}
1983		}
1984
1985		/* no special case recognised */
1986		return 0;
1987	}
1988
1989	/* looking for an unindented matching closing tag */
1990	/*	followed by a blank line */
1991	tag_end = htmlblock_end(curtag, rndr, data, size, 1);
1992
1993	/* if not found, trying a second pass looking for indented match */
1994	/* but not if tag is "ins" or "del" (following original Markdown.pl) */
1995	if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
1996		tag_end = htmlblock_end(curtag, rndr, data, size, 0);
1997	}
1998
1999	if (!tag_end)
2000		return 0;
2001
2002	/* the end of the block has been found */
2003	work.size = tag_end;
2004	if (do_render && rndr->cb.blockhtml)
2005		rndr->cb.blockhtml(ob, &work, rndr->opaque);
2006
2007	return tag_end;
2008}
2009
2010static void
2011parse_table_row(
2012	struct buf *ob,
2013	struct sd_markdown *rndr,
2014	uint8_t *data,
2015	size_t size,
2016	size_t columns,
2017	int *col_data,
2018	int header_flag)
2019{
2020	size_t i = 0, col;
2021	struct buf *row_work = 0;
2022
2023	if (!rndr->cb.table_cell || !rndr->cb.table_row)
2024		return;
2025
2026	row_work = rndr_newbuf(rndr, BUFFER_SPAN);
2027
2028	if (i < size && data[i] == '|')
2029		i++;
2030
2031	for (col = 0; col < columns && i < size; ++col) {
2032		size_t cell_start, cell_end;
2033		struct buf *cell_work;
2034
2035		cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
2036
2037		while (i < size && _isspace(data[i]))
2038			i++;
2039
2040		cell_start = i;
2041
2042		while (i < size && data[i] != '|')
2043			i++;
2044
2045		cell_end = i - 1;
2046
2047		while (cell_end > cell_start && _isspace(data[cell_end]))
2048			cell_end--;
2049
2050		parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
2051		rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
2052
2053		rndr_popbuf(rndr, BUFFER_SPAN);
2054		i++;
2055	}
2056
2057	for (; col < columns; ++col) {
2058		struct buf empty_cell = { 0, 0, 0, 0 };
2059		rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
2060	}
2061
2062	rndr->cb.table_row(ob, row_work, rndr->opaque);
2063
2064	rndr_popbuf(rndr, BUFFER_SPAN);
2065}
2066
2067static size_t
2068parse_table_header(
2069	struct buf *ob,
2070	struct sd_markdown *rndr,
2071	uint8_t *data,
2072	size_t size,
2073	size_t *columns,
2074	int **column_data)
2075{
2076	int pipes;
2077	size_t i = 0, col, header_end, under_end;
2078
2079	pipes = 0;
2080	while (i < size && data[i] != '\n')
2081		if (data[i++] == '|')
2082			pipes++;
2083
2084	if (i == size || pipes == 0)
2085		return 0;
2086
2087	header_end = i;
2088
2089	while (header_end > 0 && _isspace(data[header_end - 1]))
2090		header_end--;
2091
2092	if (data[0] == '|')
2093		pipes--;
2094
2095	if (header_end && data[header_end - 

Large files files are truncated, but you can click here to view the full file