PageRenderTime 357ms CodeModel.GetById 117ms app.highlight 189ms RepoModel.GetById 20ms app.codeStats 0ms

/samples/C/markdown.c

https://github.com/shayn/linguist
C | 2551 lines | 2326 code | 146 blank | 79 comment | 202 complexity | ebdce0a604a147ebe7feb346ed036632 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/* markdown.c - generic markdown parser */
   2
   3/*
   4 * Copyright (c) 2009, Natacha Porté
   5 * Copyright (c) 2011, Vicent Marti
   6 *
   7 * Permission to use, copy, modify, and distribute this software for any
   8 * purpose with or without fee is hereby granted, provided that the above
   9 * copyright notice and this permission notice appear in all copies.
  10 *
  11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18 */
  19
  20#include "markdown.h"
  21#include "stack.h"
  22
  23#include <assert.h>
  24#include <string.h>
  25#include <ctype.h>
  26#include <stdio.h>
  27
  28#if defined(_WIN32)
  29#define strncasecmp	_strnicmp
  30#endif
  31
  32#define REF_TABLE_SIZE 8
  33
  34#define BUFFER_BLOCK 0
  35#define BUFFER_SPAN 1
  36
  37#define MKD_LI_END 8	/* internal list flag */
  38
  39#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
  40#define GPERF_DOWNCASE 1
  41#define GPERF_CASE_STRNCMP 1
  42#include "html_blocks.h"
  43
  44/***************
  45 * LOCAL TYPES *
  46 ***************/
  47
  48/* link_ref: reference to a link */
  49struct link_ref {
  50	unsigned int id;
  51
  52	struct buf *link;
  53	struct buf *title;
  54
  55	struct link_ref *next;
  56};
  57
  58/* char_trigger: function pointer to render active chars */
  59/*   returns the number of chars taken care of */
  60/*   data is the pointer of the beginning of the span */
  61/*   offset is the number of valid chars before data */
  62struct sd_markdown;
  63typedef size_t
  64(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  65
  66static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  67static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  68static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  69static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  70static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  71static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  72static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  73static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  74static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  75static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  76static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  77
  78enum markdown_char_t {
  79	MD_CHAR_NONE = 0,
  80	MD_CHAR_EMPHASIS,
  81	MD_CHAR_CODESPAN,
  82	MD_CHAR_LINEBREAK,
  83	MD_CHAR_LINK,
  84	MD_CHAR_LANGLE,
  85	MD_CHAR_ESCAPE,
  86	MD_CHAR_ENTITITY,
  87	MD_CHAR_AUTOLINK_URL,
  88	MD_CHAR_AUTOLINK_EMAIL,
  89	MD_CHAR_AUTOLINK_WWW,
  90	MD_CHAR_SUPERSCRIPT,
  91};
  92
  93static char_trigger markdown_char_ptrs[] = {
  94	NULL,
  95	&char_emphasis,
  96	&char_codespan,
  97	&char_linebreak,
  98	&char_link,
  99	&char_langle_tag,
 100	&char_escape,
 101	&char_entity,
 102	&char_autolink_url,
 103	&char_autolink_email,
 104	&char_autolink_www,
 105	&char_superscript,
 106};
 107
 108/* render • structure containing one particular render */
 109struct sd_markdown {
 110	struct sd_callbacks	cb;
 111	void *opaque;
 112
 113	struct link_ref *refs[REF_TABLE_SIZE];
 114	uint8_t active_char[256];
 115	struct stack work_bufs[2];
 116	unsigned int ext_flags;
 117	size_t max_nesting;
 118	int in_link_body;
 119};
 120
 121/***************************
 122 * HELPER FUNCTIONS *
 123 ***************************/
 124
 125static inline struct buf *
 126rndr_newbuf(struct sd_markdown *rndr, int type)
 127{
 128	static const size_t buf_size[2] = {256, 64};
 129	struct buf *work = NULL;
 130	struct stack *pool = &rndr->work_bufs[type];
 131
 132	if (pool->size < pool->asize &&
 133		pool->item[pool->size] != NULL) {
 134		work = pool->item[pool->size++];
 135		work->size = 0;
 136	} else {
 137		work = bufnew(buf_size[type]);
 138		stack_push(pool, work);
 139	}
 140
 141	return work;
 142}
 143
 144static inline void
 145rndr_popbuf(struct sd_markdown *rndr, int type)
 146{
 147	rndr->work_bufs[type].size--;
 148}
 149
 150static void
 151unscape_text(struct buf *ob, struct buf *src)
 152{
 153	size_t i = 0, org;
 154	while (i < src->size) {
 155		org = i;
 156		while (i < src->size && src->data[i] != '\\')
 157			i++;
 158
 159		if (i > org)
 160			bufput(ob, src->data + org, i - org);
 161
 162		if (i + 1 >= src->size)
 163			break;
 164
 165		bufputc(ob, src->data[i + 1]);
 166		i += 2;
 167	}
 168}
 169
 170static unsigned int
 171hash_link_ref(const uint8_t *link_ref, size_t length)
 172{
 173	size_t i;
 174	unsigned int hash = 0;
 175
 176	for (i = 0; i < length; ++i)
 177		hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
 178
 179	return hash;
 180}
 181
 182static struct link_ref *
 183add_link_ref(
 184	struct link_ref **references,
 185	const uint8_t *name, size_t name_size)
 186{
 187	struct link_ref *ref = calloc(1, sizeof(struct link_ref));
 188
 189	if (!ref)
 190		return NULL;
 191
 192	ref->id = hash_link_ref(name, name_size);
 193	ref->next = references[ref->id % REF_TABLE_SIZE];
 194
 195	references[ref->id % REF_TABLE_SIZE] = ref;
 196	return ref;
 197}
 198
 199static struct link_ref *
 200find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
 201{
 202	unsigned int hash = hash_link_ref(name, length);
 203	struct link_ref *ref = NULL;
 204
 205	ref = references[hash % REF_TABLE_SIZE];
 206
 207	while (ref != NULL) {
 208		if (ref->id == hash)
 209			return ref;
 210
 211		ref = ref->next;
 212	}
 213
 214	return NULL;
 215}
 216
 217static void
 218free_link_refs(struct link_ref **references)
 219{
 220	size_t i;
 221
 222	for (i = 0; i < REF_TABLE_SIZE; ++i) {
 223		struct link_ref *r = references[i];
 224		struct link_ref *next;
 225
 226		while (r) {
 227			next = r->next;
 228			bufrelease(r->link);
 229			bufrelease(r->title);
 230			free(r);
 231			r = next;
 232		}
 233	}
 234}
 235
 236/*
 237 * Check whether a char is a Markdown space.
 238
 239 * Right now we only consider spaces the actual
 240 * space and a newline: tabs and carriage returns
 241 * are filtered out during the preprocessing phase.
 242 *
 243 * If we wanted to actually be UTF-8 compliant, we
 244 * should instead extract an Unicode codepoint from
 245 * this character and check for space properties.
 246 */
 247static inline int
 248_isspace(int c)
 249{
 250	return c == ' ' || c == '\n';
 251}
 252
 253/****************************
 254 * INLINE PARSING FUNCTIONS *
 255 ****************************/
 256
 257/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
 258/* this is less strict than the original markdown e-mail address matching */
 259static size_t
 260is_mail_autolink(uint8_t *data, size_t size)
 261{
 262	size_t i = 0, nb = 0;
 263
 264	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
 265	for (i = 0; i < size; ++i) {
 266		if (isalnum(data[i]))
 267			continue;
 268
 269		switch (data[i]) {
 270			case '@':
 271				nb++;
 272
 273			case '-':
 274			case '.':
 275			case '_':
 276				break;
 277
 278			case '>':
 279				return (nb == 1) ? i + 1 : 0;
 280
 281			default:
 282				return 0;
 283		}
 284	}
 285
 286	return 0;
 287}
 288
 289/* tag_length • returns the length of the given tag, or 0 is it's not valid */
 290static size_t
 291tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
 292{
 293	size_t i, j;
 294
 295	/* a valid tag can't be shorter than 3 chars */
 296	if (size < 3) return 0;
 297
 298	/* begins with a '<' optionally followed by '/', followed by letter or number */
 299	if (data[0] != '<') return 0;
 300	i = (data[1] == '/') ? 2 : 1;
 301
 302	if (!isalnum(data[i]))
 303		return 0;
 304
 305	/* scheme test */
 306	*autolink = MKDA_NOT_AUTOLINK;
 307
 308	/* try to find the beginning of an URI */
 309	while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
 310		i++;
 311
 312	if (i > 1 && data[i] == '@') {
 313		if ((j = is_mail_autolink(data + i, size - i)) != 0) {
 314			*autolink = MKDA_EMAIL;
 315			return i + j;
 316		}
 317	}
 318
 319	if (i > 2 && data[i] == ':') {
 320		*autolink = MKDA_NORMAL;
 321		i++;
 322	}
 323
 324	/* completing autolink test: no whitespace or ' or " */
 325	if (i >= size)
 326		*autolink = MKDA_NOT_AUTOLINK;
 327
 328	else if (*autolink) {
 329		j = i;
 330
 331		while (i < size) {
 332			if (data[i] == '\\') i += 2;
 333			else if (data[i] == '>' || data[i] == '\'' ||
 334					data[i] == '"' || data[i] == ' ' || data[i] == '\n')
 335					break;
 336			else i++;
 337		}
 338
 339		if (i >= size) return 0;
 340		if (i > j && data[i] == '>') return i + 1;
 341		/* one of the forbidden chars has been found */
 342		*autolink = MKDA_NOT_AUTOLINK;
 343	}
 344
 345	/* looking for sometinhg looking like a tag end */
 346	while (i < size && data[i] != '>') i++;
 347	if (i >= size) return 0;
 348	return i + 1;
 349}
 350
 351/* parse_inline • parses inline markdown elements */
 352static void
 353parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
 354{
 355	size_t i = 0, end = 0;
 356	uint8_t action = 0;
 357	struct buf work = { 0, 0, 0, 0 };
 358
 359	if (rndr->work_bufs[BUFFER_SPAN].size +
 360		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
 361		return;
 362
 363	while (i < size) {
 364		/* copying inactive chars into the output */
 365		while (end < size && (action = rndr->active_char[data[end]]) == 0) {
 366			end++;
 367		}
 368
 369		if (rndr->cb.normal_text) {
 370			work.data = data + i;
 371			work.size = end - i;
 372			rndr->cb.normal_text(ob, &work, rndr->opaque);
 373		}
 374		else
 375			bufput(ob, data + i, end - i);
 376
 377		if (end >= size) break;
 378		i = end;
 379
 380		end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
 381		if (!end) /* no action from the callback */
 382			end = i + 1;
 383		else {
 384			i += end;
 385			end = i;
 386		}
 387	}
 388}
 389
 390/* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
 391static size_t
 392find_emph_char(uint8_t *data, size_t size, uint8_t c)
 393{
 394	size_t i = 1;
 395
 396	while (i < size) {
 397		while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
 398			i++;
 399
 400		if (i == size)
 401			return 0;
 402
 403		if (data[i] == c)
 404			return i;
 405
 406		/* not counting escaped chars */
 407		if (i && data[i - 1] == '\\') {
 408			i++; continue;
 409		}
 410
 411		if (data[i] == '`') {
 412			size_t span_nb = 0, bt;
 413			size_t tmp_i = 0;
 414
 415			/* counting the number of opening backticks */
 416			while (i < size && data[i] == '`') {
 417				i++; span_nb++;
 418			}
 419
 420			if (i >= size) return 0;
 421
 422			/* finding the matching closing sequence */
 423			bt = 0;
 424			while (i < size && bt < span_nb) {
 425				if (!tmp_i && data[i] == c) tmp_i = i;
 426				if (data[i] == '`') bt++;
 427				else bt = 0;
 428				i++;
 429			}
 430
 431			if (i >= size) return tmp_i;
 432		}
 433		/* skipping a link */
 434		else if (data[i] == '[') {
 435			size_t tmp_i = 0;
 436			uint8_t cc;
 437
 438			i++;
 439			while (i < size && data[i] != ']') {
 440				if (!tmp_i && data[i] == c) tmp_i = i;
 441				i++;
 442			}
 443
 444			i++;
 445			while (i < size && (data[i] == ' ' || data[i] == '\n'))
 446				i++;
 447
 448			if (i >= size)
 449				return tmp_i;
 450
 451			switch (data[i]) {
 452			case '[':
 453				cc = ']'; break;
 454
 455			case '(':
 456				cc = ')'; break;
 457
 458			default:
 459				if (tmp_i)
 460					return tmp_i;
 461				else
 462					continue;
 463			}
 464
 465			i++;
 466			while (i < size && data[i] != cc) {
 467				if (!tmp_i && data[i] == c) tmp_i = i;
 468				i++;
 469			}
 470
 471			if (i >= size)
 472				return tmp_i;
 473
 474			i++;
 475		}
 476	}
 477
 478	return 0;
 479}
 480
 481/* parse_emph1 • parsing single emphase */
 482/* closed by a symbol not preceded by whitespace and not followed by symbol */
 483static size_t
 484parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 485{
 486	size_t i = 0, len;
 487	struct buf *work = 0;
 488	int r;
 489
 490	if (!rndr->cb.emphasis) return 0;
 491
 492	/* skipping one symbol if coming from emph3 */
 493	if (size > 1 && data[0] == c && data[1] == c) i = 1;
 494
 495	while (i < size) {
 496		len = find_emph_char(data + i, size - i, c);
 497		if (!len) return 0;
 498		i += len;
 499		if (i >= size) return 0;
 500
 501		if (data[i] == c && !_isspace(data[i - 1])) {
 502
 503			if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
 504				if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
 505					continue;
 506			}
 507
 508			work = rndr_newbuf(rndr, BUFFER_SPAN);
 509			parse_inline(work, rndr, data, i);
 510			r = rndr->cb.emphasis(ob, work, rndr->opaque);
 511			rndr_popbuf(rndr, BUFFER_SPAN);
 512			return r ? i + 1 : 0;
 513		}
 514	}
 515
 516	return 0;
 517}
 518
 519/* parse_emph2 • parsing single emphase */
 520static size_t
 521parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 522{
 523	int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
 524	size_t i = 0, len;
 525	struct buf *work = 0;
 526	int r;
 527
 528	render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
 529
 530	if (!render_method)
 531		return 0;
 532
 533	while (i < size) {
 534		len = find_emph_char(data + i, size - i, c);
 535		if (!len) return 0;
 536		i += len;
 537
 538		if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
 539			work = rndr_newbuf(rndr, BUFFER_SPAN);
 540			parse_inline(work, rndr, data, i);
 541			r = render_method(ob, work, rndr->opaque);
 542			rndr_popbuf(rndr, BUFFER_SPAN);
 543			return r ? i + 2 : 0;
 544		}
 545		i++;
 546	}
 547	return 0;
 548}
 549
 550/* parse_emph3 • parsing single emphase */
 551/* finds the first closing tag, and delegates to the other emph */
 552static size_t
 553parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 554{
 555	size_t i = 0, len;
 556	int r;
 557
 558	while (i < size) {
 559		len = find_emph_char(data + i, size - i, c);
 560		if (!len) return 0;
 561		i += len;
 562
 563		/* skip whitespace preceded symbols */
 564		if (data[i] != c || _isspace(data[i - 1]))
 565			continue;
 566
 567		if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
 568			/* triple symbol found */
 569			struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
 570
 571			parse_inline(work, rndr, data, i);
 572			r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
 573			rndr_popbuf(rndr, BUFFER_SPAN);
 574			return r ? i + 3 : 0;
 575
 576		} else if (i + 1 < size && data[i + 1] == c) {
 577			/* double symbol found, handing over to emph1 */
 578			len = parse_emph1(ob, rndr, data - 2, size + 2, c);
 579			if (!len) return 0;
 580			else return len - 2;
 581
 582		} else {
 583			/* single symbol found, handing over to emph2 */
 584			len = parse_emph2(ob, rndr, data - 1, size + 1, c);
 585			if (!len) return 0;
 586			else return len - 1;
 587		}
 588	}
 589	return 0;
 590}
 591
 592/* char_emphasis • single and double emphasis parsing */
 593static size_t
 594char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 595{
 596	uint8_t c = data[0];
 597	size_t ret;
 598
 599	if (size > 2 && data[1] != c) {
 600		/* whitespace cannot follow an opening emphasis;
 601		 * strikethrough only takes two characters '~~' */
 602		if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
 603			return 0;
 604
 605		return ret + 1;
 606	}
 607
 608	if (size > 3 && data[1] == c && data[2] != c) {
 609		if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
 610			return 0;
 611
 612		return ret + 2;
 613	}
 614
 615	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
 616		if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
 617			return 0;
 618
 619		return ret + 3;
 620	}
 621
 622	return 0;
 623}
 624
 625
 626/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
 627static size_t
 628char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 629{
 630	if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
 631		return 0;
 632
 633	/* removing the last space from ob and rendering */
 634	while (ob->size && ob->data[ob->size - 1] == ' ')
 635		ob->size--;
 636
 637	return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
 638}
 639
 640
 641/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
 642static size_t
 643char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 644{
 645	size_t end, nb = 0, i, f_begin, f_end;
 646
 647	/* counting the number of backticks in the delimiter */
 648	while (nb < size && data[nb] == '`')
 649		nb++;
 650
 651	/* finding the next delimiter */
 652	i = 0;
 653	for (end = nb; end < size && i < nb; end++) {
 654		if (data[end] == '`') i++;
 655		else i = 0;
 656	}
 657
 658	if (i < nb && end >= size)
 659		return 0; /* no matching delimiter */
 660
 661	/* trimming outside whitespaces */
 662	f_begin = nb;
 663	while (f_begin < end && data[f_begin] == ' ')
 664		f_begin++;
 665
 666	f_end = end - nb;
 667	while (f_end > nb && data[f_end-1] == ' ')
 668		f_end--;
 669
 670	/* real code span */
 671	if (f_begin < f_end) {
 672		struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
 673		if (!rndr->cb.codespan(ob, &work, rndr->opaque))
 674			end = 0;
 675	} else {
 676		if (!rndr->cb.codespan(ob, 0, rndr->opaque))
 677			end = 0;
 678	}
 679
 680	return end;
 681}
 682
 683
 684/* char_escape • '\\' backslash escape */
 685static size_t
 686char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 687{
 688	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
 689	struct buf work = { 0, 0, 0, 0 };
 690
 691	if (size > 1) {
 692		if (strchr(escape_chars, data[1]) == NULL)
 693			return 0;
 694
 695		if (rndr->cb.normal_text) {
 696			work.data = data + 1;
 697			work.size = 1;
 698			rndr->cb.normal_text(ob, &work, rndr->opaque);
 699		}
 700		else bufputc(ob, data[1]);
 701	} else if (size == 1) {
 702		bufputc(ob, data[0]);
 703	}
 704
 705	return 2;
 706}
 707
 708/* char_entity • '&' escaped when it doesn't belong to an entity */
 709/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
 710static size_t
 711char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 712{
 713	size_t end = 1;
 714	struct buf work = { 0, 0, 0, 0 };
 715
 716	if (end < size && data[end] == '#')
 717		end++;
 718
 719	while (end < size && isalnum(data[end]))
 720		end++;
 721
 722	if (end < size && data[end] == ';')
 723		end++; /* real entity */
 724	else
 725		return 0; /* lone '&' */
 726
 727	if (rndr->cb.entity) {
 728		work.data = data;
 729		work.size = end;
 730		rndr->cb.entity(ob, &work, rndr->opaque);
 731	}
 732	else bufput(ob, data, end);
 733
 734	return end;
 735}
 736
 737/* char_langle_tag • '<' when tags or autolinks are allowed */
 738static size_t
 739char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 740{
 741	enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
 742	size_t end = tag_length(data, size, &altype);
 743	struct buf work = { data, end, 0, 0 };
 744	int ret = 0;
 745
 746	if (end > 2) {
 747		if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
 748			struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
 749			work.data = data + 1;
 750			work.size = end - 2;
 751			unscape_text(u_link, &work);
 752			ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
 753			rndr_popbuf(rndr, BUFFER_SPAN);
 754		}
 755		else if (rndr->cb.raw_html_tag)
 756			ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
 757	}
 758
 759	if (!ret) return 0;
 760	else return end;
 761}
 762
 763static size_t
 764char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 765{
 766	struct buf *link, *link_url, *link_text;
 767	size_t link_len, rewind;
 768
 769	if (!rndr->cb.link || rndr->in_link_body)
 770		return 0;
 771
 772	link = rndr_newbuf(rndr, BUFFER_SPAN);
 773
 774	if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
 775		link_url = rndr_newbuf(rndr, BUFFER_SPAN);
 776		BUFPUTSL(link_url, "http://");
 777		bufput(link_url, link->data, link->size);
 778
 779		ob->size -= rewind;
 780		if (rndr->cb.normal_text) {
 781			link_text = rndr_newbuf(rndr, BUFFER_SPAN);
 782			rndr->cb.normal_text(link_text, link, rndr->opaque);
 783			rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
 784			rndr_popbuf(rndr, BUFFER_SPAN);
 785		} else {
 786			rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
 787		}
 788		rndr_popbuf(rndr, BUFFER_SPAN);
 789	}
 790
 791	rndr_popbuf(rndr, BUFFER_SPAN);
 792	return link_len;
 793}
 794
 795static size_t
 796char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 797{
 798	struct buf *link;
 799	size_t link_len, rewind;
 800
 801	if (!rndr->cb.autolink || rndr->in_link_body)
 802		return 0;
 803
 804	link = rndr_newbuf(rndr, BUFFER_SPAN);
 805
 806	if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
 807		ob->size -= rewind;
 808		rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
 809	}
 810
 811	rndr_popbuf(rndr, BUFFER_SPAN);
 812	return link_len;
 813}
 814
 815static size_t
 816char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 817{
 818	struct buf *link;
 819	size_t link_len, rewind;
 820
 821	if (!rndr->cb.autolink || rndr->in_link_body)
 822		return 0;
 823
 824	link = rndr_newbuf(rndr, BUFFER_SPAN);
 825
 826	if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
 827		ob->size -= rewind;
 828		rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
 829	}
 830
 831	rndr_popbuf(rndr, BUFFER_SPAN);
 832	return link_len;
 833}
 834
 835/* char_link • '[': parsing a link or an image */
 836static size_t
 837char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 838{
 839	int is_img = (offset && data[-1] == '!'), level;
 840	size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
 841	struct buf *content = 0;
 842	struct buf *link = 0;
 843	struct buf *title = 0;
 844	struct buf *u_link = 0;
 845	size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
 846	int text_has_nl = 0, ret = 0;
 847	int in_title = 0, qtype = 0;
 848
 849	/* checking whether the correct renderer exists */
 850	if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
 851		goto cleanup;
 852
 853	/* looking for the matching closing bracket */
 854	for (level = 1; i < size; i++) {
 855		if (data[i] == '\n')
 856			text_has_nl = 1;
 857
 858		else if (data[i - 1] == '\\')
 859			continue;
 860
 861		else if (data[i] == '[')
 862			level++;
 863
 864		else if (data[i] == ']') {
 865			level--;
 866			if (level <= 0)
 867				break;
 868		}
 869	}
 870
 871	if (i >= size)
 872		goto cleanup;
 873
 874	txt_e = i;
 875	i++;
 876
 877	/* skip any amount of whitespace or newline */
 878	/* (this is much more laxist than original markdown syntax) */
 879	while (i < size && _isspace(data[i]))
 880		i++;
 881
 882	/* inline style link */
 883	if (i < size && data[i] == '(') {
 884		/* skipping initial whitespace */
 885		i++;
 886
 887		while (i < size && _isspace(data[i]))
 888			i++;
 889
 890		link_b = i;
 891
 892		/* looking for link end: ' " ) */
 893		while (i < size) {
 894			if (data[i] == '\\') i += 2;
 895			else if (data[i] == ')') break;
 896			else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
 897			else i++;
 898		}
 899
 900		if (i >= size) goto cleanup;
 901		link_e = i;
 902
 903		/* looking for title end if present */
 904		if (data[i] == '\'' || data[i] == '"') {
 905			qtype = data[i];
 906			in_title = 1;
 907			i++;
 908			title_b = i;
 909
 910			while (i < size) {
 911				if (data[i] == '\\') i += 2;
 912				else if (data[i] == qtype) {in_title = 0; i++;}
 913				else if ((data[i] == ')') && !in_title) break;
 914				else i++;
 915			}
 916
 917			if (i >= size) goto cleanup;
 918
 919			/* skipping whitespaces after title */
 920			title_e = i - 1;
 921			while (title_e > title_b && _isspace(data[title_e]))
 922				title_e--;
 923
 924			/* checking for closing quote presence */
 925			if (data[title_e] != '\'' &&  data[title_e] != '"') {
 926				title_b = title_e = 0;
 927				link_e = i;
 928			}
 929		}
 930
 931		/* remove whitespace at the end of the link */
 932		while (link_e > link_b && _isspace(data[link_e - 1]))
 933			link_e--;
 934
 935		/* remove optional angle brackets around the link */
 936		if (data[link_b] == '<') link_b++;
 937		if (data[link_e - 1] == '>') link_e--;
 938
 939		/* building escaped link and title */
 940		if (link_e > link_b) {
 941			link = rndr_newbuf(rndr, BUFFER_SPAN);
 942			bufput(link, data + link_b, link_e - link_b);
 943		}
 944
 945		if (title_e > title_b) {
 946			title = rndr_newbuf(rndr, BUFFER_SPAN);
 947			bufput(title, data + title_b, title_e - title_b);
 948		}
 949
 950		i++;
 951	}
 952
 953	/* reference style link */
 954	else if (i < size && data[i] == '[') {
 955		struct buf id = { 0, 0, 0, 0 };
 956		struct link_ref *lr;
 957
 958		/* looking for the id */
 959		i++;
 960		link_b = i;
 961		while (i < size && data[i] != ']') i++;
 962		if (i >= size) goto cleanup;
 963		link_e = i;
 964
 965		/* finding the link_ref */
 966		if (link_b == link_e) {
 967			if (text_has_nl) {
 968				struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
 969				size_t j;
 970
 971				for (j = 1; j < txt_e; j++) {
 972					if (data[j] != '\n')
 973						bufputc(b, data[j]);
 974					else if (data[j - 1] != ' ')
 975						bufputc(b, ' ');
 976				}
 977
 978				id.data = b->data;
 979				id.size = b->size;
 980			} else {
 981				id.data = data + 1;
 982				id.size = txt_e - 1;
 983			}
 984		} else {
 985			id.data = data + link_b;
 986			id.size = link_e - link_b;
 987		}
 988
 989		lr = find_link_ref(rndr->refs, id.data, id.size);
 990		if (!lr)
 991			goto cleanup;
 992
 993		/* keeping link and title from link_ref */
 994		link = lr->link;
 995		title = lr->title;
 996		i++;
 997	}
 998
 999	/* shortcut reference style link */
1000	else {
1001		struct buf id = { 0, 0, 0, 0 };
1002		struct link_ref *lr;
1003
1004		/* crafting the id */
1005		if (text_has_nl) {
1006			struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1007			size_t j;
1008
1009			for (j = 1; j < txt_e; j++) {
1010				if (data[j] != '\n')
1011					bufputc(b, data[j]);
1012				else if (data[j - 1] != ' ')
1013					bufputc(b, ' ');
1014			}
1015
1016			id.data = b->data;
1017			id.size = b->size;
1018		} else {
1019			id.data = data + 1;
1020			id.size = txt_e - 1;
1021		}
1022
1023		/* finding the link_ref */
1024		lr = find_link_ref(rndr->refs, id.data, id.size);
1025		if (!lr)
1026			goto cleanup;
1027
1028		/* keeping link and title from link_ref */
1029		link = lr->link;
1030		title = lr->title;
1031
1032		/* rewinding the whitespace */
1033		i = txt_e + 1;
1034	}
1035
1036	/* building content: img alt is escaped, link content is parsed */
1037	if (txt_e > 1) {
1038		content = rndr_newbuf(rndr, BUFFER_SPAN);
1039		if (is_img) {
1040			bufput(content, data + 1, txt_e - 1);
1041		} else {
1042			/* disable autolinking when parsing inline the
1043			 * content of a link */
1044			rndr->in_link_body = 1;
1045			parse_inline(content, rndr, data + 1, txt_e - 1);
1046			rndr->in_link_body = 0;
1047		}
1048	}
1049
1050	if (link) {
1051		u_link = rndr_newbuf(rndr, BUFFER_SPAN);
1052		unscape_text(u_link, link);
1053	}
1054
1055	/* calling the relevant rendering function */
1056	if (is_img) {
1057		if (ob->size && ob->data[ob->size - 1] == '!')
1058			ob->size -= 1;
1059
1060		ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
1061	} else {
1062		ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
1063	}
1064
1065	/* cleanup */
1066cleanup:
1067	rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1068	return ret ? i : 0;
1069}
1070
1071static size_t
1072char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1073{
1074	size_t sup_start, sup_len;
1075	struct buf *sup;
1076
1077	if (!rndr->cb.superscript)
1078		return 0;
1079
1080	if (size < 2)
1081		return 0;
1082
1083	if (data[1] == '(') {
1084		sup_start = sup_len = 2;
1085
1086		while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1087			sup_len++;
1088
1089		if (sup_len == size)
1090			return 0;
1091	} else {
1092		sup_start = sup_len = 1;
1093
1094		while (sup_len < size && !_isspace(data[sup_len]))
1095			sup_len++;
1096	}
1097
1098	if (sup_len - sup_start == 0)
1099		return (sup_start == 2) ? 3 : 0;
1100
1101	sup = rndr_newbuf(rndr, BUFFER_SPAN);
1102	parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1103	rndr->cb.superscript(ob, sup, rndr->opaque);
1104	rndr_popbuf(rndr, BUFFER_SPAN);
1105
1106	return (sup_start == 2) ? sup_len + 1 : sup_len;
1107}
1108
1109/*********************************
1110 * BLOCK-LEVEL PARSING FUNCTIONS *
1111 *********************************/
1112
/* is_empty • returns the line length when it is empty, 0 otherwise */
/*   a line is empty when only spaces come before its newline (or before */
/*   the end of data); the length counts the newline, and is size + 1 when */
/*   the data ends without one */
static size_t
is_empty(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;
		pos++;
	}

	return pos + 1;
}
1125
/* is_hrule • returns whether a line is a horizontal rule */
/*   that is: up to three leading spaces, then at least three '*', '-' or */
/*   '_' (all the same), with nothing but spaces between or after them */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, marks = 0;
	uint8_t mark;

	if (size < 3)
		return 0;

	/* skipping up to three initial spaces */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* looking at the hrule char */
	if (pos + 2 >= size)
		return 0;

	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the whole line must be the char or whitespace */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			marks++;
		else if (data[pos] != ' ')
			return 0;
	}

	return marks >= 3;
}
1156
/* prefix_codefence • checks whether a line begins with a code fence */
/*	a fence is up to three leading spaces followed by a run of at least
 *	three '~' or three '`'; returns the offset just past that run
 *	(indentation included), or 0 when there is no fence */
static size_t
prefix_codefence(uint8_t *data, size_t size)
{
	size_t pos = 0, run = 0;
	uint8_t fence;

	if (size < 3)
		return 0;

	/* up to three spaces of indentation are tolerated */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* there must be room for at least three bytes from here */
	if (pos + 2 >= size)
		return 0;

	/* the first non-space byte selects the fence character */
	fence = data[pos];
	if (fence != '~' && fence != '`')
		return 0;

	/* measure the uninterrupted run of fence characters */
	for (; pos < size && data[pos] == fence; pos++)
		run++;

	return (run >= 3) ? pos : 0;
}
1187
1188/* check if a line is a code fence; return its size if it is */
1189static size_t
1190is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1191{
1192	size_t i = 0, syn_len = 0;
1193	uint8_t *syn_start;
1194
1195	i = prefix_codefence(data, size);
1196	if (i == 0)
1197		return 0;
1198
1199	while (i < size && data[i] == ' ')
1200		i++;
1201
1202	syn_start = data + i;
1203
1204	if (i < size && data[i] == '{') {
1205		i++; syn_start++;
1206
1207		while (i < size && data[i] != '}' && data[i] != '\n') {
1208			syn_len++; i++;
1209		}
1210
1211		if (i == size || data[i] != '}')
1212			return 0;
1213
1214		/* strip all whitespace at the beginning and the end
1215		 * of the {} block */
1216		while (syn_len > 0 && _isspace(syn_start[0])) {
1217			syn_start++; syn_len--;
1218		}
1219
1220		while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1221			syn_len--;
1222
1223		i++;
1224	} else {
1225		while (i < size && !_isspace(data[i])) {
1226			syn_len++; i++;
1227		}
1228	}
1229
1230	if (syntax) {
1231		syntax->data = syn_start;
1232		syntax->size = syn_len;
1233	}
1234
1235	while (i < size && data[i] != '\n') {
1236		if (!_isspace(data[i]))
1237			return 0;
1238
1239		i++;
1240	}
1241
1242	return i + 1;
1243}
1244
1245/* is_atxheader • returns whether the line is a hash-prefixed header */
1246static int
1247is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1248{
1249	if (data[0] != '#')
1250		return 0;
1251
1252	if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1253		size_t level = 0;
1254
1255		while (level < size && level < 6 && data[level] == '#')
1256			level++;
1257
1258		if (level < size && data[level] != ' ')
1259			return 0;
1260	}
1261
1262	return 1;
1263}
1264
/* is_headerline • tells whether the line is a setext-style header underline */
/*	an underline is a run of '=' (level 1) or '-' (level 2), optionally
 *	followed by spaces, that ends at a newline or at the end of the data.
 *	Returns the header level (1 or 2), or 0 when the line is no underline.
 *	An empty input (size == 0) is never an underline; data is not read. */
static int
is_headerline(uint8_t *data, size_t size)
{
	size_t i;
	uint8_t mark;

	/* guard: the code below would otherwise read data[0] out of bounds */
	if (size == 0)
		return 0;

	mark = data[0];
	if (mark != '=' && mark != '-')
		return 0;

	/* run of underline characters */
	for (i = 1; i < size && data[i] == mark; i++)
		;

	/* trailing spaces are allowed */
	while (i < size && data[i] == ' ')
		i++;

	/* anything else before the end of the line disqualifies it */
	if (i < size && data[i] != '\n')
		return 0;

	return (mark == '=') ? 1 : 2;
}
1285
/* is_next_headerline • tells whether the line after the current one is a */
/*	setext underline; returns is_headerline()'s result for that line, or 0
 *	when no byte follows the current line's newline */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next = 0;

	/* locate the newline that ends the current line */
	for (; next < size && data[next] != '\n'; next++)
		;

	/* step over it; bail out if the data ends here */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1299
/* prefix_quote • returns the length of a blockquote prefix, 0 if none */
/*	the prefix is up to three spaces, a '>' and one optional space */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation are tolerated */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	/* a single space after the marker belongs to the prefix */
	if (pos + 1 < size && data[pos + 1] == ' ')
		return pos + 2;

	return pos + 1;
}
1318
/* prefix_code • returns the block-code prefix length: 4 when the line */
/*	starts with four spaces, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1328
/* prefix_oli • returns the length of an ordered list item prefix, 0 if none */
/*	the prefix is up to three spaces, one or more digits, a '.' and a
 *	space; a line directly followed by a setext underline is rejected */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation are tolerated */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be followed by ". " */
	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	/* the line is a header title when the next line underlines it */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1353
/* prefix_uli • returns the length of an unordered list item prefix, 0 if none */
/*	the prefix is up to three spaces, one of '*', '+' or '-' and a space;
 *	a line directly followed by a setext underline is rejected */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;
	uint8_t bullet;

	/* up to three spaces of indentation are tolerated */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* both the bullet and the byte after it must exist */
	if (pos + 1 >= size)
		return 0;

	bullet = data[pos];
	if (bullet != '*' && bullet != '+' && bullet != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	/* the line is a header title when the next line underlines it */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1374
1375
/* parse_block • forward declaration of the block-level parser, which is
 * defined later in this file; it returns nothing */
static void
parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size);
1379
1380
1381/* parse_blockquote • handles parsing of a blockquote fragment */
1382static size_t
1383parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1384{
1385	size_t beg, end = 0, pre, work_size = 0;
1386	uint8_t *work_data = 0;
1387	struct buf *out = 0;
1388
1389	out = rndr_newbuf(rndr, BUFFER_BLOCK);
1390	beg = 0;
1391	while (beg < size) {
1392		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1393
1394		pre = prefix_quote(data + beg, end - beg);
1395
1396		if (pre)
1397			beg += pre; /* skipping prefix */
1398
1399		/* empty line followed by non-quote line */
1400		else if (is_empty(data + beg, end - beg) &&
1401				(end >= size || (prefix_quote(data + end, size - end) == 0 &&
1402				!is_empty(data + end, size - end))))
1403			break;
1404
1405		if (beg < end) { /* copy into the in-place working buffer */
1406			/* bufput(work, data + beg, end - beg); */
1407			if (!work_data)
1408				work_data = data + beg;
1409			else if (data + beg != work_data + work_size)
1410				memmove(work_data + work_size, data + beg, end - beg);
1411			work_size += end - beg;
1412		}
1413		beg = end;
1414	}
1415
1416	parse_block(out, rndr, work_data, work_size);
1417	if (rndr->cb.blockquote)
1418		rndr->cb.blockquote(ob, out, rndr->opaque);
1419	rndr_popbuf(rndr, BUFFER_BLOCK);
1420	return end;
1421}
1422
/* parse_htmlblock • forward declaration; parse_paragraph calls it before
 * the definition that appears later in this file */
static size_t parse_htmlblock(struct buf *ob, struct sd_markdown *rndr,
			uint8_t *data, size_t size, int do_render);
1425
1426/* parse_blockquote • handles parsing of a regular paragraph */
1427static size_t
1428parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1429{
1430	size_t i = 0, end = 0;
1431	int level = 0;
1432	struct buf work = { data, 0, 0, 0 };
1433
1434	while (i < size) {
1435		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1436
1437		if (is_empty(data + i, size - i))
1438			break;
1439
1440		if ((level = is_headerline(data + i, size - i)) != 0)
1441			break;
1442
1443		if (is_atxheader(rndr, data + i, size - i) ||
1444			is_hrule(data + i, size - i) ||
1445			prefix_quote(data + i, size - i)) {
1446			end = i;
1447			break;
1448		}
1449
1450		/*
1451		 * Early termination of a paragraph with the same logic
1452		 * as Markdown 1.0.0. If this logic is applied, the
1453		 * Markdown 1.0.3 test suite won't pass cleanly
1454		 *
1455		 * :: If the first character in a new line is not a letter,
1456		 * let's check to see if there's some kind of block starting
1457		 * here
1458		 */
1459		if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
1460			if (prefix_oli(data + i, size - i) ||
1461				prefix_uli(data + i, size - i)) {
1462				end = i;
1463				break;
1464			}
1465
1466			/* see if an html block starts here */
1467			if (data[i] == '<' && rndr->cb.blockhtml &&
1468				parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1469				end = i;
1470				break;
1471			}
1472
1473			/* see if a code fence starts here */
1474			if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1475				is_codefence(data + i, size - i, NULL) != 0) {
1476				end = i;
1477				break;
1478			}
1479		}
1480
1481		i = end;
1482	}
1483
1484	work.size = i;
1485	while (work.size && data[work.size - 1] == '\n')
1486		work.size--;
1487
1488	if (!level) {
1489		struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1490		parse_inline(tmp, rndr, work.data, work.size);
1491		if (rndr->cb.paragraph)
1492			rndr->cb.paragraph(ob, tmp, rndr->opaque);
1493		rndr_popbuf(rndr, BUFFER_BLOCK);
1494	} else {
1495		struct buf *header_work;
1496
1497		if (work.size) {
1498			size_t beg;
1499			i = work.size;
1500			work.size -= 1;
1501
1502			while (work.size && data[work.size] != '\n')
1503				work.size -= 1;
1504
1505			beg = work.size + 1;
1506			while (work.size && data[work.size - 1] == '\n')
1507				work.size -= 1;
1508
1509			if (work.size > 0) {
1510				struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1511				parse_inline(tmp, rndr, work.data, work.size);
1512
1513				if (rndr->cb.paragraph)
1514					rndr->cb.paragraph(ob, tmp, rndr->opaque);
1515
1516				rndr_popbuf(rndr, BUFFER_BLOCK);
1517				work.data += beg;
1518				work.size = i - beg;
1519			}
1520			else work.size = i;
1521		}
1522
1523		header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1524		parse_inline(header_work, rndr, work.data, work.size);
1525
1526		if (rndr->cb.header)
1527			rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
1528
1529		rndr_popbuf(rndr, BUFFER_SPAN);
1530	}
1531
1532	return end;
1533}
1534
1535/* parse_fencedcode • handles parsing of a block-level code fragment */
1536static size_t
1537parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1538{
1539	size_t beg, end;
1540	struct buf *work = 0;
1541	struct buf lang = { 0, 0, 0, 0 };
1542
1543	beg = is_codefence(data, size, &lang);
1544	if (beg == 0) return 0;
1545
1546	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1547
1548	while (beg < size) {
1549		size_t fence_end;
1550		struct buf fence_trail = { 0, 0, 0, 0 };
1551
1552		fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1553		if (fence_end != 0 && fence_trail.size == 0) {
1554			beg += fence_end;
1555			break;
1556		}
1557
1558		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1559
1560		if (beg < end) {
1561			/* verbatim copy to the working buffer,
1562				escaping entities */
1563			if (is_empty(data + beg, end - beg))
1564				bufputc(work, '\n');
1565			else bufput(work, data + beg, end - beg);
1566		}
1567		beg = end;
1568	}
1569
1570	if (work->size && work->data[work->size - 1] != '\n')
1571		bufputc(work, '\n');
1572
1573	if (rndr->cb.blockcode)
1574		rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1575
1576	rndr_popbuf(rndr, BUFFER_BLOCK);
1577	return beg;
1578}
1579
1580static size_t
1581parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1582{
1583	size_t beg, end, pre;
1584	struct buf *work = 0;
1585
1586	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1587
1588	beg = 0;
1589	while (beg < size) {
1590		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1591		pre = prefix_code(data + beg, end - beg);
1592
1593		if (pre)
1594			beg += pre; /* skipping prefix */
1595		else if (!is_empty(data + beg, end - beg))
1596			/* non-empty non-prefixed line breaks the pre */
1597			break;
1598
1599		if (beg < end) {
1600			/* verbatim copy to the working buffer,
1601				escaping entities */
1602			if (is_empty(data + beg, end - beg))
1603				bufputc(work, '\n');
1604			else bufput(work, data + beg, end - beg);
1605		}
1606		beg = end;
1607	}
1608
1609	while (work->size && work->data[work->size - 1] == '\n')
1610		work->size -= 1;
1611
1612	bufputc(work, '\n');
1613
1614	if (rndr->cb.blockcode)
1615		rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1616
1617	rndr_popbuf(rndr, BUFFER_BLOCK);
1618	return beg;
1619}
1620
1621/* parse_listitem • parsing of a single list item */
1622/*	assuming initial prefix is already removed */
1623static size_t
1624parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1625{
1626	struct buf *work = 0, *inter = 0;
1627	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1628	int in_empty = 0, has_inside_empty = 0, in_fence = 0;
1629
1630	/* keeping track of the first indentation prefix */
1631	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1632		orgpre++;
1633
1634	beg = prefix_uli(data, size);
1635	if (!beg)
1636		beg = prefix_oli(data, size);
1637
1638	if (!beg)
1639		return 0;
1640
1641	/* skipping to the beginning of the following line */
1642	end = beg;
1643	while (end < size && data[end - 1] != '\n')
1644		end++;
1645
1646	/* getting working buffers */
1647	work = rndr_newbuf(rndr, BUFFER_SPAN);
1648	inter = rndr_newbuf(rndr, BUFFER_SPAN);
1649
1650	/* putting the first line into the working buffer */
1651	bufput(work, data + beg, end - beg);
1652	beg = end;
1653
1654	/* process the following lines */
1655	while (beg < size) {
1656		size_t has_next_uli = 0, has_next_oli = 0;
1657
1658		end++;
1659
1660		while (end < size && data[end - 1] != '\n')
1661			end++;
1662
1663		/* process an empty line */
1664		if (is_empty(data + beg, end - beg)) {
1665			in_empty = 1;
1666			beg = end;
1667			continue;
1668		}
1669
1670		/* calculating the indentation */
1671		i = 0;
1672		while (i < 4 && beg + i < end && data[beg + i] == ' ')
1673			i++;
1674
1675		pre = i;
1676
1677		if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
1678			if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
1679				in_fence = !in_fence;
1680		}
1681
1682		/* Only check for new list items if we are **not** inside
1683		 * a fenced code block */
1684		if (!in_fence) {
1685			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1686			has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1687		}
1688
1689		/* checking for ul/ol switch */
1690		if (in_empty && (
1691			((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
1692			(!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
1693			*flags |= MKD_LI_END;
1694			break; /* the following item must have same list type */
1695		}
1696
1697		/* checking for a new item */
1698		if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1699			if (in_empty)
1700				has_inside_empty = 1;
1701
1702			if (pre == orgpre) /* the following item must have */
1703				break;             /* the same indentation */
1704
1705			if (!sublist)
1706				sublist = work->size;
1707		}
1708		/* joining only indented stuff after empty lines;
1709		 * note that now we only require 1 space of indentation
1710		 * to continue a list */
1711		else if (in_empty && pre == 0) {
1712			*flags |= MKD_LI_END;
1713			break;
1714		}
1715		else if (in_empty) {
1716			bufputc(work, '\n');
1717			has_inside_empty = 1;
1718		}
1719
1720		in_empty = 0;
1721
1722		/* adding the line without prefix into the working buffer */
1723		bufput(work, data + beg + i, end - beg - i);
1724		beg = end;
1725	}
1726
1727	/* render of li contents */
1728	if (has_inside_empty)
1729		*flags |= MKD_LI_BLOCK;
1730
1731	if (*flags & MKD_LI_BLOCK) {
1732		/* intermediate render of block li */
1733		if (sublist && sublist < work->size) {
1734			parse_block(inter, rndr, work->data, sublist);
1735			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1736		}
1737		else
1738			parse_block(inter, rndr, work->data, work->size);
1739	} else {
1740		/* intermediate render of inline li */
1741		if (sublist && sublist < work->size) {
1742			parse_inline(inter, rndr, work->data, sublist);
1743			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1744		}
1745		else
1746			parse_inline(inter, rndr, work->data, work->size);
1747	}
1748
1749	/* render of li itself */
1750	if (rndr->cb.listitem)
1751		rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
1752
1753	rndr_popbuf(rndr, BUFFER_SPAN);
1754	rndr_popbuf(rndr, BUFFER_SPAN);
1755	return beg;
1756}
1757
1758
1759/* parse_list • parsing ordered or unordered list block */
1760static size_t
1761parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1762{
1763	struct buf *work = 0;
1764	size_t i = 0, j;
1765
1766	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1767
1768	while (i < size) {
1769		j = parse_listitem(work, rndr, data + i, size - i, &flags);
1770		i += j;
1771
1772		if (!j || (flags & MKD_LI_END))
1773			break;
1774	}
1775
1776	if (rndr->cb.list)
1777		rndr->cb.list(ob, work, flags, rndr->opaque);
1778	rndr_popbuf(rndr, BUFFER_BLOCK);
1779	return i;
1780}
1781
1782/* parse_atxheader • parsing of atx-style headers */
1783static size_t
1784parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1785{
1786	size_t level = 0;
1787	size_t i, end, skip;
1788
1789	while (level < size && level < 6 && data[level] == '#')
1790		level++;
1791
1792	for (i = level; i < size && data[i] == ' '; i++);
1793
1794	for (end = i; end < size && data[end] != '\n'; end++);
1795	skip = end;
1796
1797	while (end && data[end - 1] == '#')
1798		end--;
1799
1800	while (end && data[end - 1] == ' ')
1801		end--;
1802
1803	if (end > i) {
1804		struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
1805
1806		parse_inline(work, rndr, data + i, end - i);
1807
1808		if (rndr->cb.header)
1809			rndr->cb.header(ob, work, (int)level, rndr->opaque);
1810
1811		rndr_popbuf(rndr, BUFFER_SPAN);
1812	}
1813
1814	return skip;
1815}
1816
1817
/* htmlblock_end_tag • checks for the end of an HTML block at data: */
/*	a closing tag, then a blank rest-of-line, then an optional blank line.
 *	The tag name is compared case-insensitively starting at data[2]; the
 *	two bytes before it are not checked here. `rndr` is unused.
 *	Returns the length of the match, 0 otherwise. */
static size_t
htmlblock_end_tag(
	const char *tag,
	size_t tag_len,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size)
{
	size_t pos, blank;

	/* the tag name, its '>' and at least one more byte must fit */
	if (tag_len + 3 >= size)
		return 0;

	if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
		return 0;

	if (data[tag_len + 2] != '>')
		return 0;

	/* the rest of the closing tag's line must be blank */
	pos = tag_len + 3;
	blank = 0;
	if (pos < size) {
		blank = is_empty(data + pos, size - pos);
		if (blank == 0)
			return 0;
	}
	pos += blank;

	/* a following blank line, if any, is part of the match */
	blank = 0;
	if (pos < size)
		blank = is_empty(data + pos, size - pos);

	return pos + blank;
}
1849
/* htmlblock_end • searches data for the closing tag of an HTML block */
/*	every "</" found is tested with htmlblock_end_tag(). With start_of_line
 *	set, a candidate past the first line only counts when the byte before
 *	its '<' is a newline. Returns the offset just past the match, or 0
 *	when no closing tag is found. */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	size_t tag_len = strlen(curtag);
	size_t pos = 1, match_len;
	int newlines = 0;

	while (pos < size) {
		/* advance until data[pos - 1] and data[pos] spell "</",
		 * counting the newlines stepped over */
		for (pos++; pos < size && (data[pos - 1] != '<' || data[pos] != '/'); pos++) {
			if (data[pos] == '\n')
				newlines++;
		}

		/* when only unindented tags are wanted, skip a candidate that
		 * does not follow a newline; a tag still on the initial line
		 * is the exception and always counts */
		if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
			continue;

		/* not enough room left for the closing tag */
		if (pos + 2 + tag_len >= size)
			break;

		match_len = htmlblock_end_tag(curtag, tag_len, rndr, data + pos - 1, size - pos + 1);
		if (match_len)
			return pos + match_len - 1;
	}

	return 0;
}
1890
1891
1892/* parse_htmlblock • parsing of inline HTML block */
1893static size_t
1894parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
1895{
1896	size_t i, j = 0, tag_end;
1897	const char *curtag = NULL;
1898	struct buf work = { data, 0, 0, 0 };
1899
1900	/* identification of the opening tag */
1901	if (size < 2 || data[0] != '<')
1902		return 0;
1903
1904	i = 1;
1905	while (i < size && data[i] != '>' && data[i] != ' ')
1906		i++;
1907
1908	if (i < size)
1909		curtag = find_block_tag((char *)data + 1, (int)i - 1);
1910
1911	/* handling of special cases */
1912	if (!curtag) {
1913
1914		/* HTML comment, laxist form */
1915		if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
1916			i = 5;
1917
1918			while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
1919				i++;
1920
1921			i++;
1922
1923			if (i < size)
1924				j = is_empty(data + i, size - i);
1925
1926			if (j) {
1927				work.size = i + j;
1928				if (do_render && rndr->cb.blockhtml)
1929					rndr->cb.blockhtml(ob, &work, rndr->opaque);
1930				return work.size;
1931			}
1932		}
1933
1934		/* HR, which is the only self-closing block tag considered */
1935		if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
1936			i = 3;
1937			while (i < size && data[i] != '>')
1938				i++;
1939
1940			if (i + 1 < size) {
1941				i++;
1942				j = is_empty(data + i, size - i);
1943				if (j) {
1944					work.size = i + j;
1945					if (do_render && rndr->cb.blockhtml)
1946						rndr->cb.blockhtml(ob, &work, rndr->opaque);
1947					return work.size;
1948				}
1949			}
1950		}
1951
1952		/* no special case recognised */
1953		return 0;
1954	}
1955
1956	/* looking for an unindented matching closing tag */
1957	/*	followed by a blank line */
1958	tag_end = htmlblock_end(curtag, rndr, data, size, 1);
1959
1960	/* if not found, trying a second pass looking for indented match */
1961	/* but not if tag is "ins" or "del" (following original Markdown.pl) */
1962	if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
1963		tag_end = htmlblock_end(curtag, rndr, data, size, 0);
1964	}
1965
1966	if (!tag_end)
1967		return 0;
1968
1969	/* the end of the block has been found */
1970	work.size = tag_end;
1971	if (do_render && rndr->cb.blockhtml)
1972		rndr->cb.blockhtml(ob, &work, rndr->opaque);
1973
1974	return tag_end;
1975}
1976
1977static void
1978parse_table_row(
1979	struct buf *ob,
1980	struct sd_markdown *rndr,
1981	uint8_t *data,
1982	size_t size,
1983	size_t columns,
1984	int *col_data,
1985	int header_flag)
1986{
1987	size_t i = 0, col;
1988	struct buf *row_work = 0;
1989
1990	if (!rndr->cb.table_cell || !rndr->cb.table_row)
1991		return;
1992
1993	row_work = rndr_newbuf(rndr, BUFFER_SPAN);
1994
1995	if (i < size && data[i] == '|')
1996		i++;
1997
1998	for (col = 0; col < columns && i < size; ++col) {
1999		size_t cell_start, cell_end;
2000		struct buf *cell_work;
2001
2002		cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
2003
2004		while (i < size && _isspace(data[i]))
2005			i++;
2006
2007		cell_start = i;
2008
2009		while (i < size && data[i] != '|')
2010			i++;
2011
2012		cell_end = i - 1;
2013
2014		while (cell_end > cell_start && _isspace(data[cell_end]))
2015			cell_end--;
2016
2017		parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
2018		rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
2019
2020		rndr_popbuf(rndr, BUFFER_SPAN);
2021		i++;
2022	}
2023
2024	for (; col < columns; ++col) {
2025		struct buf empty_cell = { 0, 0, 0, 0 };
2026		rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
2027	}
2028
2029	rndr->cb.table_row(ob, row_work, rndr->opaque);
2030
2031	rndr_popbuf(rndr, BUFFER_SPAN);
2032}
2033
2034static size_t
2035parse_table_header(
2036	struct buf *ob,
2037	struct sd_markdown *rndr,
2038	uint8_t *data,
2039	size_t size,
2040	size_t *columns,
2041	int **column_data)
2042{
2043	int pipes;
2044	size_t i = 0, col, header_end, under_end;
2045
2046	pipes = 0;
2047	while (i < size && data[i] != '\n')
2048		if (data[i++] == '|')
2049			pipes++;
2050
2051	if (i == size || pipes == 0)
2052		return 0;
2053
2054	header_end = i;
2055
2056	while (header_end > 0 && _isspace(data[header_end - 1]))
2057		header_end--;
2058
2059	if (data[0] == '|')
2060		pipes--;
2061
2062	if (header_end && data[header_end - 1] == '|')
2063		pipes--;
2064
2065	*columns = pipes + 1;
2066	*column_data = calloc(*columns, sizeof(int));
2067
2068	/* Parse the header underline */
2069	i++;
2070	if (i < size && data[i] == '|')
2071		i++;
2072
2073	under_end = i;
2074	while (under_end < size && data[under_end] != '\n')
2075		under_end++;
2076
2077	for (col = 0; col < *columns && i < under_end; ++col) {
2078		size_t dashes = 0;
2079
2080		while (i < under_end && data[i] == ' ')
2081			i++;
2082
2083		if (data[i] == ':') {
2084			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
2085			dashes++;
2086		}
2087
2088		while (i < under_end && data[i] == '-') {
2089			i++; dashes++;
2090		}
2091
2092		if (i < under_end && data[i] == ':') {
2093			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
2094			dashes++;
2095		}
2096
2097		while (i < under_end && data[i] == ' ')
2098			i++;
2099
2100		if (i < under_end && data[i] != '|')
2101			break;
2102
2103		if (dashes < 3)
2104			break;
2105
2106		i++;
2107	}
2108
2109	if (col < *columns)
2110		return 0;
2111
2112	parse_table_row(
2113		ob, rndr, data,
2114		header_end,
2115		*columns,
2116		*column_data,
2117		MKD_TABLE_HEADER
2118	);
2119
2120	return under_end + 1;
2121}
2122
2123static size_t
2124parse_table(
2125	struct buf *ob,
2126	struct sd_markdown *rndr,
2127	uint8_t *data,
2128	size_t size)
2129{
2130	size_t i;
2131
2132	struct buf *header_work = 0;
2133	struct buf *body_work = 0;
2134
2135	size_t columns;
2136	int *col_data = NULL;
2137
2138	header_work = rndr_newbuf(rndr, BUFFER_SPAN);
2139	body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
2140
2141	i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
2142	if (i > 0) {
2143
2144		while (i < size) {
2145			size_t row_start;
2146			int pipes = 0;
2147
2148			row_start = i;
2149
2150			while (i < size && data[i] != '\n')
2151				if (data[i++] == '|')
2152					pipes++;
2153
2154			if (pipes == 0 || i == size) {
2155				i = row_start;
2156				break;
2157			}
2158
2159			parse_table_row(
2160				body_work,
2161				rndr,
2162				data + row_start,
2163				i - row_start,
2164				columns,
2165				col_data, 0
2166			);
2167
2168			i++;
2169		}
2170
2171		if (rndr->cb.table)
2172			rndr->cb.table(ob, header_work, body_work, rndr->opaque);
2173	}
2174
2175	free(col_data);
2176	rndr_popbuf(rndr, BUFFER_SPAN);
2177	rndr_popbuf(rndr, BUFFER_BLOCK);
2178	return i;
2179}
2180
2181/* parse_block • parsing of one block, returning next uint8_t to parse */
2182static void
2183parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
2184{
2185	size_t beg, end, i;
2186	uint8_t *txt_data;
2187	beg = 0;
2188
2189	if (rndr->work_bufs[BUFFER_SPAN].size +
2190		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_

Large files are truncated, but you can click here to view the full file