PageRenderTime 133ms CodeModel.GetById 3ms app.highlight 114ms RepoModel.GetById 1ms app.codeStats 1ms

/src/markdown.c

https://github.com/nono/upskirt
C | 2115 lines | 1956 code | 90 blank | 69 comment | 179 complexity | b6e96eafb00c47f3c0ebc671c982c409 MD5 | raw file
   1/* markdown.c - generic markdown parser */
   2
   3/*
   4 * Copyright (c) 2009, Natacha Porté
   5 * Copyright (c) 2011, Vicent Marti
   6 *
   7 * Permission to use, copy, modify, and distribute this software for any
   8 * purpose with or without fee is hereby granted, provided that the above
   9 * copyright notice and this permission notice appear in all copies.
  10 *
  11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18 */
  19
  20#include "markdown.h"
  21#include "array.h"
  22
  23#include <assert.h>
  24#include <string.h>
  25#include <strings.h> /* for strncasecmp */
  26#include <ctype.h>
  27#include <stdio.h>
  28
  29#define TEXT_UNIT 64	/* unit for the copy of the input buffer */
  30#define WORK_UNIT 64	/* block-level working buffer */
  31
  32#define MKD_LI_END 8	/* internal list flag */
  33
  34/***************
  35 * LOCAL TYPES *
  36 ***************/
  37
  38/* link_ref • reference to a link */
/*   entries are looked up by id in the render's refs array (see char_link) */
  39struct link_ref {
  40	struct buf *id;	/* lookup key, compared case-insensitively with bufcasecmp */
  41	struct buf *link;	/* destination; char_link unescapes it before rendering */
  42	struct buf *title;	/* title handed as-is to the link/image renderer */
  43};
  44
  45/* char_trigger • function pointer to render active chars */
  46/*   returns the number of chars taken care of */
/*   (0 means the trigger declined: parse_inline then emits the char as text) */
  47/*   data is the pointer of the beginning of the span */
  48/*   offset is the number of valid chars before data */
/*   size is the number of chars available starting at data */
  49struct render;
  50typedef size_t
  51(*char_trigger)(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  52
  53
  54/* render • structure containing one particular render */
  55struct render {
  56	struct mkd_renderer	make;	/* renderer callbacks and their opaque pointer */
  57	struct array refs;	/* link_ref entries, searched with cmp_link_ref */
  58	char_trigger active_char[256];	/* trigger per input byte, 0 when the byte is inactive */
  59	struct parray work;	/* pool of working buffers (rndr_newbuf / rndr_popbuf) */
  60	unsigned int ext_flags;	/* MKDEXT_* extension flags */
  61	size_t max_nesting;	/* parse_inline emits nothing once work.size exceeds this */
  62};
  63
  64/* html_tag • structure for quick HTML tag search (inspired from discount) */
/*   text is not required to be NUL-terminated: find_block_tag points it */
/*   straight into the input, and only the first size chars are compared */
  65struct html_tag {
  66	const char *text;	/* tag name, without the angle brackets */
  67	size_t size;	/* number of chars of text that make up the name */
  68};
  69
  70static inline struct buf *
  71rndr_newbuf(struct render *rndr)
  72{
  73	struct buf *work = NULL;
  74
  75	if (rndr->work.size < rndr->work.asize) {
  76		work = rndr->work.item[rndr->work.size++];
  77		work->size = 0;
  78	} else {
  79		work = bufnew(WORK_UNIT);
  80		parr_push(&rndr->work, work);
  81	}
  82
  83	return work;
  84}
  85
  86static inline void
  87rndr_popbuf(struct render *rndr)
  88{
  89	rndr->work.size--;
  90}
  91
  92/********************
  93 * GLOBAL VARIABLES *
  94 ********************/
  95
  96/* block_tags • recognised block tags, sorted by cmp_html_tag */
  97static struct html_tag block_tags[] = {
  98/*0*/	{ "p",		1 },
  99	{ "dl",		2 },
 100	{ "h1",		2 },
 101	{ "h2",		2 },
 102	{ "h3",		2 },
 103	{ "h4",		2 },
 104	{ "h5",		2 },
 105	{ "h6",		2 },
 106	{ "ol",		2 },
 107	{ "ul",		2 },
 108/*10*/	{ "del",	3 },
 109	{ "div",	3 },
 110/*12*/	{ "ins",	3 },
 111	{ "pre",	3 },
 112	{ "form",	4 },
 113	{ "math",	4 },
 114	{ "table",	5 },
 115	{ "iframe",	6 },
 116	{ "script",	6 },
 117	{ "fieldset",	8 },
 118	{ "noscript",	8 },
 119	{ "blockquote",	10 }
 120};
 121
 122#define INS_TAG (block_tags + 12)
 123#define DEL_TAG (block_tags + 10)
 124
 125/***************************
 126 * HELPER FUNCTIONS *
 127 ***************************/
 128int
 129is_safe_link(const char *link, size_t link_len)
 130{
 131	static const size_t valid_uris_count = 4;
 132	static const char *valid_uris[] = {
 133		"http://", "https://", "ftp://", "mailto://"
 134	};
 135
 136	size_t i;
 137
 138	for (i = 0; i < valid_uris_count; ++i) {
 139		size_t len = strlen(valid_uris[i]);
 140
 141		if (link_len > len && strncasecmp(link, valid_uris[i], len) == 0)
 142			return 1;
 143	}
 144
 145	return 0;
 146}
 147
 148static void
 149unscape_text(struct buf *ob, struct buf *src)
 150{
 151	size_t i = 0, org;
 152	while (i < src->size) {
 153		org = i;
 154		while (i < src->size && src->data[i] != '\\')
 155			i++;
 156
 157		if (i > org)
 158			bufput(ob, src->data + org, i - org);
 159
 160		if (i + 1 >= src->size)
 161			break;
 162
 163		bufputc(ob, src->data[i + 1]);
 164		i += 2;
 165	}
 166}
 167
 168/* cmp_link_ref • comparison function for link_ref sorted arrays */
 169static int
 170cmp_link_ref(void *key, void *array_entry)
 171{
 172	struct link_ref *lr = array_entry;
 173	return bufcasecmp(key, lr->id);
 174}
 175
 176/* cmp_link_ref_sort • comparison function for link_ref qsort */
 177static int
 178cmp_link_ref_sort(const void *a, const void *b)
 179{
 180	const struct link_ref *lra = a;
 181	const struct link_ref *lrb = b;
 182	return bufcasecmp(lra->id, lrb->id);
 183}
 184
 185/* cmp_html_tag • comparison function for bsearch() (stolen from discount) */
 186static int
 187cmp_html_tag(const void *a, const void *b)
 188{
 189	const struct html_tag *hta = a;
 190	const struct html_tag *htb = b;
 191	if (hta->size != htb->size) return (int)((ssize_t)hta->size - (ssize_t)htb->size);
 192	return strncasecmp(hta->text, htb->text, hta->size);
 193}
 194
 195
 196/* find_block_tag • returns the current block tag */
 197static struct html_tag *
 198find_block_tag(char *data, size_t size)
 199{
 200	size_t i = 0;
 201	struct html_tag key;
 202
 203	/* looking for the word end */
 204	while (i < size && ((data[i] >= '0' && data[i] <= '9')
 205				|| (data[i] >= 'A' && data[i] <= 'Z')
 206				|| (data[i] >= 'a' && data[i] <= 'z')))
 207		i += 1;
 208	if (i >= size) return 0;
 209
 210	/* binary search of the tag */
 211	key.text = data;
 212	key.size = i;
 213	return bsearch(&key, block_tags,
 214				sizeof block_tags / sizeof block_tags[0],
 215				sizeof block_tags[0], cmp_html_tag);
 216}
 217
 218/****************************
 219 * INLINE PARSING FUNCTIONS *
 220 ****************************/
 221
 222/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
 223/* this is less strict than the original markdown e-mail address matching */
 224static size_t
 225is_mail_autolink(char *data, size_t size)
 226{
 227	size_t i = 0, nb = 0;
 228
 229	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
 230	while (i < size && (data[i] == '-' || data[i] == '.'
 231	|| data[i] == '_' || data[i] == '@'
 232	|| (data[i] >= 'a' && data[i] <= 'z')
 233	|| (data[i] >= 'A' && data[i] <= 'Z')
 234	|| (data[i] >= '0' && data[i] <= '9'))) {
 235		if (data[i] == '@') nb += 1;
 236		i += 1; }
 237	if (i >= size || data[i] != '>' || nb != 1) return 0;
 238	return i + 1;
 239}
 240
 241/* tag_length • returns the length of the given tag, or 0 is it's not valid */
 242static size_t
 243tag_length(char *data, size_t size, enum mkd_autolink *autolink)
 244{
 245	size_t i, j;
 246
 247	/* a valid tag can't be shorter than 3 chars */
 248	if (size < 3) return 0;
 249
 250	/* begins with a '<' optionally followed by '/', followed by letter */
 251	if (data[0] != '<') return 0;
 252	i = (data[1] == '/') ? 2 : 1;
 253	if ((data[i] < 'a' || data[i] > 'z')
 254	&&  (data[i] < 'A' || data[i] > 'Z')) return 0;
 255
 256	/* scheme test */
 257	*autolink = MKDA_NOT_AUTOLINK;
 258
 259	/* try to find the beggining of an URI */
 260	while (i < size && (isalpha(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
 261		i++;
 262
 263	if (i > 1 && data[i] == '@') {
 264		if ((j = is_mail_autolink(data + i, size - i)) != 0) {
 265			*autolink = MKDA_EMAIL;
 266			return i + j;
 267		}
 268	}
 269
 270	if (i > 2 && data[i] == ':') {
 271		*autolink = MKDA_NORMAL;
 272		i++;
 273	}
 274
 275	/* completing autolink test: no whitespace or ' or " */
 276	if (i >= size)
 277		*autolink = MKDA_NOT_AUTOLINK;
 278
 279	else if (*autolink) {
 280		j = i;
 281
 282		while (i < size) {
 283			if (data[i] == '\\') i += 2;
 284			else if (data[i] == '>' || data[i] == '\'' ||
 285					data[i] == '"' || isspace(data[i])) break;
 286			else i += 1;
 287		}
 288
 289		if (i >= size) return 0;
 290		if (i > j && data[i] == '>') return i + 1;
 291		/* one of the forbidden chars has been found */
 292		*autolink = MKDA_NOT_AUTOLINK;
 293	}
 294
 295	/* looking for sometinhg looking like a tag end */
 296	while (i < size && data[i] != '>') i += 1;
 297	if (i >= size) return 0;
 298	return i + 1;
 299}
 300
 301/* parse_inline • parses inline markdown elements */
 302static void
 303parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size)
 304{
 305	size_t i = 0, end = 0;
 306	char_trigger action = 0;
 307	struct buf work = { 0, 0, 0, 0, 0 };
 308
 309	if (rndr->work.size > rndr->max_nesting)
 310		return;
 311
 312	while (i < size) {
 313		/* copying inactive chars into the output */
 314		while (end < size && (action = rndr->active_char[(unsigned char)data[end]]) == 0) {
 315			end++;
 316		}
 317
 318		if (rndr->make.normal_text) {
 319			work.data = data + i;
 320			work.size = end - i;
 321			rndr->make.normal_text(ob, &work, rndr->make.opaque);
 322		}
 323		else
 324			bufput(ob, data + i, end - i);
 325
 326		if (end >= size) break;
 327		i = end;
 328
 329		/* calling the trigger */
 330		end = action(ob, rndr, data + i, i, size - i);
 331		if (!end) /* no action from the callback */
 332			end = i + 1;
 333		else { 
 334			i += end;
 335			end = i;
 336		} 
 337	}
 338}
 339
 340/* find_emph_char • looks for the next emph char, skipping other constructs */
 341static size_t
 342find_emph_char(char *data, size_t size, char c)
 343{
 344	size_t i = 1;
 345
 346	while (i < size) {
 347		while (i < size && data[i] != c
 348		&& data[i] != '`' && data[i] != '[')
 349			i += 1;
 350		if (data[i] == c) return i;
 351
 352		/* not counting escaped chars */
 353		if (i && data[i - 1] == '\\') { i += 1; continue; }
 354
 355		/* skipping a code span */
 356		if (data[i] == '`') {
 357			size_t tmp_i = 0;
 358			i += 1;
 359			while (i < size && data[i] != '`') {
 360				if (!tmp_i && data[i] == c) tmp_i = i;
 361				i += 1; }
 362			if (i >= size) return tmp_i;
 363			i += 1; }
 364
 365		/* skipping a link */
 366		else if (data[i] == '[') {
 367			size_t tmp_i = 0;
 368			char cc;
 369			i += 1;
 370			while (i < size && data[i] != ']') {
 371				if (!tmp_i && data[i] == c) tmp_i = i;
 372				i += 1; }
 373			i += 1;
 374			while (i < size && (data[i] == ' '
 375			|| data[i] == '\t' || data[i] == '\n'))
 376				i += 1;
 377			if (i >= size) return tmp_i;
 378			if (data[i] != '[' && data[i] != '(') { /* not a link*/
 379				if (tmp_i) return tmp_i;
 380				else continue; }
 381			cc = data[i];
 382			i += 1;
 383			while (i < size && data[i] != cc) {
 384				if (!tmp_i && data[i] == c) tmp_i = i;
 385				i += 1; }
 386			if (i >= size) return tmp_i;
 387			i += 1; } }
 388	return 0;
 389}
 390
 391/* parse_emph1 • parsing single emphase */
 392/* closed by a symbol not preceded by whitespace and not followed by symbol */
 393static size_t
 394parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
 395{
 396	size_t i = 0, len;
 397	struct buf *work = 0;
 398	int r;
 399
 400	if (!rndr->make.emphasis) return 0;
 401
 402	/* skipping one symbol if coming from emph3 */
 403	if (size > 1 && data[0] == c && data[1] == c) i = 1;
 404
 405	while (i < size) {
 406		len = find_emph_char(data + i, size - i, c);
 407		if (!len) return 0;
 408		i += len;
 409		if (i >= size) return 0;
 410
 411		if (i + 1 < size && data[i + 1] == c) {
 412			i += 1;
 413			continue;
 414		}
 415
 416		if (data[i] == c && !isspace(data[i - 1])) {
 417
 418			if ((rndr->ext_flags & MKDEXT_LAX_EMPHASIS) == 0) {
 419				if (!(i + 1 == size || isspace(data[i + 1]) || ispunct(data[i + 1])))
 420					continue;
 421			}
 422
 423			work = rndr_newbuf(rndr);
 424			parse_inline(work, rndr, data, i);
 425			r = rndr->make.emphasis(ob, work, rndr->make.opaque);
 426			rndr_popbuf(rndr);
 427			return r ? i + 1 : 0;
 428		}
 429	}
 430
 431	return 0;
 432}
 433
 434/* parse_emph2 • parsing single emphase */
 435static size_t
 436parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
 437{
 438	int (*render_method)(struct buf *ob, struct buf *text, void *opaque);
 439	size_t i = 0, len;
 440	struct buf *work = 0;
 441	int r;
 442
 443	render_method = (c == '~') ? rndr->make.strikethrough : rndr->make.double_emphasis;
 444
 445	if (!render_method)
 446		return 0;
 447	
 448	while (i < size) {
 449		len = find_emph_char(data + i, size - i, c);
 450		if (!len) return 0;
 451		i += len;
 452
 453		if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !isspace(data[i - 1])) {
 454			work = rndr_newbuf(rndr);
 455			parse_inline(work, rndr, data, i);
 456			r = render_method(ob, work, rndr->make.opaque);
 457			rndr_popbuf(rndr);
 458			return r ? i + 2 : 0;
 459		}
 460		i++;
 461	}
 462	return 0;
 463}
 464
 465/* parse_emph3 • parsing single emphase */
 466/* finds the first closing tag, and delegates to the other emph */
 467static size_t
 468parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
 469{
 470	size_t i = 0, len;
 471	int r;
 472
 473	while (i < size) {
 474		len = find_emph_char(data + i, size - i, c);
 475		if (!len) return 0;
 476		i += len;
 477
 478		/* skip whitespace preceded symbols */
 479		if (data[i] != c || isspace(data[i - 1]))
 480			continue;
 481
 482		if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->make.triple_emphasis) {
 483			/* triple symbol found */
 484			struct buf *work = rndr_newbuf(rndr);
 485
 486			parse_inline(work, rndr, data, i);
 487			r = rndr->make.triple_emphasis(ob, work, rndr->make.opaque);
 488			rndr_popbuf(rndr);
 489			return r ? i + 3 : 0;
 490
 491		} else if (i + 1 < size && data[i + 1] == c) {
 492			/* double symbol found, handing over to emph1 */
 493			len = parse_emph1(ob, rndr, data - 2, size + 2, c);
 494			if (!len) return 0;
 495			else return len - 2;
 496
 497		} else {
 498			/* single symbol found, handing over to emph2 */
 499			len = parse_emph2(ob, rndr, data - 1, size + 1, c);
 500			if (!len) return 0;
 501			else return len - 1;
 502		}
 503	}
 504	return 0; 
 505}
 506
 507/* char_emphasis • single and double emphasis parsing */
 508static size_t
 509char_emphasis(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 510{
 511	char c = data[0];
 512	size_t ret;
 513
 514	if (size > 2 && data[1] != c) {
 515		/* whitespace cannot follow an opening emphasis;
 516		 * strikethrough only takes two characters '~~' */
 517		if (c == '~' || isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
 518			return 0;
 519
 520		return ret + 1;
 521	}
 522
 523	if (size > 3 && data[1] == c && data[2] != c) {
 524		if (isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
 525			return 0;
 526
 527		return ret + 2;
 528	}
 529
 530	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
 531		if (c == '~' || isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
 532			return 0;
 533
 534		return ret + 3;
 535	}
 536
 537	return 0; 
 538}
 539
 540
 541/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
 542static size_t
 543char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 544{
 545	if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
 546		return 0;
 547
 548	/* removing the last space from ob and rendering */
 549	while (ob->size && ob->data[ob->size - 1] == ' ')
 550		ob->size--;
 551
 552	return rndr->make.linebreak(ob, rndr->make.opaque) ? 1 : 0;
 553}
 554
 555
 556/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
 557static size_t
 558char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 559{
 560	size_t end, nb = 0, i, f_begin, f_end;
 561
 562	/* counting the number of backticks in the delimiter */
 563	while (nb < size && data[nb] == '`')
 564		nb++;
 565
 566	/* finding the next delimiter */
 567	i = 0;
 568	for (end = nb; end < size && i < nb; end++) {
 569		if (data[end] == '`') i++;
 570		else i = 0;
 571	}
 572
 573	if (i < nb && end >= size)
 574		return 0; /* no matching delimiter */
 575
 576	/* trimming outside whitespaces */
 577	f_begin = nb;
 578	while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t'))
 579		f_begin++;
 580
 581	f_end = end - nb;
 582	while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t'))
 583		f_end--;
 584
 585	/* real code span */
 586	if (f_begin < f_end) {
 587		struct buf work = { data + f_begin, f_end - f_begin, 0, 0, 0 };
 588		if (!rndr->make.codespan(ob, &work, rndr->make.opaque))
 589			end = 0;
 590	} else {
 591		if (!rndr->make.codespan(ob, 0, rndr->make.opaque))
 592			end = 0;
 593	}
 594
 595	return end;
 596}
 597
 598
 599/* char_escape • '\\' backslash escape */
 600static size_t
 601char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 602{
 603	struct buf work = { 0, 0, 0, 0, 0 };
 604
 605	if (size > 1) {
 606		if (rndr->make.normal_text) {
 607			work.data = data + 1;
 608			work.size = 1;
 609			rndr->make.normal_text(ob, &work, rndr->make.opaque);
 610		}
 611		else bufputc(ob, data[1]);
 612	}
 613
 614	return 2;
 615}
 616
 617/* char_entity • '&' escaped when it doesn't belong to an entity */
 618/* valid entities are assumed to be anything mathing &#?[A-Za-z0-9]+; */
 619static size_t
 620char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 621{
 622	size_t end = 1;
 623	struct buf work;
 624
 625	if (end < size && data[end] == '#')
 626		end++;
 627
 628	while (end < size && isalnum(data[end]))
 629		end++;
 630
 631	if (end < size && data[end] == ';')
 632		end += 1; /* real entity */
 633	else
 634		return 0; /* lone '&' */
 635
 636	if (rndr->make.entity) {
 637		work.data = data;
 638		work.size = end;
 639		rndr->make.entity(ob, &work, rndr->make.opaque);
 640	}
 641	else bufput(ob, data, end);
 642
 643	return end;
 644}
 645
 646/* char_langle_tag • '<' when tags or autolinks are allowed */
 647static size_t
 648char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 649{
 650	enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
 651	size_t end = tag_length(data, size, &altype);
 652	struct buf work = { data, end, 0, 0, 0 };
 653	int ret = 0;
 654
 655	if (end > 2) {
 656		if (rndr->make.autolink && altype != MKDA_NOT_AUTOLINK) {
 657			struct buf *u_link = rndr_newbuf(rndr);
 658			work.data = data + 1;
 659			work.size = end - 2;
 660			unscape_text(u_link, &work);
 661			ret = rndr->make.autolink(ob, u_link, altype, rndr->make.opaque);
 662			rndr_popbuf(rndr);
 663		}
 664		else if (rndr->make.raw_html_tag)
 665			ret = rndr->make.raw_html_tag(ob, &work, rndr->make.opaque);
 666	}
 667
 668	if (!ret) return 0;
 669	else return end;
 670}
 671
 672static size_t
 673char_autolink(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 674{
 675	struct buf work = { data, 0, 0, 0, 0 };
 676
 677	if (offset > 0 && !isspace(data[-1]))
 678		return 0;
 679
 680	if (!is_safe_link(data, size))
 681		return 0;
 682
 683	while (work.size < size && !isspace(data[work.size]))
 684		work.size++;
 685
 686	if (rndr->make.autolink) {
 687		struct buf *u_link = rndr_newbuf(rndr);
 688		unscape_text(u_link, &work);
 689
 690		rndr->make.autolink(ob, u_link, MKDA_NORMAL, rndr->make.opaque);
 691		rndr_popbuf(rndr);
 692	}
 693
 694	return work.size;
 695}
 696
 697/* char_link • '[': parsing a link or an image */
/*   three forms are handled: inline "[text](link 'title')", reference */
/*   "[text][id]" (an empty id falls back to the text) and shortcut */
/*   reference "[text]"; a '!' right before the '[' selects the image */
/*   renderer instead of the link one */
/*   returns the number of chars consumed from data, or 0 when nothing was */
/*   rendered (no suitable renderer, malformed link, unknown reference, or */
/*   the renderer callback returned 0) */
 698static size_t
 699char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 700{
 701	int is_img = (offset && data[-1] == '!'), level;
 702	size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
 703	struct buf *content = 0;
 704	struct buf *link = 0;
 705	struct buf *title = 0;
 706	struct buf *u_link = 0;
	/* pool depth on entry, restored at cleanup to release every buffer */
 707	size_t org_work_size = rndr->work.size;
 708	int text_has_nl = 0, ret = 0;
 709
 710	/* checking whether the correct renderer exists */
 711	if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link))
 712		goto cleanup;
 713
 714	/* looking for the matching closing bracket */
	/* (a char preceded by a backslash never changes the nesting level) */
 715	for (level = 1; i < size; i += 1) {
 716		if (data[i] == '\n')
 717			text_has_nl = 1;
 718
 719		else if (data[i - 1] == '\\')
 720			continue;
 721
 722		else if (data[i] == '[')
 723			level++;
 724
 725		else if (data[i] == ']') {
 726			level--;
 727			if (level <= 0)
 728				break;
 729		}
 730	}
 731
 732	if (i >= size)
 733		goto cleanup;
 734
	/* txt_e is the offset of the ']' closing the link text */
 735	txt_e = i;
 736	i += 1;
 737
 738	/* skip any amount of whitespace or newline */
 739	/* (this is much more lax than original markdown syntax) */
 740	while (i < size && isspace(data[i]))
 741		i++;
 742
 743	/* inline style link */
 744	if (i < size && data[i] == '(') {
 745		/* skipping initial whitespace */
 746		i += 1;
 747
 748		while (i < size && isspace(data[i]))
 749			i++;
 750
 751		link_b = i;
 752
 753		/* looking for link end: ' " ) */
 754		while (i < size) {
 755			if (data[i] == '\\') i += 2;
 756			else if (data[i] == ')' || data[i] == '\'' || data[i] == '"') break;
 757			else i += 1;
 758		}
 759
 760		if (i >= size) goto cleanup;
 761		link_e = i;
 762
 763		/* looking for title end if present */
 764		if (data[i] == '\'' || data[i] == '"') {
 765			i++;
 766			title_b = i;
 767
 768			while (i < size) {
 769				if (data[i] == '\\') i += 2;
 770				else if (data[i] == ')') break;
 771				else i += 1;
 772			}
 773
 774			if (i >= size) goto cleanup;
 775
 776			/* skipping whitespaces after title */
 777			title_e = i - 1;
 778			while (title_e > title_b && isspace(data[title_e]))
 779				title_e--;
 780
 781			/* checking for closing quote presence */
			/* (without it the quote belongs to the link, not to a title) */
 782			if (data[title_e] != '\'' &&  data[title_e] != '"') {
 783				title_b = title_e = 0;
 784				link_e = i;
 785			}
 786		}
 787
 788		/* remove whitespace at the end of the link */
 789		while (link_e > link_b && isspace(data[link_e - 1]))
 790			link_e--;
 791
 792		/* remove optional angle brackets around the link */
 793		if (data[link_b] == '<') link_b++;
 794		if (data[link_e - 1] == '>') link_e--;
 795
 796		/* building escaped link and title */
 797		if (link_e > link_b) {
 798			link = rndr_newbuf(rndr);
 799			bufput(link, data + link_b, link_e - link_b);
 800		}
 801
 802		if (title_e > title_b) {
 803			title = rndr_newbuf(rndr);
 804			bufput(title, data + title_b, title_e - title_b);
 805		}
 806
		/* stepping over the closing ')' */
 807		i++;
 808	}
 809
 810	/* reference style link */
 811	else if (i < size && data[i] == '[') {
 812		struct buf id = { 0, 0, 0, 0, 0 };
 813		struct link_ref *lr;
 814
 815		/* looking for the id */
 816		i += 1;
 817		link_b = i;
 818		while (i < size && data[i] != ']') i++;
 819		if (i >= size) goto cleanup;
 820		link_e = i;
 821
 822		/* finding the link_ref */
		/* (an empty id means the link text itself is the id) */
 823		if (link_b == link_e) {
 824			if (text_has_nl) {
 825				struct buf *b = rndr_newbuf(rndr);
 826				size_t j;
 827
				/* newlines become one space, or nothing after a space */
 828				for (j = 1; j < txt_e; j++) {
 829					if (data[j] != '\n')
 830						bufputc(b, data[j]);
 831					else if (data[j - 1] != ' ')
 832						bufputc(b, ' ');
 833				}
 834
 835				id.data = b->data;
 836				id.size = b->size;
 837			} else {
 838				id.data = data + 1;
 839				id.size = txt_e - 1;
 840			}
 841		} else {
 842			id.data = data + link_b;
 843			id.size = link_e - link_b;
 844		}
 845
 846		lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
 847		if (!lr) goto cleanup;
 848
 849		/* keeping link and title from link_ref */
 850		link = lr->link;
 851		title = lr->title;
 852		i += 1;
 853	}
 854
 855	/* shortcut reference style link */
 856	else {
 857		struct buf id = { 0, 0, 0, 0, 0 };
 858		struct link_ref *lr;
 859
 860		/* crafting the id */
		/* (same newline folding as for an empty reference id) */
 861		if (text_has_nl) {
 862			struct buf *b = rndr_newbuf(rndr);
 863			size_t j;
 864
 865			for (j = 1; j < txt_e; j++) {
 866				if (data[j] != '\n')
 867					bufputc(b, data[j]);
 868				else if (data[j - 1] != ' ')
 869					bufputc(b, ' ');
 870			}
 871
 872			id.data = b->data;
 873			id.size = b->size;
 874		} else {
 875			id.data = data + 1;
 876			id.size = txt_e - 1;
 877		}
 878
 879		/* finding the link_ref */
 880		lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
 881		if (!lr) goto cleanup;
 882
 883		/* keeping link and title from link_ref */
 884		link = lr->link;
 885		title = lr->title;
 886
 887		/* rewinding the whitespace */
 888		i = txt_e + 1;
 889	}
 890
 891	/* building content: img alt is escaped, link content is parsed */
 892	if (txt_e > 1) {
 893		content = rndr_newbuf(rndr);
 894		if (is_img) bufput(content, data + 1, txt_e - 1);
 895		else parse_inline(content, rndr, data + 1, txt_e - 1);
 896	}
 897
	/* the renderer gets the link with its backslash escapes removed */
 898	if (link) {
 899		u_link = rndr_newbuf(rndr);
 900		unscape_text(u_link, link);
 901	}
 902
 903	/* calling the relevant rendering function */
 904	if (is_img) {
		/* the '!' was already copied to the output as normal text */
 905		if (ob->size && ob->data[ob->size - 1] == '!')
 906			ob->size -= 1;
 907
 908		ret = rndr->make.image(ob, u_link, title, content, rndr->make.opaque);
 909	} else {
 910		ret = rndr->make.link(ob, u_link, title, content, rndr->make.opaque);
 911	}
 912
 913	/* cleanup */
	/* (releases all the working buffers taken above in one go) */
 914cleanup:
 915	rndr->work.size = (int)org_work_size;
 916	return ret ? i : 0;
 917}
 918
 919
 920
 921/*********************************
 922 * BLOCK-LEVEL PARSING FUNCTIONS *
 923 *********************************/
 924
 925/* is_empty • returns the line length when it is empty, 0 otherwise */
 926static size_t
 927is_empty(char *data, size_t size)
 928{
 929	size_t i;
 930	for (i = 0; i < size && data[i] != '\n'; i += 1)
 931		if (data[i] != ' ' && data[i] != '\t') return 0;
 932	return i + 1;
 933}
 934
 935/* is_hrule • returns whether a line is a horizontal rule */
 936static int
 937is_hrule(char *data, size_t size)
 938{
 939	size_t i = 0, n = 0;
 940	char c;
 941
 942	/* skipping initial spaces */
 943	if (size < 3) return 0;
 944	if (data[0] == ' ') { i += 1;
 945	if (data[1] == ' ') { i += 1;
 946	if (data[2] == ' ') { i += 1; } } }
 947
 948	/* looking at the hrule char */
 949	if (i + 2 >= size
 950	|| (data[i] != '*' && data[i] != '-' && data[i] != '_'))
 951		return 0;
 952	c = data[i];
 953
 954	/* the whole line must be the char or whitespace */
 955	while (i < size && data[i] != '\n') {
 956		if (data[i] == c) n += 1;
 957		else if (data[i] != ' ' && data[i] != '\t')
 958			return 0;
 959		i += 1; }
 960
 961	return n >= 3;
 962}
 963
 964/* check if a line is a code fence; return its size if it is */
 965static size_t
 966is_codefence(char *data, size_t size, struct buf *syntax)
 967{
 968	size_t i = 0, n = 0;
 969	char c;
 970
 971	/* skipping initial spaces */
 972	if (size < 3) return 0;
 973	if (data[0] == ' ') { i += 1;
 974	if (data[1] == ' ') { i += 1;
 975	if (data[2] == ' ') { i += 1; } } }
 976
 977	/* looking at the hrule char */
 978	if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
 979		return 0;
 980
 981	c = data[i];
 982
 983	/* the whole line must be the char or whitespace */
 984	while (i < size && data[i] == c) {
 985		n++; i++;
 986	}
 987
 988	if (n < 3)
 989		return 0;
 990
 991	if (syntax != NULL) {
 992		size_t syn = 0;
 993
 994		while (i < size && (data[i] == ' ' || data[i] == '\t'))
 995			i++;
 996
 997		syntax->data = data + i;
 998
 999		if (i < size && data[i] == '{') {
1000			i++; syntax->data++;
1001
1002			while (i < size && data[i] != '}' && data[i] != '\n') {
1003				syn++; i++;
1004			}
1005
1006			if (i == size || data[i] != '}')
1007				return 0;
1008
1009			/* strip all whitespace at the beggining and the end
1010			 * of the {} block */
1011			while (syn > 0 && isspace(syntax->data[0])) {
1012				syntax->data++; syn--;
1013			}
1014
1015			while (syn > 0 && isspace(syntax->data[syn - 1]))
1016				syn--;
1017
1018			i++;
1019		} else {
1020			while (i < size && !isspace(data[i])) {
1021				syn++; i++;
1022			}
1023		}
1024
1025		syntax->size = syn;
1026	}
1027
1028	while (i < size && data[i] != '\n') {
1029		if (!isspace(data[i]))
1030			return 0;
1031
1032		i++;
1033	}
1034
1035	return i + 1;
1036}
1037
/* is_headerline • returns whether the line is a setext-style hdr underline */
/*   1 for a line of '=', 2 for a line of '-', 0 otherwise; spaces and */
/*   tabs are allowed after the underline chars */
/*   data[0] is read unconditionally, so size must not be 0 */
static int
is_headerline(char *data, size_t size)
{
	const char mark = data[0];
	size_t pos = 1;
	int level;

	if (mark == '=')
		level = 1;
	else if (mark == '-')
		level = 2;
	else
		return 0;

	/* the underline itself, then optional trailing blanks */
	while (pos < size && data[pos] == mark)
		pos++;
	while (pos < size && (data[pos] == ' ' || data[pos] == '\t'))
		pos++;

	if (pos >= size || data[pos] == '\n')
		return level;

	return 0;
}
1058
/* prefix_quote • returns blockquote prefix length */
/*   the prefix is up to three spaces, a '>' and one optional space or */
/*   tab; 0 is returned when the line is not quoted */
static size_t
prefix_quote(char *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	/* one blank after the '>' belongs to the prefix */
	if (pos + 1 < size && (data[pos + 1] == ' ' || data[pos + 1] == '\t'))
		return pos + 2;

	return pos + 1;
}
1073
/* prefix_code • returns prefix length for block code */
/*   1 for a leading tab, 4 for four leading spaces, 0 otherwise */
static size_t
prefix_code(char *data, size_t size)
{
	if (size > 0 && data[0] == '\t')
		return 1;

	if (size > 3 && memcmp(data, "    ", 4) == 0)
		return 4;

	return 0;
}
1083
/* prefix_oli • returns ordered list item prefix */
/*   the prefix is up to three spaces, one or more digits, a '.' and a */
/*   space or a tab; its length is returned, or 0 when there is none */
static size_t
prefix_oli(char *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is needed */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	if (pos + 1 >= size || data[pos] != '.')
		return 0;

	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;

	return pos + 2;
}
1098
/* prefix_uli • returns unordered list item prefix */
/*   the prefix is up to three spaces, one of '*', '+' or '-', and a */
/*   space or a tab; its length is returned, or 0 when there is none */
static size_t
prefix_uli(char *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* the bullet and the blank after it must both fit */
	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '*' && data[pos] != '+' && data[pos] != '-')
		return 0;

	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;

	return pos + 2;
}
1113
1114
1115/* parse_block • parsing of block-level content (forward declaration) */
/*   nothing is returned; parse_blockquote calls it to render the */
/*   contents of a quote into a working buffer */
1116static void parse_block(struct buf *ob, struct render *rndr,
1117			char *data, size_t size);
1118
1119
1120/* parse_blockquote • handles parsing of a blockquote fragment */
/*   the quote prefix of every line is stripped and the remaining text is */
/*   compacted IN PLACE inside data (the input is modified), then parsed */
/*   as blocks and handed to the blockquote callback when there is one */
/*   lines without a prefix are kept in the quote, until an empty line */
/*   that ends the input or is followed by a non-empty unquoted line */
/*   returns the offset of the end of the last line looked at */
1121static size_t
1122parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size)
1123{
1124	size_t beg, end = 0, pre, work_size = 0;
1125	char *work_data = 0;
1126	struct buf *out = 0;
1127
1128	out = rndr_newbuf(rndr);
1129	beg = 0;
1130	while (beg < size) {
		/* end is set right after the newline of the line starting at beg */
1131		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1132
1133		pre = prefix_quote(data + beg, end - beg);
1134
1135		if (pre)
1136			beg += pre; /* skipping prefix */
1137
1138		/* empty line followed by non-quote line */
1139		else if (is_empty(data + beg, end - beg) &&
1140				(end >= size || (prefix_quote(data + end, size - end) == 0 &&
1141				!is_empty(data + end, size - end))))
1142			break;
1143
1144		if (beg < end) { /* copy into the in-place working buffer */
1145			/* bufput(work, data + beg, end - beg); */
			/* the first kept line anchors the buffer; later lines are */
			/* moved down only when a stripped prefix left a gap */
1146			if (!work_data)
1147				work_data = data + beg;
1148			else if (data + beg != work_data + work_size)
1149				memmove(work_data + work_size, data + beg, end - beg);
1150			work_size += end - beg;
1151		}
1152		beg = end;
1153	}
1154
	/* work_data is still null here when no line was kept */
1155	parse_block(out, rndr, work_data, work_size);
1156	if (rndr->make.blockquote)
1157		rndr->make.blockquote(ob, out, rndr->make.opaque);
1158	rndr_popbuf(rndr);
1159	return end;
1160}
1161
/* parse_htmlblock • forward declaration, needed by parse_paragraph */
static size_t parse_htmlblock(struct buf *ob, struct render *rndr,
			char *data, size_t size, int do_render);
1164
1165/* parse_blockquote • hanldes parsing of a regular paragraph */
1166static size_t
1167parse_paragraph(struct buf *ob, struct render *rndr, char *data, size_t size)
1168{
1169	size_t i = 0, end = 0;
1170	int level = 0;
1171	struct buf work = { data, 0, 0, 0, 0 }; /* volatile working buffer */
1172
1173	while (i < size) {
1174		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1175
1176		if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0)
1177			break;
1178
1179		if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
1180			if (data[i] == '<' && rndr->make.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1181				end = i;
1182				break;
1183			}
1184		}
1185
1186		if (data[i] == '#' || is_hrule(data + i, size - i)) {
1187			end = i;
1188			break;
1189		}
1190
1191		i = end;
1192	}
1193
1194	work.size = i;
1195	while (work.size && data[work.size - 1] == '\n')
1196		work.size--;
1197
1198	if (!level) {
1199		struct buf *tmp = rndr_newbuf(rndr);
1200		parse_inline(tmp, rndr, work.data, work.size);
1201		if (rndr->make.paragraph)
1202			rndr->make.paragraph(ob, tmp, rndr->make.opaque);
1203		rndr_popbuf(rndr);
1204	} else {
1205		struct buf *header_work;
1206
1207		if (work.size) {
1208			size_t beg;
1209			i = work.size;
1210			work.size -= 1;
1211
1212			while (work.size && data[work.size] != '\n')
1213				work.size -= 1;
1214
1215			beg = work.size + 1;
1216			while (work.size && data[work.size - 1] == '\n')
1217				work.size -= 1;
1218
1219			if (work.size > 0) {
1220				struct buf *tmp = rndr_newbuf(rndr);
1221				parse_inline(tmp, rndr, work.data, work.size);
1222
1223				if (rndr->make.paragraph)
1224					rndr->make.paragraph(ob, tmp, rndr->make.opaque);
1225
1226				rndr_popbuf(rndr);
1227				work.data += beg;
1228				work.size = i - beg;
1229			}
1230			else work.size = i;
1231		}
1232
1233		header_work = rndr_newbuf(rndr);
1234		parse_inline(header_work, rndr, work.data, work.size);
1235
1236		if (rndr->make.header)
1237			rndr->make.header(ob, header_work, (int)level, rndr->make.opaque);
1238
1239		rndr_popbuf(rndr);
1240	}
1241
1242	return end;
1243}
1244
1245/* parse_fencedcode • hanldes parsing of a block-level code fragment */
1246static size_t
1247parse_fencedcode(struct buf *ob, struct render *rndr, char *data, size_t size)
1248{
1249	size_t beg, end;
1250	struct buf *work = 0;
1251	struct buf lang = { 0, 0, 0, 0, 0 };
1252
1253	beg = is_codefence(data, size, &lang);
1254	if (beg == 0) return 0;
1255
1256	work = rndr_newbuf(rndr);
1257
1258	while (beg < size) {
1259		size_t fence_end;
1260
1261		fence_end = is_codefence(data + beg, size - beg, NULL);
1262		if (fence_end != 0) {
1263			beg += fence_end;
1264			break;
1265		}
1266
1267		for (end = beg + 1; end < size && data[end - 1] != '\n'; end += 1);
1268
1269		if (beg < end) {
1270			/* verbatim copy to the working buffer,
1271				escaping entities */
1272			if (is_empty(data + beg, end - beg))
1273				bufputc(work, '\n');
1274			else bufput(work, data + beg, end - beg);
1275		}
1276		beg = end;
1277	}
1278
1279	if (work->size && work->data[work->size - 1] != '\n')
1280		bufputc(work, '\n');
1281
1282	if (rndr->make.blockcode)
1283		rndr->make.blockcode(ob, work, lang.size ? &lang : NULL, rndr->make.opaque);
1284
1285	rndr_popbuf(rndr);
1286	return beg;
1287}
1288
1289static size_t
1290parse_blockcode(struct buf *ob, struct render *rndr, char *data, size_t size)
1291{
1292	size_t beg, end, pre;
1293	struct buf *work = 0;
1294
1295	work = rndr_newbuf(rndr);
1296
1297	beg = 0;
1298	while (beg < size) {
1299		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1300		pre = prefix_code(data + beg, end - beg);
1301
1302		if (pre)
1303			beg += pre; /* skipping prefix */
1304		else if (!is_empty(data + beg, end - beg))
1305			/* non-empty non-prefixed line breaks the pre */
1306			break;
1307
1308		if (beg < end) {
1309			/* verbatim copy to the working buffer,
1310				escaping entities */
1311			if (is_empty(data + beg, end - beg))
1312				bufputc(work, '\n');
1313			else bufput(work, data + beg, end - beg);
1314		}
1315		beg = end;
1316	}
1317
1318	while (work->size && work->data[work->size - 1] == '\n')
1319		work->size -= 1;
1320
1321	bufputc(work, '\n');
1322
1323	if (rndr->make.blockcode)
1324		rndr->make.blockcode(ob, work, NULL, rndr->make.opaque);
1325
1326	rndr_popbuf(rndr);
1327	return beg;
1328}
1329
1330/* parse_listitem • parsing of a single list item */
1331/*	assuming initial prefix is already removed */
1332static size_t
1333parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int *flags)
1334{
1335	struct buf *work = 0, *inter = 0;
1336	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1337	int in_empty = 0, has_inside_empty = 0;
1338
1339	/* keeping book of the first indentation prefix */
1340	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1341		orgpre++;
1342
1343	beg = prefix_uli(data, size);
1344	if (!beg)
1345		beg = prefix_oli(data, size);
1346
1347	if (!beg)
1348		return 0;
1349
1350	/* skipping to the beginning of the following line */
1351	end = beg;
1352	while (end < size && data[end - 1] != '\n')
1353		end++;
1354
1355	/* getting working buffers */
1356	work = rndr_newbuf(rndr);
1357	inter = rndr_newbuf(rndr);
1358
1359	/* putting the first line into the working buffer */
1360	bufput(work, data + beg, end - beg);
1361	beg = end;
1362
1363	/* process the following lines */
1364	while (beg < size) {
1365		end++;
1366
1367		while (end < size && data[end - 1] != '\n')
1368			end++;
1369
1370		/* process an empty line */
1371		if (is_empty(data + beg, end - beg)) {
1372			in_empty = 1;
1373			beg = end;
1374			continue;
1375		}
1376
1377		/* calculating the indentation */
1378		i = 0;
1379		while (i < 4 && beg + i < end && data[beg + i] == ' ')
1380			i++;
1381
1382		pre = i;
1383		if (data[beg] == '\t') { i = 1; pre = 8; }
1384
1385		/* checking for a new item */
1386		if ((prefix_uli(data + beg + i, end - beg - i) &&
1387			!is_hrule(data + beg + i, end - beg - i)) ||
1388			prefix_oli(data + beg + i, end - beg - i)) {
1389			if (in_empty)
1390				has_inside_empty = 1;
1391
1392			if (pre == orgpre) /* the following item must have */
1393				break;             /* the same indentation */
1394
1395			if (!sublist)
1396				sublist = work->size;
1397		}
1398		/* joining only indented stuff after empty lines */
1399		else if (in_empty && i < 4 && data[beg] != '\t') {
1400				*flags |= MKD_LI_END;
1401				break;
1402		}
1403		else if (in_empty) {
1404			bufputc(work, '\n');
1405			has_inside_empty = 1;
1406		}
1407
1408		in_empty = 0;
1409
1410		/* adding the line without prefix into the working buffer */
1411		bufput(work, data + beg + i, end - beg - i);
1412		beg = end;
1413	}
1414
1415	/* render of li contents */
1416	if (has_inside_empty)
1417		*flags |= MKD_LI_BLOCK;
1418
1419	if (*flags & MKD_LI_BLOCK) {
1420		/* intermediate render of block li */
1421		if (sublist && sublist < work->size) {
1422			parse_block(inter, rndr, work->data, sublist);
1423			parse_block(inter, rndr, work->data + sublist, work->size - sublist); 
1424		}
1425		else
1426			parse_block(inter, rndr, work->data, work->size);
1427	} else {
1428		/* intermediate render of inline li */
1429		if (sublist && sublist < work->size) {
1430			parse_inline(inter, rndr, work->data, sublist);
1431			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1432		}
1433		else
1434			parse_inline(inter, rndr, work->data, work->size);
1435	}
1436
1437	/* render of li itself */
1438	if (rndr->make.listitem)
1439		rndr->make.listitem(ob, inter, *flags, rndr->make.opaque);
1440
1441	rndr_popbuf(rndr);
1442	rndr_popbuf(rndr);
1443	return beg;
1444}
1445
1446
1447/* parse_list • parsing ordered or unordered list block */
1448static size_t
1449parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int flags)
1450{
1451	struct buf *work = 0;
1452	size_t i = 0, j;
1453
1454	work = rndr_newbuf(rndr);
1455
1456	while (i < size) {
1457		j = parse_listitem(work, rndr, data + i, size - i, &flags);
1458		i += j;
1459
1460		if (!j || (flags & MKD_LI_END))
1461			break;
1462	}
1463
1464	if (rndr->make.list)
1465		rndr->make.list(ob, work, flags, rndr->make.opaque);
1466	rndr_popbuf(rndr);
1467	return i;
1468}
1469
1470
1471/* parse_atxheader • parsing of atx-style headers */
1472static size_t
1473parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
1474{
1475	size_t level = 0;
1476	size_t i, end, skip;
1477
1478	if (!size || data[0] != '#')
1479		return 0;
1480
1481	while (level < size && level < 6 && data[level] == '#')
1482		level++;
1483
1484	for (i = level; i < size && (data[i] == ' ' || data[i] == '\t'); i++);
1485
1486	for (end = i; end < size && data[end] != '\n'; end++);
1487	skip = end;
1488
1489	while (end && data[end - 1] == '#')
1490		end--;
1491
1492	while (end && (data[end - 1] == ' ' || data[end - 1] == '\t'))
1493		end--;
1494
1495	if (end > i) {
1496		struct buf *work = rndr_newbuf(rndr);
1497
1498		parse_inline(work, rndr, data + i, end - i);
1499
1500		if (rndr->make.header)
1501			rndr->make.header(ob, work, (int)level, rndr->make.opaque);
1502
1503		rndr_popbuf(rndr);
1504	}
1505
1506	return skip;
1507}
1508
1509
1510/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
1511/*	returns the length on match, 0 otherwise */
1512static size_t
1513htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size)
1514{
1515	size_t i, w;
1516
1517	/* assuming data[0] == '<' && data[1] == '/' already tested */
1518
1519	/* checking tag is a match */
1520	if (tag->size + 3 >= size
1521	|| strncasecmp(data + 2, tag->text, tag->size)
1522	|| data[tag->size + 2] != '>')
1523		return 0;
1524
1525	/* checking white lines */
1526	i = tag->size + 3;
1527	w = 0;
1528	if (i < size && (w = is_empty(data + i, size - i)) == 0)
1529		return 0; /* non-blank after tag */
1530	i += w;
1531	w = 0;
1532
1533	if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
1534		if (i < size)
1535			w = is_empty(data + i, size - i);
1536	} else  {
1537		if (i < size && (w = is_empty(data + i, size - i)) == 0)
1538			return 0; /* non-blank line after tag line */
1539	}
1540
1541	return i + w;
1542}
1543
1544
1545/* parse_htmlblock • parsing of inline HTML block */
1546static size_t
1547parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render)
1548{
1549	size_t i, j = 0;
1550	struct html_tag *curtag;
1551	int found;
1552	struct buf work = { data, 0, 0, 0, 0 };
1553
1554	/* identification of the opening tag */
1555	if (size < 2 || data[0] != '<') return 0;
1556	curtag = find_block_tag(data + 1, size - 1);
1557
1558	/* handling of special cases */
1559	if (!curtag) {
1560
1561		/* HTML comment, laxist form */
1562		if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
1563			i = 5;
1564
1565			while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
1566				i++;
1567
1568			i++;
1569
1570			if (i < size)
1571				j = is_empty(data + i, size - i);
1572
1573			if (j) {
1574				work.size = i + j;
1575				if (do_render && rndr->make.blockhtml)
1576					rndr->make.blockhtml(ob, &work, rndr->make.opaque);
1577				return work.size;
1578			} 
1579		}
1580
1581		/* HR, which is the only self-closing block tag considered */
1582		if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
1583			i = 3;
1584			while (i < size && data[i] != '>')
1585				i += 1;
1586
1587			if (i + 1 < size) {
1588				i += 1;
1589				j = is_empty(data + i, size - i);
1590				if (j) {
1591					work.size = i + j;
1592					if (do_render && rndr->make.blockhtml)
1593						rndr->make.blockhtml(ob, &work, rndr->make.opaque);
1594					return work.size;
1595				}
1596			} 
1597		}
1598
1599		/* no special case recognised */
1600		return 0;
1601	}
1602
1603	/* looking for an unindented matching closing tag */
1604	/*	followed by a blank line */
1605	i = 1;
1606	found = 0;
1607
1608	/* if not found, trying a second pass looking for indented match */
1609	/* but not if tag is "ins" or "del" (following original Markdown.pl) */
1610	if (curtag != INS_TAG && curtag != DEL_TAG) {
1611		i = 1;
1612		while (i < size) {
1613			i++;
1614			while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
1615				i++;
1616
1617			if (i + 2 + curtag->size >= size)
1618				break;
1619
1620			j = htmlblock_end(curtag, rndr, data + i - 1, size - i + 1);
1621
1622			if (j) {
1623				i += j - 1;
1624				found = 1;
1625				break;
1626			}
1627		} 
1628	}
1629
1630	if (!found) return 0;
1631
1632	/* the end of the block has been found */
1633	work.size = i;
1634	if (do_render && rndr->make.blockhtml)
1635		rndr->make.blockhtml(ob, &work, rndr->make.opaque);
1636
1637	return i;
1638}
1639
1640static void
1641parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, size_t columns, int *col_data)
1642{
1643	size_t i = 0, col;
1644	struct buf *row_work = 0;
1645
1646	row_work = rndr_newbuf(rndr);
1647
1648	if (i < size && data[i] == '|')
1649		i++;
1650
1651	for (col = 0; col < columns && i < size; ++col) {
1652		size_t cell_start, cell_end;
1653		struct buf *cell_work;
1654
1655		cell_work = rndr_newbuf(rndr);
1656
1657		while (i < size && isspace(data[i]))
1658			i++;
1659
1660		cell_start = i;
1661
1662		while (i < size && data[i] != '|')
1663			i++;
1664
1665		cell_end = i - 1;
1666
1667		while (cell_end > cell_start && isspace(data[cell_end]))
1668			cell_end--;
1669
1670		parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
1671		if (rndr->make.table_cell)
1672			rndr->make.table_cell(row_work, cell_work, col_data ? col_data[col] : 0, rndr->make.opaque);
1673
1674		rndr_popbuf(rndr);
1675		i++;
1676	}
1677
1678	for (; col < columns; ++col) {
1679		struct buf empty_cell = {0, 0, 0, 0, 0};
1680		if (rndr->make.table_cell)
1681			rndr->make.table_cell(row_work, &empty_cell, col_data ? col_data[col] : 0, rndr->make.opaque);
1682	}
1683
1684	if (rndr->make.table_row)
1685		rndr->make.table_row(ob, row_work, rndr->make.opaque);
1686
1687	rndr_popbuf(rndr);
1688}
1689
1690static size_t
1691parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size, size_t *columns, int **column_data)
1692{
1693	int pipes;
1694	size_t i = 0, col, header_end, under_end;
1695
1696	pipes = 0;
1697	while (i < size && data[i] != '\n')
1698		if (data[i++] == '|')
1699			pipes++;
1700
1701	if (i == size || pipes == 0)
1702		return 0;
1703
1704	header_end = i;
1705
1706	if (data[0] == '|')
1707		pipes--;
1708
1709	if (i > 2 && data[i - 1] == '|')
1710		pipes--;
1711
1712	*columns = pipes + 1;
1713	*column_data = calloc(*columns, sizeof(int));
1714
1715	/* Parse the header underline */
1716	i++;
1717	if (i < size && data[i] == '|')
1718		i++;
1719
1720	under_end = i;
1721	while (under_end < size && data[under_end] != '\n')
1722		under_end++;
1723
1724	for (col = 0; col < *columns && i < under_end; ++col) {
1725		if (data[i] == ':') {
1726			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
1727		}
1728
1729		while (i < under_end && data[i] == '-')
1730			i++;
1731
1732		if (i < under_end && data[i] == ':') {
1733			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
1734		}
1735
1736		if (i < under_end && data[i] != '|')
1737			break;
1738
1739		i++;
1740	}
1741
1742	if (col < *columns)
1743		return 0;
1744
1745	parse_table_row(ob, rndr, data, header_end, *columns, *column_data);
1746	return under_end + 1;
1747}
1748
1749static size_t
1750parse_table(struct buf *ob, struct render *rndr, char *data, size_t size)
1751{
1752	size_t i;
1753
1754	struct buf *header_work = 0;
1755	struct buf *body_work = 0;
1756
1757	size_t columns;
1758	int *col_data = NULL;
1759
1760	header_work = rndr_newbuf(rndr);
1761	body_work = rndr_newbuf(rndr);
1762
1763	i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
1764	if (i > 0) {
1765
1766		while (i < size) {
1767			size_t row_start;
1768			int pipes = 0;
1769
1770			row_start = i;
1771
1772			while (i < size && data[i] != '\n')
1773				if (data[i++] == '|')
1774					pipes++;
1775
1776			if (pipes == 0 || i == size) {
1777				i = row_start;
1778				break;
1779			}
1780
1781			parse_table_row(body_work, rndr, data + row_start, i - row_start, columns, col_data);
1782			i++;
1783		}
1784
1785		if (rndr->make.table)
1786			rndr->make.table(ob, header_work, body_work, rndr->make.opaque);
1787	}
1788
1789	free(col_data);
1790	rndr_popbuf(rndr);
1791	rndr_popbuf(rndr);
1792	return i;
1793}
1794
1795/* parse_block • parsing of one block, returning next char to parse */
1796static void
1797parse_block(struct buf *ob, struct render *rndr, char *data, size_t size)
1798{
1799	size_t beg, end, i;
1800	char *txt_data;
1801	beg = 0;
1802
1803	if (rndr->work.size > rndr->max_nesting)
1804		return;
1805
1806	while (beg < size) {
1807		txt_data = data + beg;
1808		end = size - beg;
1809
1810		if (data[beg] == '#')
1811			beg += parse_atxheader(ob, rndr, txt_data, end);
1812
1813		else if (data[beg] == '<' && rndr->make.blockhtml &&
1814				(i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
1815			beg += i;
1816
1817		else if ((i = is_empty(txt_data, end)) != 0)
1818			beg += i;
1819
1820		else if (is_hrule(txt_data, end)) {
1821			if (rndr->make.hrule)
1822				rndr->make.hrule(ob, rndr->make.opaque);
1823
1824			while (beg < size && data[beg] != '\n')
1825				beg++;
1826
1827			beg++;
1828		}
1829
1830		else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1831			(i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
1832			beg += i;
1833
1834		else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
1835			(i = parse_table(ob, rndr, txt_data, end)) != 0)
1836			beg += i;
1837
1838		else if (prefix_quote(txt_data, end))
1839			beg += parse_blockquote(ob, rndr, txt_data, end);
1840
1841		else if (prefix_code(txt_data, end))
1842			beg += parse_blockcode(ob, rndr, txt_data, end);
1843
1844		else if (prefix_uli(txt_data, end))
1845			beg += parse_list(ob, rndr, txt_data, end, 0);
1846
1847		else if (prefix_oli(txt_data, end))
1848			beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
1849
1850		else
1851			beg += parse_paragraph(ob, rndr, txt_data, end);
1852	}
1853}
1854
1855
1856
1857/*********************
1858 * REFERENCE PARSING *
1859 *********************/
1860
1861/* is_ref • returns whether a line is a reference or not */
1862static int
1863is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
1864{
1865/*	int n; */
1866	size_t i = 0;
1867	size_t id_offset, id_end;
1868	size_t link_offset, link_end;
1869	size_t title_offset, title_end;
1870	size_t line_end;
1871	struct link_ref *lr;
1872/*	struct buf id = { 0, 0, 0, 0, 0 }; / * volatile buf for id search */
1873
1874	/* up to 3 optional leading spaces */
1875	if (beg + 3 >= end) return 0;
1876	if (data[beg] == ' ') { i = 1;
1877	if (data[beg + 1] == ' ') { i = 2;
1878	if (data[beg + 2] == ' ') { i = 3;
1879	if (data[beg + 3] == ' ') return 0; } } }
1880	i += beg;
1881
1882	/* id part: anything but a newline between brackets */
1883	if (data[i] != '[') return 0;
1884	i += 1;
1885	id_offset = i;
1886	while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
1887		i += 1;
1888	if (i >= end || data[i] != ']') return 0;
1889	id_end = i;
1890
1891	/* spacer: colon (space | tab)* newline? (space | tab)* */
1892	i += 1;
1893	if (i >= end || data[i] != ':') return 0;
1894	i += 1;
1895	while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
1896	if (i < end && (data[i] == '\n' || data[i] == '\r')) {
1897		i += 1;
1898		if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; }
1899	while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
1900	if (i >= end) return 0;
1901
1902	/* link: whitespace-free sequence, optionally between angle brackets */
1903	if (data[i] == '<') i += 1;
1904	link_offset = i;
1905	while (i < end && data[i] != ' ' && data[i] != '\t'
1906			&& data[i] != '\n' && data[i] != '\r') i += 1;
1907	if (data[i - 1] == '>') link_end = i - 1;
1908	else link_end = i;
1909
1910	/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
1911	while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
1912	if (i < end && data[i] != '\n' && data[i] != '\r'
1913			&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
1914		return 0;
1915	line_end = 0;
1916	/* computing end-of-line */
1917	if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
1918	if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
1919		line_end = i + 1;
1920
1921	/* optional (space|tab)* spacer after a newline */
1922	if (line_end) {
1923		i = line_end + 1;
1924		while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; }
1925
1926	/* optional title: any non-newline sequence enclosed in '"()
1927					alone on its line */
1928	title_offset = title_end = 0;
1929	if (i + 1 < end
1930	&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
1931		i += 1;
1932		title_offset = i;
1933		/* looking for EOL */
1934		while (i < end && data[i] != '\n' && data[i] != '\r') i += 1;
1935		if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
1936			title_end = i + 1;
1937		else	title_end = i;
1938		/* stepping back */
1939		i -= 1;
1940		while (i > title_offset && (data[i] == ' ' || data[i] == '\t'))
1941			i -= 1;
1942		if (i > title_offset
1943		&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
1944			line_end = title_end;
1945			title_end = i; } }
1946	if (!line_end) return 0; /* garbage after the link */
1947
1948	/* a valid ref has been found, filling-in return structures */
1949	if (last) *last = line_end;
1950	if (!refs) return 1;
1951	lr = arr_item(refs, arr_newitem(refs));
1952	lr->id = bufnew(id_end - id_offset);
1953	bufput(lr->id, data + id_offset, id_end - id_offset);
1954	lr->link = bufnew(link_end - link_offset);
1955	bufput(lr->link, data + link_offset, link_end - link_offset);
1956	if (title_end > title_offset) {
1957		lr->title = bufnew(title_end - title_offset);
1958		bufput(lr->title, data + title_offset,
1959					title_end - title_offset); }
1960	else lr->title = 0;
1961	return 1; 
1962}
1963
/* expand_tabs • appends a line to ob, with its tabs turned into spaces */
/*	each tab is replaced by 1 to 4 spaces, up to the next column that is */
/*	a multiple of 4; columns are counted in chars of the line */
static void expand_tabs(struct buf *ob, const char *line, size_t size)
{
	size_t pos = 0, column = 0;

	while (pos < size) {
		size_t run = 0;

		/* run is the length of the text before the next tab */
		while (pos + run < size && line[pos + run] != '\t')
			run++;

		if (run)
			bufput(ob, line + pos, run);

		pos += run;
		column += run;

		if (pos >= size)
			break;

		/* padding up to the next tab stop */
		do {
			bufputc(ob, ' ');
			column++;
		} while (column % 4);

		pos++; /* skipping the tab */
	}
}
1988
1989/**********************
1990 * EXPORTED FUNCTIONS *
1991 **********************/
1992
1993/* markdown • parses the input buffer and renders it into the output buffer */
1994void
1995ups_markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer, unsigned int extensions) {
1996	struct link_ref *lr;
1997	struct buf *text;
1998	size_t i, beg, end;
1999	struct render rndr;
2000
2001	/* filling the render structure */
2002	if (!rndrer)
2003		return;
2004
2005	text = bufnew(TEXT_UNIT);
2006	if (!text)
2007		return;
2008
2009	rndr.make = *rndrer;
2010	arr_init(&rndr.refs, sizeof (struct link_ref));
2011	parr_init(&rndr.work);
2012
2013	for (i = 0; i < 256; i += 1)
2014		rndr.active_char[i] = 0;
2015
2016	if (rndr.make.emphasis || rndr.make.double_emphasis || rndr.make.triple_emphasis) {
2017		rndr.active_char['*'] = char_emphasis;
2018		rndr.active_char['_'] = char_emphasis;
2019		if (extensions & MKDEXT_STRIKETHROUGH)
2020			rndr.active_char['~'] = char_emphasis;
2021	}
2022
2023	if (rndr.make.codespan)
2024		rndr.active_char['`'] = char_codespan;
2025
2026	if (rndr.make.linebreak)
2027		rndr.active_char['\n'] = char_linebreak;
2028
2029	if (rndr.make.image || rndr.make.link)
2030		rndr.active_char['['] = char_link;
2031
2032	rndr.active_char['<'] = char_langle_tag;
2033	rndr.active_char['\\'] = char_escape;
2034	rndr.active_char['&'] = char_entity;
2035
2036	if (extensions & MKDEXT_AUTOLINK) {
2037		rndr.active_char['h'] = char_autolink; // http, https
2038		rndr.active_char['H'] = char_autolink;
2039
2040		rndr.active_char['f'] = char_autolink; // ftp
2041		rndr.active_char['F'] = char_autolink;
2042
2043		rndr.active_char['m'] = char_autolink; // mailto
2044		rndr.active_char['M'] = char_autolink;
2045	}
2046
2047	/* Extension data */
2048	rndr.ext_flags = extensions;
2049	rndr.max_nesting = 16;
2050
2051	/* first pass: looking for references, copying everything else */
2052	beg = 0;
2053	while (beg < ib->size) /* iterating over lines */
2054		if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs))
2055			beg = end;
2056		else { /* skipping to the next line */
2057			end = beg;
2058			while (end < ib->size && ib->data[end] != '\n' && ib->data[end] != '\r')
2059				end += 1;
2060
2061			/* adding the line body if present */
2062			if (end > beg)
2063				expand_tabs(text, ib->data + beg, end - beg);
2064
2065			while (end < ib->size && (ib->data[end] == '\n' || ib->data[end] == '\r')) {
2066				/* add one \n per newline */
2067				if (ib->data[end] == '\n' || (end + 1 < ib->size && ib->data[end + 1] != '\n'))
2068					bufputc(text, '\n');
2069				end += 1;
2070			}
2071
2072			beg = end;
2073		}
2074
2075	/* sorting the reference array */
2076	if (rndr.refs.size)
2077		qsort(rndr.refs.base, rndr.refs.size, rndr.refs.unit, cmp_link_ref_sort);
2078
2079	/* adding a final newline if not already present */
2080	if (!text->size)
2081		goto cleanup;
2082
2083	if (text->data[text->size - 1] != '\n' &&  text->data[text->size - 1] != '\r')
2084		bufputc(text, '\n');
2085
2086	/* second pass: actual rendering */
2087	if (rndr.make.doc_header)
2088		rndr.make.doc_header(ob, rndr.make.opaque);
2089
2090	parse_block(ob, &rndr, text->data, text->size);
2091
2092	if (rndr.make.doc_footer)
2093		rndr.make.doc_footer(ob, rndr.make.opaque);
2094
2095	/* clean-up */
2096cleanup:
2097	bufrelease(text);
2098	lr = rndr.refs.base;
2099	for (i = 0; i < (size_t)rndr.refs.size; i += 1) {
2100		bufrelease(lr[i].id);
2101		bufrelease(lr[i].link);
2102		bufrelease(lr[i].title);
2103	}
2104
2105	arr_free(&rndr.refs);
2106
2107	assert(rndr.work.size == 0);
2108
2109	for (i = 0; i < (size_t)rndr.work.asize; i += 1)
2110		bufrelease(rndr.work.item[i]);
2111
2112	parr_free(&rndr.work);
2113}
2114
2115/* vim: set filetype=c: */