PageRenderTime 18ms CodeModel.GetById 18ms app.highlight 153ms RepoModel.GetById 1ms app.codeStats 1ms

/src/markdown.c

https://github.com/erlingmat/upskirt
C | 2202 lines | 2039 code | 93 blank | 70 comment | 181 complexity | 48652e388e42df46ac160f58d5c18817 MD5 | raw file
   1/* markdown.c - generic markdown parser */
   2
   3/*
   4 * Copyright (c) 2009, Natacha Porté
   5 * Copyright (c) 2011, Vicent Marti
   6 *
   7 * Permission to use, copy, modify, and distribute this software for any
   8 * purpose with or without fee is hereby granted, provided that the above
   9 * copyright notice and this permission notice appear in all copies.
  10 *
  11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18 */
  19
  20#include "markdown.h"
  21#include "array.h"
  22
  23#include <assert.h>
  24#include <string.h>
  25#include <strings.h> /* for strncasecmp */
  26#include <ctype.h>
  27#include <stdio.h>
  28
  29#define BUFFER_BLOCK 0
  30#define BUFFER_SPAN 1
  31
  32#define MKD_LI_END 8	/* internal list flag */
  33
  34/***************
  35 * LOCAL TYPES *
  36 ***************/
  37
/* link_ref • one link reference, looked up by its id */
struct link_ref {
	struct buf *id;		/* reference id; compared with bufcasecmp */
	struct buf *link;	/* link destination, used by char_link */
	struct buf *title;	/* link title, handed to the renderer as-is */
};
  44
/* char_trigger • function pointer to render active chars */
/*   returns the number of chars taken care of (0 means "no action") */
/*   data is the pointer to the beginning of the span */
/*   offset is the number of valid chars before data */
/*   size is the number of chars available starting at data */
struct render;
typedef size_t
(*char_trigger)(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);

/* one trigger per active construct; all share the char_trigger signature */
static size_t char_emphasis(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_autolink(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
  61
/* markdown_char_t • kinds of active chars */
/*   parse_inline uses the value read from render.active_char as an index */
/*   into markdown_char_ptrs, so the order here must match that table */
enum markdown_char_t {
	MD_CHAR_NONE = 0,	/* inactive char: copied as normal text */
	MD_CHAR_EMPHASIS,
	MD_CHAR_CODESPAN,
	MD_CHAR_LINEBREAK,
	MD_CHAR_LINK,
	MD_CHAR_LANGLE,
	MD_CHAR_ESCAPE,
	MD_CHAR_ENTITITY,	/* (sic) spelling kept: the name may be used elsewhere */
	MD_CHAR_AUTOLINK,
};
  73
/* markdown_char_ptrs • trigger for each markdown_char_t value */
/*   entries are in the same order as the enum; slot 0 (MD_CHAR_NONE) is */
/*   never called because parse_inline only dispatches on non-zero values */
static char_trigger markdown_char_ptrs[] = {
	NULL,
	&char_emphasis,
	&char_codespan,
	&char_linebreak,
	&char_link,
	&char_langle_tag,
	&char_escape,
	&char_entity,
	&char_autolink,
};
  85
/* render • structure containing one particular render */
struct render {
	struct mkd_renderer	make;	/* rendering callbacks and their opaque pointer */
	struct array refs;		/* link_ref entries, searched with arr_sorted_find */
	char active_char[256];		/* per-byte trigger index; 0 means inactive */
	struct parray work_bufs[2];	/* scratch buffer pools: BUFFER_BLOCK, BUFFER_SPAN */
	unsigned int ext_flags;		/* MKDEXT_* extension bits */
	size_t max_nesting;		/* parse_inline stops once more work buffers than this are in use */
};
  95
/* html_tag • structure for quick HTML tag search (inspired from discount) */
struct html_tag {
	const char *text;	/* tag name; only the first size chars are compared */
	size_t size;		/* length of the tag name */
};
 101
 102static inline struct buf *
 103rndr_newbuf(struct render *rndr, int type)
 104{
 105	static const size_t buf_size[2] = {256, 64};
 106	struct buf *work = NULL;
 107	struct parray *queue = &rndr->work_bufs[type];
 108
 109	if (queue->size < queue->asize) {
 110		work = queue->item[queue->size++];
 111		work->size = 0;
 112	} else {
 113		work = bufnew(buf_size[type]);
 114		parr_push(queue, work);
 115	}
 116
 117	return work;
 118}
 119
 120static inline void
 121rndr_popbuf(struct render *rndr, int type)
 122{
 123	rndr->work_bufs[type].size--;
 124}
 125
 126/********************
 127 * GLOBAL VARIABLES *
 128 ********************/
 129
/* block_tags • recognised block tags, sorted by cmp_html_tag */
/*   i.e. by name length first, then case-insensitively by name; */
/*   find_block_tag runs bsearch() on this table, so the order must be kept */
static struct html_tag block_tags[] = {
/*0*/	{ "p",		1 },
	{ "dl",		2 },
	{ "h1",		2 },
	{ "h2",		2 },
	{ "h3",		2 },
	{ "h4",		2 },
	{ "h5",		2 },
	{ "h6",		2 },
	{ "ol",		2 },
	{ "ul",		2 },
/*10*/	{ "del",	3 },
	{ "div",	3 },
/*12*/	{ "ins",	3 },
	{ "pre",	3 },
	{ "form",	4 },
	{ "math",	4 },
	{ "table",	5 },
	{ "iframe",	6 },
	{ "script",	6 },
	{ "fieldset",	8 },
	{ "noscript",	8 },
	{ "blockquote",	10 }
};

/* direct pointers to the "ins" and "del" entries; the indices must be */
/* updated whenever an entry is inserted before them in block_tags */
#define INS_TAG (block_tags + 12)
#define DEL_TAG (block_tags + 10)
 158
 159/***************************
 160 * HELPER FUNCTIONS *
 161 ***************************/
 162int
 163is_safe_link(const char *link, size_t link_len)
 164{
 165	static const size_t valid_uris_count = 4;
 166	static const char *valid_uris[] = {
 167		"http://", "https://", "ftp://", "mailto://"
 168	};
 169
 170	size_t i;
 171
 172	for (i = 0; i < valid_uris_count; ++i) {
 173		size_t len = strlen(valid_uris[i]);
 174
 175		if (link_len > len && strncasecmp(link, valid_uris[i], len) == 0)
 176			return 1;
 177	}
 178
 179	return 0;
 180}
 181
 182static void
 183unscape_text(struct buf *ob, struct buf *src)
 184{
 185	size_t i = 0, org;
 186	while (i < src->size) {
 187		org = i;
 188		while (i < src->size && src->data[i] != '\\')
 189			i++;
 190
 191		if (i > org)
 192			bufput(ob, src->data + org, i - org);
 193
 194		if (i + 1 >= src->size)
 195			break;
 196
 197		bufputc(ob, src->data[i + 1]);
 198		i += 2;
 199	}
 200}
 201
 202/* cmp_link_ref • comparison function for link_ref sorted arrays */
 203static int
 204cmp_link_ref(void *key, void *array_entry)
 205{
 206	struct link_ref *lr = array_entry;
 207	return bufcasecmp(key, lr->id);
 208}
 209
 210/* cmp_link_ref_sort • comparison function for link_ref qsort */
 211static int
 212cmp_link_ref_sort(const void *a, const void *b)
 213{
 214	const struct link_ref *lra = a;
 215	const struct link_ref *lrb = b;
 216	return bufcasecmp(lra->id, lrb->id);
 217}
 218
 219/* cmp_html_tag • comparison function for bsearch() (stolen from discount) */
 220static int
 221cmp_html_tag(const void *a, const void *b)
 222{
 223	const struct html_tag *hta = a;
 224	const struct html_tag *htb = b;
 225	if (hta->size != htb->size) return (int)((ssize_t)hta->size - (ssize_t)htb->size);
 226	return strncasecmp(hta->text, htb->text, hta->size);
 227}
 228
 229
 230/* find_block_tag • returns the current block tag */
 231static struct html_tag *
 232find_block_tag(char *data, size_t size)
 233{
 234	size_t i = 0;
 235	struct html_tag key;
 236
 237	/* looking for the word end */
 238	while (i < size && ((data[i] >= '0' && data[i] <= '9')
 239				|| (data[i] >= 'A' && data[i] <= 'Z')
 240				|| (data[i] >= 'a' && data[i] <= 'z')))
 241		i += 1;
 242	if (i >= size) return 0;
 243
 244	/* binary search of the tag */
 245	key.text = data;
 246	key.size = i;
 247	return bsearch(&key, block_tags,
 248				sizeof block_tags / sizeof block_tags[0],
 249				sizeof block_tags[0], cmp_html_tag);
 250}
 251
 252/****************************
 253 * INLINE PARSING FUNCTIONS *
 254 ****************************/
 255
 256/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
 257/* this is less strict than the original markdown e-mail address matching */
 258static size_t
 259is_mail_autolink(char *data, size_t size)
 260{
 261	size_t i = 0, nb = 0;
 262
 263	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
 264	while (i < size && (data[i] == '-' || data[i] == '.'
 265	|| data[i] == '_' || data[i] == '@'
 266	|| (data[i] >= 'a' && data[i] <= 'z')
 267	|| (data[i] >= 'A' && data[i] <= 'Z')
 268	|| (data[i] >= '0' && data[i] <= '9'))) {
 269		if (data[i] == '@') nb += 1;
 270		i += 1; }
 271	if (i >= size || data[i] != '>' || nb != 1) return 0;
 272	return i + 1;
 273}
 274
 275/* tag_length • returns the length of the given tag, or 0 is it's not valid */
 276static size_t
 277tag_length(char *data, size_t size, enum mkd_autolink *autolink)
 278{
 279	size_t i, j;
 280
 281	/* a valid tag can't be shorter than 3 chars */
 282	if (size < 3) return 0;
 283
 284	/* begins with a '<' optionally followed by '/', followed by letter */
 285	if (data[0] != '<') return 0;
 286	i = (data[1] == '/') ? 2 : 1;
 287	if ((data[i] < 'a' || data[i] > 'z')
 288	&&  (data[i] < 'A' || data[i] > 'Z')) return 0;
 289
 290	/* scheme test */
 291	*autolink = MKDA_NOT_AUTOLINK;
 292
 293	/* try to find the beggining of an URI */
 294	while (i < size && (isalpha(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
 295		i++;
 296
 297	if (i > 1 && data[i] == '@') {
 298		if ((j = is_mail_autolink(data + i, size - i)) != 0) {
 299			*autolink = MKDA_EMAIL;
 300			return i + j;
 301		}
 302	}
 303
 304	if (i > 2 && data[i] == ':') {
 305		*autolink = MKDA_NORMAL;
 306		i++;
 307	}
 308
 309	/* completing autolink test: no whitespace or ' or " */
 310	if (i >= size)
 311		*autolink = MKDA_NOT_AUTOLINK;
 312
 313	else if (*autolink) {
 314		j = i;
 315
 316		while (i < size) {
 317			if (data[i] == '\\') i += 2;
 318			else if (data[i] == '>' || data[i] == '\'' ||
 319					data[i] == '"' || isspace(data[i])) break;
 320			else i += 1;
 321		}
 322
 323		if (i >= size) return 0;
 324		if (i > j && data[i] == '>') return i + 1;
 325		/* one of the forbidden chars has been found */
 326		*autolink = MKDA_NOT_AUTOLINK;
 327	}
 328
 329	/* looking for sometinhg looking like a tag end */
 330	while (i < size && data[i] != '>') i += 1;
 331	if (i >= size) return 0;
 332	return i + 1;
 333}
 334
 335/* parse_inline • parses inline markdown elements */
 336static void
 337parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size)
 338{
 339	size_t i = 0, end = 0;
 340	char action = 0;
 341	struct buf work = { 0, 0, 0, 0, 0 };
 342
 343	if (rndr->work_bufs[BUFFER_SPAN].size +
 344		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
 345		return;
 346
 347	while (i < size) {
 348		/* copying inactive chars into the output */
 349		while (end < size && (action = rndr->active_char[(unsigned char)data[end]]) == 0) {
 350			end++;
 351		}
 352
 353		if (rndr->make.normal_text) {
 354			work.data = data + i;
 355			work.size = end - i;
 356			rndr->make.normal_text(ob, &work, rndr->make.opaque);
 357		}
 358		else
 359			bufput(ob, data + i, end - i);
 360
 361		if (end >= size) break;
 362		i = end;
 363
 364		/* calling the trigger */
 365		end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
 366		if (!end) /* no action from the callback */
 367			end = i + 1;
 368		else { 
 369			i += end;
 370			end = i;
 371		} 
 372	}
 373}
 374
 375/* find_emph_char • looks for the next emph char, skipping other constructs */
 376static size_t
 377find_emph_char(char *data, size_t size, char c)
 378{
 379	size_t i = 1;
 380
 381	while (i < size) {
 382		while (i < size && data[i] != c
 383		&& data[i] != '`' && data[i] != '[')
 384			i += 1;
 385		if (data[i] == c) return i;
 386
 387		/* not counting escaped chars */
 388		if (i && data[i - 1] == '\\') { i += 1; continue; }
 389
 390		/* skipping a code span */
 391		if (data[i] == '`') {
 392			size_t tmp_i = 0;
 393			i += 1;
 394			while (i < size && data[i] != '`') {
 395				if (!tmp_i && data[i] == c) tmp_i = i;
 396				i += 1; }
 397			if (i >= size) return tmp_i;
 398			i += 1; }
 399
 400		/* skipping a link */
 401		else if (data[i] == '[') {
 402			size_t tmp_i = 0;
 403			char cc;
 404			i += 1;
 405			while (i < size && data[i] != ']') {
 406				if (!tmp_i && data[i] == c) tmp_i = i;
 407				i += 1; }
 408			i += 1;
 409			while (i < size && (data[i] == ' '
 410			|| data[i] == '\t' || data[i] == '\n'))
 411				i += 1;
 412			if (i >= size) return tmp_i;
 413			if (data[i] != '[' && data[i] != '(') { /* not a link*/
 414				if (tmp_i) return tmp_i;
 415				else continue; }
 416			cc = data[i];
 417			i += 1;
 418			while (i < size && data[i] != cc) {
 419				if (!tmp_i && data[i] == c) tmp_i = i;
 420				i += 1; }
 421			if (i >= size) return tmp_i;
 422			i += 1; } }
 423	return 0;
 424}
 425
 426/* parse_emph1 • parsing single emphase */
 427/* closed by a symbol not preceded by whitespace and not followed by symbol */
 428static size_t
 429parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
 430{
 431	size_t i = 0, len;
 432	struct buf *work = 0;
 433	int r;
 434
 435	if (!rndr->make.emphasis) return 0;
 436
 437	/* skipping one symbol if coming from emph3 */
 438	if (size > 1 && data[0] == c && data[1] == c) i = 1;
 439
 440	while (i < size) {
 441		len = find_emph_char(data + i, size - i, c);
 442		if (!len) return 0;
 443		i += len;
 444		if (i >= size) return 0;
 445
 446		if (i + 1 < size && data[i + 1] == c) {
 447			i += 1;
 448			continue;
 449		}
 450
 451		if (data[i] == c && !isspace(data[i - 1])) {
 452
 453			if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
 454				if (!(i + 1 == size || isspace(data[i + 1]) || ispunct(data[i + 1])))
 455					continue;
 456			}
 457
 458			work = rndr_newbuf(rndr, BUFFER_SPAN);
 459			parse_inline(work, rndr, data, i);
 460			r = rndr->make.emphasis(ob, work, rndr->make.opaque);
 461			rndr_popbuf(rndr, BUFFER_SPAN);
 462			return r ? i + 1 : 0;
 463		}
 464	}
 465
 466	return 0;
 467}
 468
 469/* parse_emph2 • parsing single emphase */
 470static size_t
 471parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
 472{
 473	int (*render_method)(struct buf *ob, struct buf *text, void *opaque);
 474	size_t i = 0, len;
 475	struct buf *work = 0;
 476	int r;
 477
 478	render_method = (c == '~') ? rndr->make.strikethrough : rndr->make.double_emphasis;
 479
 480	if (!render_method)
 481		return 0;
 482	
 483	while (i < size) {
 484		len = find_emph_char(data + i, size - i, c);
 485		if (!len) return 0;
 486		i += len;
 487
 488		if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !isspace(data[i - 1])) {
 489			work = rndr_newbuf(rndr, BUFFER_SPAN);
 490			parse_inline(work, rndr, data, i);
 491			r = render_method(ob, work, rndr->make.opaque);
 492			rndr_popbuf(rndr, BUFFER_SPAN);
 493			return r ? i + 2 : 0;
 494		}
 495		i++;
 496	}
 497	return 0;
 498}
 499
 500/* parse_emph3 • parsing single emphase */
 501/* finds the first closing tag, and delegates to the other emph */
 502static size_t
 503parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
 504{
 505	size_t i = 0, len;
 506	int r;
 507
 508	while (i < size) {
 509		len = find_emph_char(data + i, size - i, c);
 510		if (!len) return 0;
 511		i += len;
 512
 513		/* skip whitespace preceded symbols */
 514		if (data[i] != c || isspace(data[i - 1]))
 515			continue;
 516
 517		if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->make.triple_emphasis) {
 518			/* triple symbol found */
 519			struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
 520
 521			parse_inline(work, rndr, data, i);
 522			r = rndr->make.triple_emphasis(ob, work, rndr->make.opaque);
 523			rndr_popbuf(rndr, BUFFER_SPAN);
 524			return r ? i + 3 : 0;
 525
 526		} else if (i + 1 < size && data[i + 1] == c) {
 527			/* double symbol found, handing over to emph1 */
 528			len = parse_emph1(ob, rndr, data - 2, size + 2, c);
 529			if (!len) return 0;
 530			else return len - 2;
 531
 532		} else {
 533			/* single symbol found, handing over to emph2 */
 534			len = parse_emph2(ob, rndr, data - 1, size + 1, c);
 535			if (!len) return 0;
 536			else return len - 1;
 537		}
 538	}
 539	return 0; 
 540}
 541
 542/* char_emphasis • single and double emphasis parsing */
 543static size_t
 544char_emphasis(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 545{
 546	char c = data[0];
 547	size_t ret;
 548
 549	if (size > 2 && data[1] != c) {
 550		/* whitespace cannot follow an opening emphasis;
 551		 * strikethrough only takes two characters '~~' */
 552		if (c == '~' || isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
 553			return 0;
 554
 555		return ret + 1;
 556	}
 557
 558	if (size > 3 && data[1] == c && data[2] != c) {
 559		if (isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
 560			return 0;
 561
 562		return ret + 2;
 563	}
 564
 565	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
 566		if (c == '~' || isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
 567			return 0;
 568
 569		return ret + 3;
 570	}
 571
 572	return 0; 
 573}
 574
 575
 576/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
 577static size_t
 578char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 579{
 580	if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
 581		return 0;
 582
 583	/* removing the last space from ob and rendering */
 584	while (ob->size && ob->data[ob->size - 1] == ' ')
 585		ob->size--;
 586
 587	return rndr->make.linebreak(ob, rndr->make.opaque) ? 1 : 0;
 588}
 589
 590
 591/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
 592static size_t
 593char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 594{
 595	size_t end, nb = 0, i, f_begin, f_end;
 596
 597	/* counting the number of backticks in the delimiter */
 598	while (nb < size && data[nb] == '`')
 599		nb++;
 600
 601	/* finding the next delimiter */
 602	i = 0;
 603	for (end = nb; end < size && i < nb; end++) {
 604		if (data[end] == '`') i++;
 605		else i = 0;
 606	}
 607
 608	if (i < nb && end >= size)
 609		return 0; /* no matching delimiter */
 610
 611	/* trimming outside whitespaces */
 612	f_begin = nb;
 613	while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t'))
 614		f_begin++;
 615
 616	f_end = end - nb;
 617	while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t'))
 618		f_end--;
 619
 620	/* real code span */
 621	if (f_begin < f_end) {
 622		struct buf work = { data + f_begin, f_end - f_begin, 0, 0, 0 };
 623		if (!rndr->make.codespan(ob, &work, rndr->make.opaque))
 624			end = 0;
 625	} else {
 626		if (!rndr->make.codespan(ob, 0, rndr->make.opaque))
 627			end = 0;
 628	}
 629
 630	return end;
 631}
 632
 633
 634/* char_escape • '\\' backslash escape */
 635static size_t
 636char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 637{
 638	struct buf work = { 0, 0, 0, 0, 0 };
 639
 640	if (size > 1) {
 641		if (rndr->make.normal_text) {
 642			work.data = data + 1;
 643			work.size = 1;
 644			rndr->make.normal_text(ob, &work, rndr->make.opaque);
 645		}
 646		else bufputc(ob, data[1]);
 647	}
 648
 649	return 2;
 650}
 651
 652/* char_entity • '&' escaped when it doesn't belong to an entity */
 653/* valid entities are assumed to be anything mathing &#?[A-Za-z0-9]+; */
 654static size_t
 655char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 656{
 657	size_t end = 1;
 658	struct buf work;
 659
 660	if (end < size && data[end] == '#')
 661		end++;
 662
 663	while (end < size && isalnum(data[end]))
 664		end++;
 665
 666	if (end < size && data[end] == ';')
 667		end += 1; /* real entity */
 668	else
 669		return 0; /* lone '&' */
 670
 671	if (rndr->make.entity) {
 672		work.data = data;
 673		work.size = end;
 674		rndr->make.entity(ob, &work, rndr->make.opaque);
 675	}
 676	else bufput(ob, data, end);
 677
 678	return end;
 679}
 680
 681/* char_langle_tag • '<' when tags or autolinks are allowed */
 682static size_t
 683char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 684{
 685	enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
 686	size_t end = tag_length(data, size, &altype);
 687	struct buf work = { data, end, 0, 0, 0 };
 688	int ret = 0;
 689
 690	if (end > 2) {
 691		if (rndr->make.autolink && altype != MKDA_NOT_AUTOLINK) {
 692			struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
 693			work.data = data + 1;
 694			work.size = end - 2;
 695			unscape_text(u_link, &work);
 696			ret = rndr->make.autolink(ob, u_link, altype, rndr->make.opaque);
 697			rndr_popbuf(rndr, BUFFER_SPAN);
 698		}
 699		else if (rndr->make.raw_html_tag)
 700			ret = rndr->make.raw_html_tag(ob, &work, rndr->make.opaque);
 701	}
 702
 703	if (!ret) return 0;
 704	else return end;
 705}
 706
 707static size_t
 708char_autolink(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 709{
 710	struct buf work = { data, 0, 0, 0, 0 };
 711	char cclose = 0;
 712	size_t link_end;
 713
 714	/* TODO:
 715	 * what's the fastest check we can do, previous char
 716	 * or URI prefix? We want to do the fastest one first
 717	 * to break asap
 718	 */
 719
 720	if (offset > 0) {
 721		switch (data[-1]) {
 722		case '"':	cclose = '"'; break;
 723		case '\'':	cclose = '\''; break;
 724		case '(':	cclose = ')'; break;
 725		case '[':	cclose = ']'; break;
 726		case '{':	cclose = '}'; break;
 727		case ' ': case '\t': case '\n': break;
 728		default:
 729			return 0;
 730		}
 731	}
 732
 733	if (!is_safe_link(data, size))
 734		return 0;
 735
 736	link_end = 0;
 737	while (link_end < size && !isspace(data[link_end]))
 738		link_end++;
 739
 740	if (cclose != 0) {
 741		size_t i = link_end;
 742		while (i > 0 && data[i] != cclose)
 743			i--;
 744
 745		if (i > 0)
 746			link_end = i;
 747	}
 748
 749	work.size = link_end;
 750
 751	if (rndr->make.autolink) {
 752		struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
 753		unscape_text(u_link, &work);
 754
 755		rndr->make.autolink(ob, u_link, MKDA_NORMAL, rndr->make.opaque);
 756		rndr_popbuf(rndr, BUFFER_SPAN);
 757	}
 758
 759	return work.size;
 760}
 761
 762/* char_link • '[': parsing a link or an image */
 763static size_t
 764char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
 765{
 766	int is_img = (offset && data[-1] == '!'), level;
 767	size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
 768	struct buf *content = 0;
 769	struct buf *link = 0;
 770	struct buf *title = 0;
 771	struct buf *u_link = 0;
 772	size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
 773	int text_has_nl = 0, ret = 0;
 774
 775	/* checking whether the correct renderer exists */
 776	if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link))
 777		goto cleanup;
 778
 779	/* looking for the matching closing bracket */
 780	for (level = 1; i < size; i += 1) {
 781		if (data[i] == '\n')
 782			text_has_nl = 1;
 783
 784		else if (data[i - 1] == '\\')
 785			continue;
 786
 787		else if (data[i] == '[')
 788			level++;
 789
 790		else if (data[i] == ']') {
 791			level--;
 792			if (level <= 0)
 793				break;
 794		}
 795	}
 796
 797	if (i >= size)
 798		goto cleanup;
 799
 800	txt_e = i;
 801	i += 1;
 802
 803	/* skip any amount of whitespace or newline */
 804	/* (this is much more laxist than original markdown syntax) */
 805	while (i < size && isspace(data[i]))
 806		i++;
 807
 808	/* inline style link */
 809	if (i < size && data[i] == '(') {
 810		/* skipping initial whitespace */
 811		i += 1;
 812
 813		while (i < size && isspace(data[i]))
 814			i++;
 815
 816		link_b = i;
 817
 818		/* looking for link end: ' " ) */
 819		while (i < size) {
 820			if (data[i] == '\\') i += 2;
 821			else if (data[i] == ')' || data[i] == '\'' || data[i] == '"') break;
 822			else i += 1;
 823		}
 824
 825		if (i >= size) goto cleanup;
 826		link_e = i;
 827
 828		/* looking for title end if present */
 829		if (data[i] == '\'' || data[i] == '"') {
 830			i++;
 831			title_b = i;
 832
 833			while (i < size) {
 834				if (data[i] == '\\') i += 2;
 835				else if (data[i] == ')') break;
 836				else i += 1;
 837			}
 838
 839			if (i >= size) goto cleanup;
 840
 841			/* skipping whitespaces after title */
 842			title_e = i - 1;
 843			while (title_e > title_b && isspace(data[title_e]))
 844				title_e--;
 845
 846			/* checking for closing quote presence */
 847			if (data[title_e] != '\'' &&  data[title_e] != '"') {
 848				title_b = title_e = 0;
 849				link_e = i;
 850			}
 851		}
 852
 853		/* remove whitespace at the end of the link */
 854		while (link_e > link_b && isspace(data[link_e - 1]))
 855			link_e--;
 856
 857		/* remove optional angle brackets around the link */
 858		if (data[link_b] == '<') link_b++;
 859		if (data[link_e - 1] == '>') link_e--;
 860
 861		/* building escaped link and title */
 862		if (link_e > link_b) {
 863			link = rndr_newbuf(rndr, BUFFER_SPAN);
 864			bufput(link, data + link_b, link_e - link_b);
 865		}
 866
 867		if (title_e > title_b) {
 868			title = rndr_newbuf(rndr, BUFFER_SPAN);
 869			bufput(title, data + title_b, title_e - title_b);
 870		}
 871
 872		i++;
 873	}
 874
 875	/* reference style link */
 876	else if (i < size && data[i] == '[') {
 877		struct buf id = { 0, 0, 0, 0, 0 };
 878		struct link_ref *lr;
 879
 880		/* looking for the id */
 881		i += 1;
 882		link_b = i;
 883		while (i < size && data[i] != ']') i++;
 884		if (i >= size) goto cleanup;
 885		link_e = i;
 886
 887		/* finding the link_ref */
 888		if (link_b == link_e) {
 889			if (text_has_nl) {
 890				struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
 891				size_t j;
 892
 893				for (j = 1; j < txt_e; j++) {
 894					if (data[j] != '\n')
 895						bufputc(b, data[j]);
 896					else if (data[j - 1] != ' ')
 897						bufputc(b, ' ');
 898				}
 899
 900				id.data = b->data;
 901				id.size = b->size;
 902			} else {
 903				id.data = data + 1;
 904				id.size = txt_e - 1;
 905			}
 906		} else {
 907			id.data = data + link_b;
 908			id.size = link_e - link_b;
 909		}
 910
 911		lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
 912		if (!lr) goto cleanup;
 913
 914		/* keeping link and title from link_ref */
 915		link = lr->link;
 916		title = lr->title;
 917		i += 1;
 918	}
 919
 920	/* shortcut reference style link */
 921	else {
 922		struct buf id = { 0, 0, 0, 0, 0 };
 923		struct link_ref *lr;
 924
 925		/* crafting the id */
 926		if (text_has_nl) {
 927			struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
 928			size_t j;
 929
 930			for (j = 1; j < txt_e; j++) {
 931				if (data[j] != '\n')
 932					bufputc(b, data[j]);
 933				else if (data[j - 1] != ' ')
 934					bufputc(b, ' ');
 935			}
 936
 937			id.data = b->data;
 938			id.size = b->size;
 939		} else {
 940			id.data = data + 1;
 941			id.size = txt_e - 1;
 942		}
 943
 944		/* finding the link_ref */
 945		lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
 946		if (!lr) goto cleanup;
 947
 948		/* keeping link and title from link_ref */
 949		link = lr->link;
 950		title = lr->title;
 951
 952		/* rewinding the whitespace */
 953		i = txt_e + 1;
 954	}
 955
 956	/* building content: img alt is escaped, link content is parsed */
 957	if (txt_e > 1) {
 958		content = rndr_newbuf(rndr, BUFFER_SPAN);
 959		if (is_img) bufput(content, data + 1, txt_e - 1);
 960		else parse_inline(content, rndr, data + 1, txt_e - 1);
 961	}
 962
 963	if (link) {
 964		u_link = rndr_newbuf(rndr, BUFFER_SPAN);
 965		unscape_text(u_link, link);
 966	}
 967
 968	/* calling the relevant rendering function */
 969	if (is_img) {
 970		if (ob->size && ob->data[ob->size - 1] == '!')
 971			ob->size -= 1;
 972
 973		ret = rndr->make.image(ob, u_link, title, content, rndr->make.opaque);
 974	} else {
 975		ret = rndr->make.link(ob, u_link, title, content, rndr->make.opaque);
 976	}
 977
 978	/* cleanup */
 979cleanup:
 980	rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
 981	return ret ? i : 0;
 982}
 983
 984
 985
 986/*********************************
 987 * BLOCK-LEVEL PARSING FUNCTIONS *
 988 *********************************/
 989
 990/* is_empty • returns the line length when it is empty, 0 otherwise */
 991static size_t
 992is_empty(char *data, size_t size)
 993{
 994	size_t i;
 995	for (i = 0; i < size && data[i] != '\n'; i += 1)
 996		if (data[i] != ' ' && data[i] != '\t') return 0;
 997	return i + 1;
 998}
 999
/* is_hrule • returns whether a line is a horizontal rule */
/*   i.e. up to three leading spaces, then at least three '*', '-' or '_' */
/*   (all the same char) with only spaces and tabs in between */
static int
is_hrule(char *data, size_t size)
{
	size_t pos = 0, count = 0;
	char mark;

	if (size < 3)
		return 0;

	/* skipping initial spaces (three at most) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* looking at the hrule char */
	if (pos + 2 >= size)
		return 0;

	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the whole line must be the char or whitespace */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			count++;
		else if (data[pos] != ' ' && data[pos] != '\t')
			return 0;
	}

	return count >= 3;
}
1028
1029/* check if a line is a code fence; return its size if it is */
1030static size_t
1031is_codefence(char *data, size_t size, struct buf *syntax)
1032{
1033	size_t i = 0, n = 0;
1034	char c;
1035
1036	/* skipping initial spaces */
1037	if (size < 3) return 0;
1038	if (data[0] == ' ') { i += 1;
1039	if (data[1] == ' ') { i += 1;
1040	if (data[2] == ' ') { i += 1; } } }
1041
1042	/* looking at the hrule char */
1043	if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
1044		return 0;
1045
1046	c = data[i];
1047
1048	/* the whole line must be the char or whitespace */
1049	while (i < size && data[i] == c) {
1050		n++; i++;
1051	}
1052
1053	if (n < 3)
1054		return 0;
1055
1056	if (syntax != NULL) {
1057		size_t syn = 0;
1058
1059		while (i < size && (data[i] == ' ' || data[i] == '\t'))
1060			i++;
1061
1062		syntax->data = data + i;
1063
1064		if (i < size && data[i] == '{') {
1065			i++; syntax->data++;
1066
1067			while (i < size && data[i] != '}' && data[i] != '\n') {
1068				syn++; i++;
1069			}
1070
1071			if (i == size || data[i] != '}')
1072				return 0;
1073
1074			/* strip all whitespace at the beggining and the end
1075			 * of the {} block */
1076			while (syn > 0 && isspace(syntax->data[0])) {
1077				syntax->data++; syn--;
1078			}
1079
1080			while (syn > 0 && isspace(syntax->data[syn - 1]))
1081				syn--;
1082
1083			i++;
1084		} else {
1085			while (i < size && !isspace(data[i])) {
1086				syn++; i++;
1087			}
1088		}
1089
1090		syntax->size = syn;
1091	}
1092
1093	while (i < size && data[i] != '\n') {
1094		if (!isspace(data[i]))
1095			return 0;
1096
1097		i++;
1098	}
1099
1100	return i + 1;
1101}
1102
/* is_headerline • returns whether the line is a setext-style hdr underline */
/*   returns 1 for a line of '=' (level 1 header), 2 for a line of '-' */
/*   (level 2 header), 0 otherwise; trailing spaces and tabs are allowed */
static int
is_headerline(char *data, size_t size)
{
	size_t i;
	char mark;

	/* an empty input has no first char to look at */
	if (size == 0)
		return 0;

	mark = data[0];
	if (mark != '=' && mark != '-')
		return 0;

	/* the underline itself, then optional trailing blanks */
	for (i = 1; i < size && data[i] == mark; i += 1);
	while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1;

	/* anything else before the end of the line disqualifies it */
	if (i < size && data[i] != '\n')
		return 0;

	return (mark == '=') ? 1 : 2;
}
1123
/* prefix_quote • returns blockquote prefix length */
/*   the prefix is up to three spaces, a '>' and one optional space or tab; */
/*   returns 0 when the line is not a blockquote line */
static size_t
prefix_quote(char *data, size_t size)
{
	size_t pos = 0;

	/* up to three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	/* one blank after the marker belongs to the prefix */
	if (pos + 1 < size && (data[pos + 1] == ' ' || data[pos + 1] == '\t'))
		return pos + 2;

	return pos + 1;
}
1138
/* prefix_code • length of the indentation marking a line of block code */
/*	1 for a leading tab, 4 for four leading spaces, 0 otherwise */
static size_t
prefix_code(char *data, size_t size)
{
	size_t spaces = 0;

	if (size == 0)
		return 0;

	if (data[0] == '\t')
		return 1;

	while (spaces < 4 && spaces < size && data[spaces] == ' ')
		spaces++;

	return (spaces == 4) ? 4 : 0;
}
1148
/* prefix_oli • length of an ordered list item marker */
/*	up to three leading spaces, one or more digits, a '.' and a space */
/*	or tab; returns 0 when the line does not start with such a marker */
static size_t
prefix_oli(char *data, size_t size)
{
	size_t pos = 0, number_at;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* the item number: at least one digit is required */
	number_at = pos;
	while (pos < size && data[pos] >= '0' && data[pos] <= '9')
		pos++;

	if (pos == number_at)
		return 0;

	/* both the dot and the blank after it must fit in the buffer */
	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '.')
		return 0;

	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;

	return pos + 2;
}
1163
/* prefix_uli • length of an unordered list item marker */
/*	up to three leading spaces, one of '*', '+' or '-', then a space */
/*	or tab; returns 0 when the line does not start with such a marker */
static size_t
prefix_uli(char *data, size_t size)
{
	size_t pos = 0;
	char bullet;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* both the bullet and the blank after it must fit in the buffer */
	if (pos + 1 >= size)
		return 0;

	bullet = data[pos];
	if (bullet != '*' && bullet != '+' && bullet != '-')
		return 0;

	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;

	return pos + 2;
}
1178
1179
1180/* parse_block • forward declaration; parses every block found in data and renders it into ob */
1181static void parse_block(struct buf *ob, struct render *rndr,
1182			char *data, size_t size);
1183
1184
1185/* parse_blockquote • hanldes parsing of a blockquote fragment */
1186static size_t
1187parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size)
1188{
1189	size_t beg, end = 0, pre, work_size = 0;
1190	char *work_data = 0;
1191	struct buf *out = 0;
1192
1193	out = rndr_newbuf(rndr, BUFFER_BLOCK);
1194	beg = 0;
1195	while (beg < size) {
1196		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1197
1198		pre = prefix_quote(data + beg, end - beg);
1199
1200		if (pre)
1201			beg += pre; /* skipping prefix */
1202
1203		/* empty line followed by non-quote line */
1204		else if (is_empty(data + beg, end - beg) &&
1205				(end >= size || (prefix_quote(data + end, size - end) == 0 &&
1206				!is_empty(data + end, size - end))))
1207			break;
1208
1209		if (beg < end) { /* copy into the in-place working buffer */
1210			/* bufput(work, data + beg, end - beg); */
1211			if (!work_data)
1212				work_data = data + beg;
1213			else if (data + beg != work_data + work_size)
1214				memmove(work_data + work_size, data + beg, end - beg);
1215			work_size += end - beg;
1216		}
1217		beg = end;
1218	}
1219
1220	parse_block(out, rndr, work_data, work_size);
1221	if (rndr->make.blockquote)
1222		rndr->make.blockquote(ob, out, rndr->make.opaque);
1223	rndr_popbuf(rndr, BUFFER_BLOCK);
1224	return end;
1225}
1226
1227static size_t
1228parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render);
1229
1230/* parse_blockquote • hanldes parsing of a regular paragraph */
1231static size_t
1232parse_paragraph(struct buf *ob, struct render *rndr, char *data, size_t size)
1233{
1234	size_t i = 0, end = 0;
1235	int level = 0;
1236	struct buf work = { data, 0, 0, 0, 0 }; /* volatile working buffer */
1237
1238	while (i < size) {
1239		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1240
1241		if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0)
1242			break;
1243
1244		if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
1245			if (data[i] == '<' && rndr->make.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1246				end = i;
1247				break;
1248			}
1249		}
1250
1251		if (data[i] == '#' || is_hrule(data + i, size - i)) {
1252			end = i;
1253			break;
1254		}
1255
1256		i = end;
1257	}
1258
1259	work.size = i;
1260	while (work.size && data[work.size - 1] == '\n')
1261		work.size--;
1262
1263	if (!level) {
1264		struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1265		parse_inline(tmp, rndr, work.data, work.size);
1266		if (rndr->make.paragraph)
1267			rndr->make.paragraph(ob, tmp, rndr->make.opaque);
1268		rndr_popbuf(rndr, BUFFER_BLOCK);
1269	} else {
1270		struct buf *header_work;
1271
1272		if (work.size) {
1273			size_t beg;
1274			i = work.size;
1275			work.size -= 1;
1276
1277			while (work.size && data[work.size] != '\n')
1278				work.size -= 1;
1279
1280			beg = work.size + 1;
1281			while (work.size && data[work.size - 1] == '\n')
1282				work.size -= 1;
1283
1284			if (work.size > 0) {
1285				struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1286				parse_inline(tmp, rndr, work.data, work.size);
1287
1288				if (rndr->make.paragraph)
1289					rndr->make.paragraph(ob, tmp, rndr->make.opaque);
1290
1291				rndr_popbuf(rndr, BUFFER_BLOCK);
1292				work.data += beg;
1293				work.size = i - beg;
1294			}
1295			else work.size = i;
1296		}
1297
1298		header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1299		parse_inline(header_work, rndr, work.data, work.size);
1300
1301		if (rndr->make.header)
1302			rndr->make.header(ob, header_work, (int)level, rndr->make.opaque);
1303
1304		rndr_popbuf(rndr, BUFFER_SPAN);
1305	}
1306
1307	return end;
1308}
1309
1310/* parse_fencedcode • hanldes parsing of a block-level code fragment */
1311static size_t
1312parse_fencedcode(struct buf *ob, struct render *rndr, char *data, size_t size)
1313{
1314	size_t beg, end;
1315	struct buf *work = 0;
1316	struct buf lang = { 0, 0, 0, 0, 0 };
1317
1318	beg = is_codefence(data, size, &lang);
1319	if (beg == 0) return 0;
1320
1321	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1322
1323	while (beg < size) {
1324		size_t fence_end;
1325
1326		fence_end = is_codefence(data + beg, size - beg, NULL);
1327		if (fence_end != 0) {
1328			beg += fence_end;
1329			break;
1330		}
1331
1332		for (end = beg + 1; end < size && data[end - 1] != '\n'; end += 1);
1333
1334		if (beg < end) {
1335			/* verbatim copy to the working buffer,
1336				escaping entities */
1337			if (is_empty(data + beg, end - beg))
1338				bufputc(work, '\n');
1339			else bufput(work, data + beg, end - beg);
1340		}
1341		beg = end;
1342	}
1343
1344	if (work->size && work->data[work->size - 1] != '\n')
1345		bufputc(work, '\n');
1346
1347	if (rndr->make.blockcode)
1348		rndr->make.blockcode(ob, work, lang.size ? &lang : NULL, rndr->make.opaque);
1349
1350	rndr_popbuf(rndr, BUFFER_BLOCK);
1351	return beg;
1352}
1353
1354static size_t
1355parse_blockcode(struct buf *ob, struct render *rndr, char *data, size_t size)
1356{
1357	size_t beg, end, pre;
1358	struct buf *work = 0;
1359
1360	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1361
1362	beg = 0;
1363	while (beg < size) {
1364		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1365		pre = prefix_code(data + beg, end - beg);
1366
1367		if (pre)
1368			beg += pre; /* skipping prefix */
1369		else if (!is_empty(data + beg, end - beg))
1370			/* non-empty non-prefixed line breaks the pre */
1371			break;
1372
1373		if (beg < end) {
1374			/* verbatim copy to the working buffer,
1375				escaping entities */
1376			if (is_empty(data + beg, end - beg))
1377				bufputc(work, '\n');
1378			else bufput(work, data + beg, end - beg);
1379		}
1380		beg = end;
1381	}
1382
1383	while (work->size && work->data[work->size - 1] == '\n')
1384		work->size -= 1;
1385
1386	bufputc(work, '\n');
1387
1388	if (rndr->make.blockcode)
1389		rndr->make.blockcode(ob, work, NULL, rndr->make.opaque);
1390
1391	rndr_popbuf(rndr, BUFFER_BLOCK);
1392	return beg;
1393}
1394
1395/* parse_listitem • parsing of a single list item */
1396/*	assuming initial prefix is already removed */
1397static size_t
1398parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int *flags)
1399{
1400	struct buf *work = 0, *inter = 0;
1401	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1402	int in_empty = 0, has_inside_empty = 0;
1403
1404	/* keeping book of the first indentation prefix */
1405	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1406		orgpre++;
1407
1408	beg = prefix_uli(data, size);
1409	if (!beg)
1410		beg = prefix_oli(data, size);
1411
1412	if (!beg)
1413		return 0;
1414
1415	/* skipping to the beginning of the following line */
1416	end = beg;
1417	while (end < size && data[end - 1] != '\n')
1418		end++;
1419
1420	/* getting working buffers */
1421	work = rndr_newbuf(rndr, BUFFER_SPAN);
1422	inter = rndr_newbuf(rndr, BUFFER_SPAN);
1423
1424	/* putting the first line into the working buffer */
1425	bufput(work, data + beg, end - beg);
1426	beg = end;
1427
1428	/* process the following lines */
1429	while (beg < size) {
1430		end++;
1431
1432		while (end < size && data[end - 1] != '\n')
1433			end++;
1434
1435		/* process an empty line */
1436		if (is_empty(data + beg, end - beg)) {
1437			in_empty = 1;
1438			beg = end;
1439			continue;
1440		}
1441
1442		/* calculating the indentation */
1443		i = 0;
1444		while (i < 4 && beg + i < end && data[beg + i] == ' ')
1445			i++;
1446
1447		pre = i;
1448		if (data[beg] == '\t') { i = 1; pre = 8; }
1449
1450		/* checking for a new item */
1451		if ((prefix_uli(data + beg + i, end - beg - i) &&
1452			!is_hrule(data + beg + i, end - beg - i)) ||
1453			prefix_oli(data + beg + i, end - beg - i)) {
1454			if (in_empty)
1455				has_inside_empty = 1;
1456
1457			if (pre == orgpre) /* the following item must have */
1458				break;             /* the same indentation */
1459
1460			if (!sublist)
1461				sublist = work->size;
1462		}
1463		/* joining only indented stuff after empty lines */
1464		else if (in_empty && i < 4 && data[beg] != '\t') {
1465				*flags |= MKD_LI_END;
1466				break;
1467		}
1468		else if (in_empty) {
1469			bufputc(work, '\n');
1470			has_inside_empty = 1;
1471		}
1472
1473		in_empty = 0;
1474
1475		/* adding the line without prefix into the working buffer */
1476		bufput(work, data + beg + i, end - beg - i);
1477		beg = end;
1478	}
1479
1480	/* render of li contents */
1481	if (has_inside_empty)
1482		*flags |= MKD_LI_BLOCK;
1483
1484	if (*flags & MKD_LI_BLOCK) {
1485		/* intermediate render of block li */
1486		if (sublist && sublist < work->size) {
1487			parse_block(inter, rndr, work->data, sublist);
1488			parse_block(inter, rndr, work->data + sublist, work->size - sublist); 
1489		}
1490		else
1491			parse_block(inter, rndr, work->data, work->size);
1492	} else {
1493		/* intermediate render of inline li */
1494		if (sublist && sublist < work->size) {
1495			parse_inline(inter, rndr, work->data, sublist);
1496			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1497		}
1498		else
1499			parse_inline(inter, rndr, work->data, work->size);
1500	}
1501
1502	/* render of li itself */
1503	if (rndr->make.listitem)
1504		rndr->make.listitem(ob, inter, *flags, rndr->make.opaque);
1505
1506	rndr_popbuf(rndr, BUFFER_SPAN);
1507	rndr_popbuf(rndr, BUFFER_SPAN);
1508	return beg;
1509}
1510
1511
1512/* parse_list • parsing ordered or unordered list block */
1513static size_t
1514parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int flags)
1515{
1516	struct buf *work = 0;
1517	size_t i = 0, j;
1518
1519	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1520
1521	while (i < size) {
1522		j = parse_listitem(work, rndr, data + i, size - i, &flags);
1523		i += j;
1524
1525		if (!j || (flags & MKD_LI_END))
1526			break;
1527	}
1528
1529	if (rndr->make.list)
1530		rndr->make.list(ob, work, flags, rndr->make.opaque);
1531	rndr_popbuf(rndr, BUFFER_BLOCK);
1532	return i;
1533}
1534
1535
1536/* parse_atxheader • parsing of atx-style headers */
1537static size_t
1538parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
1539{
1540	size_t level = 0;
1541	size_t i, end, skip;
1542
1543	if (!size || data[0] != '#')
1544		return 0;
1545
1546	while (level < size && level < 6 && data[level] == '#')
1547		level++;
1548
1549	for (i = level; i < size && (data[i] == ' ' || data[i] == '\t'); i++);
1550
1551	for (end = i; end < size && data[end] != '\n'; end++);
1552	skip = end;
1553
1554	while (end && data[end - 1] == '#')
1555		end--;
1556
1557	while (end && (data[end - 1] == ' ' || data[end - 1] == '\t'))
1558		end--;
1559
1560	if (end > i) {
1561		struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
1562
1563		parse_inline(work, rndr, data + i, end - i);
1564
1565		if (rndr->make.header)
1566			rndr->make.header(ob, work, (int)level, rndr->make.opaque);
1567
1568		rndr_popbuf(rndr, BUFFER_SPAN);
1569	}
1570
1571	return skip;
1572}
1573
1574
1575/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
1576/*	returns the length on match, 0 otherwise */
1577static size_t
1578htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size)
1579{
1580	size_t i, w;
1581
1582	/* assuming data[0] == '<' && data[1] == '/' already tested */
1583
1584	/* checking tag is a match */
1585	if (tag->size + 3 >= size
1586	|| strncasecmp(data + 2, tag->text, tag->size)
1587	|| data[tag->size + 2] != '>')
1588		return 0;
1589
1590	/* checking white lines */
1591	i = tag->size + 3;
1592	w = 0;
1593	if (i < size && (w = is_empty(data + i, size - i)) == 0)
1594		return 0; /* non-blank after tag */
1595	i += w;
1596	w = 0;
1597
1598	if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
1599		if (i < size)
1600			w = is_empty(data + i, size - i);
1601	} else  {
1602		if (i < size && (w = is_empty(data + i, size - i)) == 0)
1603			return 0; /* non-blank line after tag line */
1604	}
1605
1606	return i + w;
1607}
1608
1609
1610/* parse_htmlblock • parsing of inline HTML block */
1611static size_t
1612parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render)
1613{
1614	size_t i, j = 0;
1615	struct html_tag *curtag;
1616	int found;
1617	struct buf work = { data, 0, 0, 0, 0 };
1618
1619	/* identification of the opening tag */
1620	if (size < 2 || data[0] != '<') return 0;
1621	curtag = find_block_tag(data + 1, size - 1);
1622
1623	/* handling of special cases */
1624	if (!curtag) {
1625
1626		/* HTML comment, laxist form */
1627		if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
1628			i = 5;
1629
1630			while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
1631				i++;
1632
1633			i++;
1634
1635			if (i < size)
1636				j = is_empty(data + i, size - i);
1637
1638			if (j) {
1639				work.size = i + j;
1640				if (do_render && rndr->make.blockhtml)
1641					rndr->make.blockhtml(ob, &work, rndr->make.opaque);
1642				return work.size;
1643			} 
1644		}
1645
1646		/* HR, which is the only self-closing block tag considered */
1647		if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
1648			i = 3;
1649			while (i < size && data[i] != '>')
1650				i += 1;
1651
1652			if (i + 1 < size) {
1653				i += 1;
1654				j = is_empty(data + i, size - i);
1655				if (j) {
1656					work.size = i + j;
1657					if (do_render && rndr->make.blockhtml)
1658						rndr->make.blockhtml(ob, &work, rndr->make.opaque);
1659					return work.size;
1660				}
1661			} 
1662		}
1663
1664		/* no special case recognised */
1665		return 0;
1666	}
1667
1668	/* looking for an unindented matching closing tag */
1669	/*	followed by a blank line */
1670	i = 1;
1671	found = 0;
1672
1673	/* if not found, trying a second pass looking for indented match */
1674	/* but not if tag is "ins" or "del" (following original Markdown.pl) */
1675	if (curtag != INS_TAG && curtag != DEL_TAG) {
1676		i = 1;
1677		while (i < size) {
1678			i++;
1679			while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
1680				i++;
1681
1682			if (i + 2 + curtag->size >= size)
1683				break;
1684
1685			j = htmlblock_end(curtag, rndr, data + i - 1, size - i + 1);
1686
1687			if (j) {
1688				i += j - 1;
1689				found = 1;
1690				break;
1691			}
1692		} 
1693	}
1694
1695	if (!found) return 0;
1696
1697	/* the end of the block has been found */
1698	work.size = i;
1699	if (do_render && rndr->make.blockhtml)
1700		rndr->make.blockhtml(ob, &work, rndr->make.opaque);
1701
1702	return i;
1703}
1704
1705static void
1706parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, size_t columns, int *col_data)
1707{
1708	size_t i = 0, col;
1709	struct buf *row_work = 0;
1710
1711	row_work = rndr_newbuf(rndr, BUFFER_SPAN);
1712
1713	if (i < size && data[i] == '|')
1714		i++;
1715
1716	for (col = 0; col < columns && i < size; ++col) {
1717		size_t cell_start, cell_end;
1718		struct buf *cell_work;
1719
1720		cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
1721
1722		while (i < size && isspace(data[i]))
1723			i++;
1724
1725		cell_start = i;
1726
1727		while (i < size && data[i] != '|')
1728			i++;
1729
1730		cell_end = i - 1;
1731
1732		while (cell_end > cell_start && isspace(data[cell_end]))
1733			cell_end--;
1734
1735		parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
1736		if (rndr->make.table_cell)
1737			rndr->make.table_cell(row_work, cell_work, col_data ? col_data[col] : 0, rndr->make.opaque);
1738
1739		rndr_popbuf(rndr, BUFFER_SPAN);
1740		i++;
1741	}
1742
1743	for (; col < columns; ++col) {
1744		struct buf empty_cell = {0, 0, 0, 0, 0};
1745		if (rndr->make.table_cell)
1746			rndr->make.table_cell(row_work, &empty_cell, col_data ? col_data[col] : 0, rndr->make.opaque);
1747	}
1748
1749	if (rndr->make.table_row)
1750		rndr->make.table_row(ob, row_work, rndr->make.opaque);
1751
1752	rndr_popbuf(rndr, BUFFER_SPAN);
1753}
1754
1755static size_t
1756parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size, size_t *columns, int **column_data)
1757{
1758	int pipes;
1759	size_t i = 0, col, header_end, under_end;
1760
1761	pipes = 0;
1762	while (i < size && data[i] != '\n')
1763		if (data[i++] == '|')
1764			pipes++;
1765
1766	if (i == size || pipes == 0)
1767		return 0;
1768
1769	header_end = i;
1770
1771	if (data[0] == '|')
1772		pipes--;
1773
1774	if (i > 2 && data[i - 1] == '|')
1775		pipes--;
1776
1777	*columns = pipes + 1;
1778	*column_data = calloc(*columns, sizeof(int));
1779
1780	/* Parse the header underline */
1781	i++;
1782	if (i < size && data[i] == '|')
1783		i++;
1784
1785	under_end = i;
1786	while (under_end < size && data[under_end] != '\n')
1787		under_end++;
1788
1789	for (col = 0; col < *columns && i < under_end; ++col) {
1790		size_t dashes = 0;
1791
1792		while (i < under_end && (data[i] == ' ' || data[i] == '\t'))
1793			i++;
1794
1795		if (data[i] == ':') {
1796			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
1797			dashes++;
1798		}
1799
1800		while (i < under_end && data[i] == '-') {
1801			i++; dashes++;
1802		}
1803
1804		if (i < under_end && data[i] == ':') {
1805			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
1806			dashes++;
1807		}
1808
1809		while (i < under_end && (data[i] == ' ' || data[i] == '\t'))
1810			i++;
1811
1812		if (i < under_end && data[i] != '|')
1813			break;
1814
1815		if (dashes < 3)
1816			break;
1817
1818		i++;
1819	}
1820
1821	if (col < *columns)
1822		return 0;
1823
1824	parse_table_row(ob, rndr, data, header_end, *columns, *column_data);
1825	return under_end + 1;
1826}
1827
1828static size_t
1829parse_table(struct buf *ob, struct render *rndr, char *data, size_t size)
1830{
1831	size_t i;
1832
1833	struct buf *header_work = 0;
1834	struct buf *body_work = 0;
1835
1836	size_t columns;
1837	int *col_data = NULL;
1838
1839	header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1840	body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
1841
1842	i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
1843	if (i > 0) {
1844
1845		while (i < size) {
1846			size_t row_start;
1847			int pipes = 0;
1848
1849			row_start = i;
1850
1851			while (i < size && data[i] != '\n')
1852				if (data[i++] == '|')
1853					pipes++;
1854
1855			if (pipes == 0 || i == size) {
1856				i = row_start;
1857				break;
1858			}
1859
1860			parse_table_row(body_work, rndr, data + row_start, i - row_start, columns, col_data);
1861			i++;
1862		}
1863
1864		if (rndr->make.table)
1865			rndr->make.table(ob, header_work, body_work, rndr->make.opaque);
1866	}
1867
1868	free(col_data);
1869	rndr_popbuf(rndr, BUFFER_SPAN);
1870	rndr_popbuf(rndr, BUFFER_BLOCK);
1871	return i;
1872}
1873
1874/* parse_block • parsing of one block, returning next char to parse */
1875static void
1876parse_block(struct buf *ob, struct render *rndr, char *data, size_t size)
1877{
1878	size_t beg, end, i;
1879	char *txt_data;
1880	beg = 0;
1881
1882	if (rndr->work_bufs[BUFFER_SPAN].size +
1883		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
1884		return;
1885
1886	while (beg < size) {
1887		txt_data = data + beg;
1888		end = size - beg;
1889
1890		if (data[beg] == '#')
1891			beg += parse_atxheader(ob, rndr, txt_data, end);
1892
1893		else if (data[beg] == '<' && rndr->make.blockhtml &&
1894				(i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
1895			beg += i;
1896
1897		else if ((i = is_empty(txt_data, end)) != 0)
1898			beg += i;
1899
1900		else if (is_hrule(txt_data, end)) {
1901			if (rndr->make.hrule)
1902				rndr->make.hrule(ob, rndr->make.opaque);
1903
1904			while (beg < size && data[beg] != '\n')
1905				beg++;
1906
1907			beg++;
1908		}
1909
1910		else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1911			(i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
1912			beg += i;
1913
1914		else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
1915			(i = parse_table(ob, rndr, txt_data, end)) != 0)
1916			beg += i;
1917
1918		else if (prefix_quote(txt_data, end))
1919			beg += parse_blockquote(ob, rndr, txt_data, end);
1920
1921		else if (prefix_code(txt_data, end))
1922			beg += parse_blockcode(ob, rndr, txt_data, end);
1923
1924		else if (prefix_uli(txt_data, end))
1925			beg += parse_list(ob, rndr, txt_data, end, 0);
1926
1927		else if (prefix_oli(txt_data, end))
1928			beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
1929
1930		else
1931			beg += parse_paragraph(ob, rndr, txt_data, end);
1932	}
1933}
1934
1935
1936
1937/*********************
1938 * REFERENCE PARSING *
1939 *********************/
1940
1941/* is_ref • returns whether a line is a reference or not */
1942static int
1943is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
1944{
1945/*	int n; */
1946	size_t i = 0;
1947	size_t id_offset, id_end;
1948	size_t link_offset, link_end;
1949	size_t title_offset, title_end;
1950	size_t line_end;
1951	struct link_ref *lr;
1952/*	struct buf id = { 0, 0, 0, 0, 0 }; / * volatile buf for id search */
1953
1954	/* up to 3 optional leading spaces */
1955	if (beg + 3 >= end) return 0;
1956	if (data[beg] == ' ') { i = 1;
1957	if (data[beg + 1] == ' ') { i = 2;
1958	if (data[beg + 2] == ' ') { i = 3;
1959	if (data[beg + 3] == ' ') return 0; } } }
1960	i += beg;
1961
1962	/* id part: anything but a newline between brackets */
1963	if (data[i] != '[') return 0;
1964	i += 1;
1965	id_offset = i;
1966	while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
1967		i += 1;
1968	if (i >= end || data[i] != ']') return 0;
1969	id_end = i;
1970
1971	/* spacer: colon (space | tab)* newline? (space | tab)* */
1972	i += 1;
1973	if (i >= end || data[i] != ':') return 0;
1974	i += 1;
1975	while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
1976	if (i < end && (data[i] == '\n' || data[i] == '\r')) {
1977		i += 1;
1978		if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; }
1979	while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
1980	if (i >= end) return 0;
1981
1982	/* link: whitespace-free sequence, optionally between angle brackets */
1983	if (data[i] == '<') i += 1;
1984	link_offset = i;
1985	while (i < end && data[i] != ' ' && data[i] != '\t'
1986			&& data[i] != '\n' && data[i] != '\r') i += 1;
1987	if (data[i - 1] == '>') link_end = i - 1;
1988	else link_end = i;
1989
1990	/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
1991	while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
1992	if (i < end && data[i] != '\n' && data[i] != '\r'
1993			&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
1994		return 0;
1995	line_end = 0;
1996	/* computing end-of-line */
1997	if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
1998	if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
1999		line_end = i + 1;
2000
2001	/* optional (space|tab)* spacer after a newline */
2002	if (line_end) {
2003		i = line_end + 1;
2004		while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; }
2005
2006	/* optional title: any non-newline sequence enclosed in '"()
2007					alone on its line */
2008	title_offset = title_end = 0;
2009	if (i + 1 < end
2010	&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2011		i += 1;
2012		title_offset = i;
2013		/* looking for EOL */
2014		while (i < end && data[i] != '\n' && data[i] != '\r') i += 1;
2015		if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2016			title_end = i + 1;
2017		else	title_end = i;
2018		/* stepping back */
2019		i -= 1;
2020		while (i > title_offset && (data[i] == ' ' || data[i] == '\t'))
2021			i -= 1;
2022		if (i > title_offset
2023		&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2024			line_end = title_end;
2025			title_end = i; } }
2026	if (!line_end) return 0; /* garbage after the link */
2027
2028	/* a valid ref has been found, filling-in return structures */
2029	if (last) *last = line_end;
2030	if (!refs) return 1;
2031	lr = arr_item(refs, arr_newitem(refs));
2032	lr->id = bufnew(id_end - id_offset);
2033	bufput(lr->id, data + id_offset, id_end - id_offset);
2034	lr->link = bufnew(link_end - link_offset);
2035	bufput(lr->link, data + link_offset, link_end - link_offset);
2036	if (title_end > title_offset) {
2037		lr->title = bufnew(title_end - title_offset);
2038		bufput(lr->title, data + title_offset,
2039					title_end - title_offset); }
2040	else lr->title = 0;
2041	return 1; 
2042}
2043
/* expand_tabs • appends a line to ob, turning tabs into spaces */
/*	each tab becomes one to four spaces, up to the next multiple of */
/*	four of the column count */
static void expand_tabs(struct buf *ob, const char *line, size_t size)
{
	size_t pos = 0, column = 0;

	while (pos < size) {
		size_t run_at = pos;

		/* run of ordinary chars, copied as it is */
		for (; pos < size && line[pos] != '\t'; pos++)
			column++;

		if (pos > run_at)
			bufput(ob, line + run_at, pos - run_at);

		if (pos >= size)
			break;

		/* a tab: at least one space, then up to the tab stop */
		do {
			bufputc(ob, ' ');
			column++;
		} while (column % 4);

		pos++;
	}
}
2068
2069/**********************
2070 * EXPORTED FUNCTIONS *
2071 **********************/
2072
2073/* markdown • parses the input buffer and renders it into the output buffer */
2074void
2075ups_markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer, unsigned int extensions) {
2076	struct link_ref *lr;
2077	struct buf *text;
2078	size_t i, beg, end;
2079	struct render rndr;
2080
2081	/* filling the render structure */
2082	if (!rndrer)
2083		return;
2084
2085	text = bufnew(64);
2086	if (!text)
2087		return;
2088
2089	/* Preallocate enough space for our buffer to avoid expanding while copying */
2090	bufgrow(text, ib->size);
2091
2092	memcpy(&rndr.make, rndrer, sizeof(struct mkd_renderer));
2093	arr_init(&rndr.refs, sizeof (struct link_ref));
2094	parr_init(&rndr.work_bufs[BUFFER_BLOCK]);
2095	parr_init(&rndr.work_bufs[BUFFER_SPAN]);
2096
2097	for (i = 0; i < 256; i += 1)
2098		rndr.active_char[i] = 0;
2099
2100	if (rndr.make.emphasis || rndr.make.double_emphasis || rndr.make.triple_emphasis) {
2101		rndr.active_char['*'] = MD_CHAR_EMPHASIS;
2102		rndr.active_char['_'] = MD_CHAR_EMPHASIS;
2103		if (extensions & MKDEXT_STRIKETHROUGH)
2104			rndr.active_char['~'] = MD_CHAR_EMPHASIS;
2105	}
2106
2107	if (rndr.make.codespan)
2108		rndr.active_char['`'] = MD_CHAR_CODESPAN;
2109
2110	if (rndr.make.linebreak)
2111		rndr.active_char['\n'] = MD_CHAR_LINEBREAK;
2112
2113	if (rndr.make.image || rndr.make.link)
2114		rndr.active_char['['] = MD_CHAR_LINK;
2115
2116	rndr.active_char['<'] = MD_CHAR_LANGLE;
2117	rndr.active_char['\\'] = MD_CHAR_ESCAPE;
2118	rndr.active_char['&'] = MD_CHAR_ENTITITY;
2119
2120	if (extensions & MKDEXT_AUTOLINK) {
2121		rndr.active_char['h'] = MD_CHAR_AUTOLINK; // http, https
2122		rndr.active_char['H'] = MD_CHAR_AUTOLINK;
2123
2124		rndr.active_char['f'] = MD_CHAR_AUTOLINK; // ftp
2125		rndr.active_char['F'] = MD_CHAR_AUTOLINK;
2126
2127		rndr.active_char['m'] = MD_CHAR_AUTOLINK; // mailto
2128		rndr.active_char['M'] = MD_CHAR_AUTOLINK;
2129	}
2130
2131	/* Extension data */
2132	rndr.ext_flags = extensions;
2133	rndr.max_nesting = 16;
2134
2135	/* first pass: looking for references, copying everything else */
2136	beg = 0;
2137	while (beg < ib->size) /* iterating over lines */
2138		if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs))
2139			beg = end;
2140		else { /* skipping to the next line */
2141			end = beg;
2142			while (end < ib->size && ib->data[end] != '\n' && ib->data[end] != '\r')
2143				end += 1;
2144
2145			/* adding the line body if present */
2146			if (end > beg)
2147				expand_tabs(text, ib->data + beg, end - beg);
2148
2149			while (end < ib->size && (ib->data[end] == '\n' || ib->data[end] == '\r')) {
2150				/* add one \n per newline */
2151				if (ib->data[end] == '\n' || (end + 1 < ib->size && ib->data[end + 1] != '\n'))
2152					bufputc(text, '\n');
2153				end += 1;
2154			}
2155
2156			beg = end;
2157		}
2158
2159	/* sorting the reference array */
2160	if (rndr.refs.size)
2161		qsort(rndr.refs.base, rndr.refs.size, rndr.refs.unit, cmp_link_ref_sort);
2162
2163	/* second pass: actual rendering */
2164	if (rndr.make.doc_header)
2165		rndr.make.doc_header(ob, rndr.make.opaque);
2166
2167	if (text->size) {
2168		/* adding a final newline if not already present */
2169		if (text->data[text->size - 1] != '\n' &&  text->data[text->size - 1] != '\r')
2170			bufputc(text, '\n');
2171
2172		parse_block(ob, &rndr, text->data, text->size);
2173	}
2174
2175	if (rndr.make.doc_footer)
2176		rndr.make.doc_footer(ob, rndr.make.opaque);
2177
2178	/* clean-up */
2179	bufrelease(text);
2180	lr = rndr.refs.base;
2181	for (i = 0; i < (size_t)rndr.refs.size; i += 1) {
2182		bufrelease(lr[i].id);
2183		bufrelease(lr[i].link);
2184		bufrelease(lr[i].title);
2185	}
2186
2187	arr_free(&rndr.refs);
2188
2189	assert(rndr.work_bufs[BUFFER_SPAN].size == 0);
2190	assert(rndr.work_bufs[BUFFER_BLOCK].size == 0);
2191
2192	for (i = 0; i < (size_t)rndr.work_bufs[BUFFER_SPAN].asize; ++i)
2193		bufrelease(rndr.work_bufs[BUFFER_SPAN].item[i]);
2194
2195	for (i = 0; i < (size_t)rndr.work_bufs[BUFFER_BLOCK].asize; ++i)
2196		bufrelease(rndr.work_bufs[BUFFER_BLOCK].item[i]);
2197
2198	parr_free(&rndr.work_bufs[BUFFER_SPAN]);
2199	parr_free(&rndr.work_bufs[BUFFER_BLOCK]);
2200}
2201
2202/* vim: set filetype=c: */