/reddish/lib/sundown/html/html_smartypants.c
C | 389 lines | 321 code | 53 blank | 15 comment | 144 complexity | c437cef296ff77dced3f8dfce7c1fdcc MD5 | raw file
- /*
- * Copyright (c) 2011, Vicent Marti
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- #include "buffer.h"
- #include "html.h"
- #include <string.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include <ctype.h>
- #if defined(_WIN32)
- #define snprintf _snprintf
- #endif
/*
 * Quote state shared by the callbacks during a single smartypants pass.
 * Each flag is toggled by smartypants_quotes() every time it emits a quote.
 */
struct smartypants_data {
	int in_squote;	/* non-zero after an opening single quote was emitted */
	int in_dquote;	/* non-zero after an opening double quote was emitted */
};
/*
 * Forward declarations of the per-character callbacks, listed in the order
 * of their action codes in smartypants_cb_ptrs.
 *
 * All callbacks share one signature: they receive the output buffer, the
 * quote state, the byte preceding the trigger (0 at start of input), and the
 * remaining input starting at the trigger byte. The return value is added to
 * the driver's position before its own increment, i.e. it is the number of
 * bytes consumed in addition to the trigger byte.
 */
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
- static size_t (*smartypants_cb_ptrs[])
- (struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
- {
- NULL, /* 0 */
- smartypants_cb__dash, /* 1 */
- smartypants_cb__parens, /* 2 */
- smartypants_cb__squote, /* 3 */
- smartypants_cb__dquote, /* 4 */
- smartypants_cb__amp, /* 5 */
- smartypants_cb__period, /* 6 */
- smartypants_cb__number, /* 7 */
- smartypants_cb__ltag, /* 8 */
- smartypants_cb__backtick, /* 9 */
- smartypants_cb__escape, /* 10 */
- };
/*
 * Maps every input byte to an action code (an index into
 * smartypants_cb_ptrs). Bytes not listed here map to 0, meaning they are
 * copied through unchanged. The character constants assume an ASCII
 * execution character set, as did the positional table this replaces.
 */
static const uint8_t smartypants_cb_chars[256] = {
	['-']  = 1,	/* dash */
	['(']  = 2,	/* parens */
	['\''] = 3,	/* squote */
	['"']  = 4,	/* dquote */
	['&']  = 5,	/* amp */
	['.']  = 6,	/* period */
	['1']  = 7,	/* number */
	['3']  = 7,	/* number */
	['<']  = 8,	/* ltag */
	['`']  = 9,	/* backtick */
	['\\'] = 10,	/* escape */
};
/*
 * Returns 1 when `c` can delimit a word: NUL (used by callers to mean
 * "no character"), whitespace, or punctuation; returns 0 otherwise.
 */
static inline int
word_boundary(uint8_t c)
{
	if (c == 0)
		return 1;

	return (isspace(c) || ispunct(c)) ? 1 : 0;
}
/*
 * Tries to emit a curly-quote entity of the form "&lXquo;" / "&rXquo;",
 * where X is `quote` (callers in this file pass 's' or 'd').
 *
 * A closing quote (state currently open) is only emitted when `next_char`
 * is a word boundary; an opening quote only when `previous_char` is one.
 * On success the entity is appended to `ob`, `*is_open` is flipped and 1 is
 * returned. Otherwise nothing is written and 0 is returned.
 */
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	char entity[8];	/* "&" + side + quote + "quo;" + NUL = 8 bytes */
	const int closing = (*is_open != 0);

	if (closing) {
		if (!word_boundary(next_char))
			return 0;
	} else {
		if (!word_boundary(previous_char))
			return 0;
	}

	snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
	*is_open = !closing;
	bufputs(ob, entity);
	return 1;
}
- static size_t
- smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
- {
- if (size >= 2) {
- uint8_t t1 = tolower(text[1]);
- if (t1 == '\'') {
- if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
- return 1;
- }
- if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
- (size == 3 || word_boundary(text[2]))) {
- BUFPUTSL(ob, "’");
- return 0;
- }
- if (size >= 3) {
- uint8_t t2 = tolower(text[2]);
- if (((t1 == 'r' && t2 == 'e') ||
- (t1 == 'l' && t2 == 'l') ||
- (t1 == 'v' && t2 == 'e')) &&
- (size == 4 || word_boundary(text[3]))) {
- BUFPUTSL(ob, "’");
- return 0;
- }
- }
- }
- if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
- return 0;
- bufputc(ob, text[0]);
- return 0;
- }
/*
 * Callback for "(". Replaces "(c)", "(r)" and "(tm)" (case-insensitive)
 * with the copyright, registered and trademark symbols; any other "(" is
 * copied unchanged.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		const uint8_t inner = tolower(text[1]);
		const uint8_t after = tolower(text[2]);

		if (after == ')') {
			if (inner == 'c') {
				BUFPUTSL(ob, "©");
				return 2;
			}
			if (inner == 'r') {
				BUFPUTSL(ob, "®");
				return 2;
			}
		} else if (size >= 4 && inner == 't' && after == 'm' && text[3] == ')') {
			BUFPUTSL(ob, "™");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
/*
 * Callback for "-". Three consecutive dashes become an em dash, two become
 * an en dash, and a single dash is copied unchanged.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	/* Length of the dash run starting at text[0], capped at three. */
	size_t run = 1;

	while (run < 3 && run < size && text[run] == '-')
		run++;

	if (run == 3) {
		BUFPUTSL(ob, "—");
		return 2;
	}

	if (run == 2) {
		BUFPUTSL(ob, "–");
		return 1;
	}

	bufputc(ob, text[0]);
	return 0;
}
- static size_t
- smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
- {
- if (size >= 6 && memcmp(text, """, 6) == 0) {
- if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
- return 5;
- }
- if (size >= 4 && memcmp(text, "�", 4) == 0)
- return 3;
- bufputc(ob, '&');
- return 0;
- }
/*
 * Callback for ".". Both "..." and ". . ." are replaced by a single
 * ellipsis character; any other period is copied unchanged.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	size_t extra = 0;	/* stays 0 when no ellipsis pattern matches */

	if (size >= 3 && text[1] == '.' && text[2] == '.')
		extra = 2;
	else if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.')
		extra = 4;

	if (extra == 0) {
		bufputc(ob, text[0]);
		return 0;
	}

	BUFPUTSL(ob, "…");
	return extra;
}
- static size_t
- smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
- {
- if (size >= 2 && text[1] == '`') {
- if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
- return 1;
- }
- return 0;
- }
/*
 * Callback for "1" and "3". When the digit starts a word, the fractions
 * "1/2", "1/4" and "3/4" are replaced by the matching fraction character,
 * provided they end the input or are followed by a word boundary. "1/4"
 * may also be followed by "th" and "3/4" by "ths" (case-insensitive); the
 * suffix itself is not consumed. Anything else is copied unchanged.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && word_boundary(previous_char) && text[1] == '/') {
		/* text[3] is only read when it exists (size > 3). */
		const int ends_word = (size == 3) || word_boundary(text[3]);
		const uint8_t numer = text[0];
		const uint8_t denom = text[2];

		if (numer == '1' && denom == '2') {
			if (ends_word) {
				BUFPUTSL(ob, "½");
				return 2;
			}
		}

		if (numer == '1' && denom == '4') {
			if (ends_word ||
			    (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				BUFPUTSL(ob, "¼");
				return 2;
			}
		}

		if (numer == '3' && denom == '4') {
			if (ends_word ||
			    (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				BUFPUTSL(ob, "¾");
				return 2;
			}
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
- static size_t
- smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
- {
- if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
- BUFPUTSL(ob, """);
- return 0;
- }
- static size_t
- smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
- {
- static const char *skip_tags[] = {
- "pre", "code", "var", "samp", "kbd", "math", "script", "style"
- };
- static const size_t skip_tags_count = 8;
- size_t tag, i = 0;
- while (i < size && text[i] != '>')
- i++;
- for (tag = 0; tag < skip_tags_count; ++tag) {
- if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
- break;
- }
- if (tag < skip_tags_count) {
- for (;;) {
- while (i < size && text[i] != '<')
- i++;
- if (i == size)
- break;
- if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
- break;
- i++;
- }
- while (i < size && text[i] != '>')
- i++;
- }
- bufput(ob, text, i + 1);
- return i;
- }
/*
 * Callback for "\". A backslash followed by one of \ " ' . - ` emits that
 * character literally (bypassing its callback) and consumes both bytes.
 * In every other case, including a backslash that is the last byte of the
 * input, the backslash itself is copied unchanged.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size < 2) {
		/* Nothing follows: keep the backslash instead of dropping it. */
		bufputc(ob, '\\');
		return 0;
	}

	switch (text[1]) {
	case '\\':
	case '"':
	case '\'':
	case '.':
	case '-':
	case '`':
		bufputc(ob, text[1]);
		return 1;

	default:
		bufputc(ob, '\\');
		return 0;
	}
}
#if 0
/*
 * Disabled (never compiled) table of substitutions: trigger character,
 * pattern, replacement and skip count. Kept for reference only.
 *
 * NOTE(review): the pattern of the last entry looks mis-encoded; its skip
 * count of 3 suggests a four-character pattern -- confirm before enabling.
 */
static struct {
	uint8_t c0;
	const uint8_t *pattern;
	const uint8_t *entity;
	int skip;
} smartypants_subs[] = {
	{ '\'', "'s>",      "’", 0 },
	{ '\'', "'t>",      "’", 0 },
	{ '\'', "'re>",     "’", 0 },
	{ '\'', "'ll>",     "’", 0 },
	{ '\'', "'ve>",     "’", 0 },
	{ '\'', "'m>",      "’", 0 },
	{ '\'', "'d>",      "’", 0 },
	{ '-',  "--",       "—", 1 },
	{ '-',  "<->",      "–", 0 },
	{ '.',  "...",      "…", 2 },
	{ '.',  ". . .",    "…", 4 },
	{ '(',  "(c)",      "©", 2 },
	{ '(',  "(r)",      "®", 2 },
	{ '(',  "(tm)",     "™", 3 },
	{ '3',  "<3/4>",    "¾", 2 },
	{ '3',  "<3/4ths>", "¾", 2 },
	{ '1',  "<1/2>",    "½", 2 },
	{ '1',  "<1/4>",    "¼", 2 },
	{ '1',  "<1/4th>",  "¼", 2 },
	{ '&',  "�",        0,   3 },
};
#endif
- void
- sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
- {
- size_t i;
- struct smartypants_data smrt = {0, 0};
- if (!text)
- return;
- bufgrow(ob, size);
- for (i = 0; i < size; ++i) {
- size_t org;
- uint8_t action = 0;
- org = i;
- while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
- i++;
- if (i > org)
- bufput(ob, text + org, i - org);
- if (i < size) {
- i += smartypants_cb_ptrs[(int)action]
- (ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
- }
- }
- }