PageRenderTime 45ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/reddish/lib/sundown/html/html_smartypants.c

https://bitbucket.org/murarth/reddish
C | 389 lines | 321 code | 53 blank | 15 comment | 144 complexity | c437cef296ff77dced3f8dfce7c1fdcc MD5 | raw file
  /*
   * Copyright (c) 2011, Vicent Marti
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
   * copyright notice and this permission notice appear in all copies.
   *
   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   */
  16. #include "buffer.h"
  17. #include "html.h"
  18. #include <string.h>
  19. #include <stdlib.h>
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #if defined(_WIN32)
  23. #define snprintf _snprintf
  24. #endif
  /*
   * Quote-tracking state shared by the callbacks during one smartypants pass.
   * Each flag is flipped by smartypants_quotes() every time it emits a quote
   * entity of the corresponding kind.
   */
  struct smartypants_data {
      int in_squote;  /* non-zero after an opening single quote was emitted */
      int in_dquote;  /* non-zero after an opening double quote was emitted */
  };
  /*
   * Forward declarations of the per-character callbacks, listed in the order
   * of their slots in the dispatch table. Every callback receives the output
   * buffer, the quote state, the byte preceding the trigger (0 at the start
   * of the input), a pointer to the trigger byte and the number of bytes
   * remaining from that pointer. The return value is added to the caller's
   * cursor in addition to the one byte the caller always advances.
   */
  static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt,
      uint8_t previous_char, const uint8_t *text, size_t size);
  39. static size_t (*smartypants_cb_ptrs[])
  40. (struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
  41. {
  42. NULL, /* 0 */
  43. smartypants_cb__dash, /* 1 */
  44. smartypants_cb__parens, /* 2 */
  45. smartypants_cb__squote, /* 3 */
  46. smartypants_cb__dquote, /* 4 */
  47. smartypants_cb__amp, /* 5 */
  48. smartypants_cb__period, /* 6 */
  49. smartypants_cb__number, /* 7 */
  50. smartypants_cb__ltag, /* 8 */
  51. smartypants_cb__backtick, /* 9 */
  52. smartypants_cb__escape, /* 10 */
  53. };
  /*
   * Maps every possible byte value to an action code (an index into
   * smartypants_cb_ptrs). Bytes that are not listed stay 0, meaning they are
   * copied through unchanged. Indices are spelled as byte values so the table
   * is independent of the compiler's execution character set.
   */
  static const uint8_t smartypants_cb_chars[256] = {
      [0x2D] = 1,   /* '-'  */
      [0x28] = 2,   /* '('  */
      [0x27] = 3,   /* '\'' */
      [0x22] = 4,   /* '"'  */
      [0x26] = 5,   /* '&'  */
      [0x2E] = 6,   /* '.'  */
      [0x31] = 7,   /* '1'  */
      [0x33] = 7,   /* '3'  */
      [0x3C] = 8,   /* '<'  */
      [0x60] = 9,   /* '`'  */
      [0x5C] = 10,  /* '\\' */
  };
  /*
   * Reports whether `c` can delimit a word: returns 1 for the NUL byte, for
   * whitespace and for punctuation (as classified by <ctype.h>), 0 otherwise.
   */
  static inline int
  word_boundary(uint8_t c)
  {
      if (c == 0)
          return 1;

      if (isspace(c))
          return 1;

      return ispunct(c) ? 1 : 0;
  }
  /*
   * Tries to turn a quote character into a typographic quote entity.
   *
   * `quote` is the letter placed in the entity name ('s' or 'd' at the call
   * sites in this file), and `*is_open` says whether a quote of that kind is
   * currently open. A closing quote is only emitted when `next_char` is a word
   * boundary; an opening quote only when `previous_char` is one.
   *
   * Returns 1 after appending "&l?quo;" / "&r?quo;" to `ob` and toggling
   * `*is_open`; returns 0 (nothing written, state untouched) otherwise.
   */
  static int
  smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
  {
      char entity[8];
      const int closing = (*is_open != 0);

      if (closing) {
          if (!word_boundary(next_char))
              return 0;
      } else if (!word_boundary(previous_char)) {
          return 0;
      }

      snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
      *is_open = !closing;
      bufputs(ob, entity);
      return 1;
  }
  90. static size_t
  91. smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  92. {
  93. if (size >= 2) {
  94. uint8_t t1 = tolower(text[1]);
  95. if (t1 == '\'') {
  96. if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
  97. return 1;
  98. }
  99. if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
  100. (size == 3 || word_boundary(text[2]))) {
  101. BUFPUTSL(ob, "&rsquo;");
  102. return 0;
  103. }
  104. if (size >= 3) {
  105. uint8_t t2 = tolower(text[2]);
  106. if (((t1 == 'r' && t2 == 'e') ||
  107. (t1 == 'l' && t2 == 'l') ||
  108. (t1 == 'v' && t2 == 'e')) &&
  109. (size == 4 || word_boundary(text[3]))) {
  110. BUFPUTSL(ob, "&rsquo;");
  111. return 0;
  112. }
  113. }
  114. }
  115. if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
  116. return 0;
  117. bufputc(ob, text[0]);
  118. return 0;
  119. }
  /*
   * Callback for an opening parenthesis. Recognises "(c)", "(r)" and "(tm)"
   * (letters matched case-insensitively) and replaces them with "&copy;",
   * "&reg;" and "&trade;". Anything else copies the parenthesis through.
   *
   * Returns the number of extra bytes consumed beyond the parenthesis.
   */
  static size_t
  smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  {
      if (size >= 3) {
          const uint8_t first = tolower(text[1]);
          const uint8_t second = tolower(text[2]);

          if (second == ')') {
              switch (first) {
              case 'c':
                  BUFPUTSL(ob, "&copy;");
                  return 2;
              case 'r':
                  BUFPUTSL(ob, "&reg;");
                  return 2;
              default:
                  break;
              }
          } else if (first == 't' && second == 'm' && size >= 4 && text[3] == ')') {
              BUFPUTSL(ob, "&trade;");
              return 3;
          }
      }

      bufputc(ob, text[0]);
      return 0;
  }
  /*
   * Callback for a dash. Three dashes in a row become "&mdash;", two become
   * "&ndash;", and a lone dash is copied through.
   *
   * Returns the number of extra bytes consumed beyond the first dash.
   */
  static size_t
  smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  {
      /* Length of the dash run starting at text[0], capped at three. */
      size_t run = 1;

      while (run < 3 && run < size && text[run] == '-')
          run++;

      switch (run) {
      case 3:
          BUFPUTSL(ob, "&mdash;");
          return 2;
      case 2:
          BUFPUTSL(ob, "&ndash;");
          return 1;
      default:
          bufputc(ob, text[0]);
          return 0;
      }
  }
  156. static size_t
  157. smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  158. {
  159. if (size >= 6 && memcmp(text, "&quot;", 6) == 0) {
  160. if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
  161. return 5;
  162. }
  163. if (size >= 4 && memcmp(text, "&#0;", 4) == 0)
  164. return 3;
  165. bufputc(ob, '&');
  166. return 0;
  167. }
  /*
   * Callback for a period. Both "..." and ". . ." are replaced with
   * "&hellip;"; any other period is copied through.
   *
   * Returns the number of extra bytes consumed beyond the first period.
   */
  static size_t
  smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  {
      size_t extra = 0;

      if (size >= 3 && text[1] == '.' && text[2] == '.')
          extra = 2;
      else if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.')
          extra = 4;

      if (extra == 0) {
          bufputc(ob, text[0]);
          return 0;
      }

      BUFPUTSL(ob, "&hellip;");
      return extra;
  }
  182. static size_t
  183. smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  184. {
  185. if (size >= 2 && text[1] == '`') {
  186. if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
  187. return 1;
  188. }
  189. return 0;
  190. }
  /*
   * Callback for the digits '1' and '3'. When the digit follows a word
   * boundary and starts one of the fractions below, the fraction is replaced
   * by its entity:
   *
   *   1/2            -> "&frac12;"
   *   1/4, 1/4th     -> "&frac14;"
   *   3/4, 3/4ths    -> "&frac34;"
   *
   * The bare fraction must be followed by a word boundary or end the input;
   * the suffixed forms are matched case-insensitively. In every case only the
   * three fraction bytes are consumed (return value 2). Anything else copies
   * the digit through and returns 0.
   */
  static size_t
  smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  {
      if (size >= 3 && word_boundary(previous_char) && text[1] == '/') {
          const uint8_t numerator = text[0];
          const uint8_t denominator = text[2];
          /* text[3] is only read when size > 3 */
          const int terminated = (size == 3) || word_boundary(text[3]);

          if (numerator == '1' && denominator == '2') {
              if (terminated) {
                  BUFPUTSL(ob, "&frac12;");
                  return 2;
              }
          } else if (numerator == '1' && denominator == '4') {
              if (terminated ||
                  (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
                  BUFPUTSL(ob, "&frac14;");
                  return 2;
              }
          } else if (numerator == '3' && denominator == '4') {
              if (terminated ||
                  (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
                  BUFPUTSL(ob, "&frac34;");
                  return 2;
              }
          }
      }

      bufputc(ob, text[0]);
      return 0;
  }
  219. static size_t
  220. smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  221. {
  222. if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
  223. BUFPUTSL(ob, "&quot;");
  224. return 0;
  225. }
  226. static size_t
  227. smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  228. {
  229. static const char *skip_tags[] = {
  230. "pre", "code", "var", "samp", "kbd", "math", "script", "style"
  231. };
  232. static const size_t skip_tags_count = 8;
  233. size_t tag, i = 0;
  234. while (i < size && text[i] != '>')
  235. i++;
  236. for (tag = 0; tag < skip_tags_count; ++tag) {
  237. if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
  238. break;
  239. }
  240. if (tag < skip_tags_count) {
  241. for (;;) {
  242. while (i < size && text[i] != '<')
  243. i++;
  244. if (i == size)
  245. break;
  246. if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
  247. break;
  248. i++;
  249. }
  250. while (i < size && text[i] != '>')
  251. i++;
  252. }
  253. bufput(ob, text, i + 1);
  254. return i;
  255. }
  /*
   * Callback for a backslash. A backslash in front of one of the characters
   * \ " ' . - ` escapes it: the backslash is dropped and the character is
   * written literally, bypassing its own callback. In every other case,
   * including a backslash that is the last byte of the input, the backslash
   * itself is written and nothing else is consumed.
   *
   * Returns 1 when an escaped character was consumed, 0 otherwise.
   */
  static size_t
  smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
  {
      if (size >= 2) {
          switch (text[1]) {
          case '\\':
          case '"':
          case '\'':
          case '.':
          case '-':
          case '`':
              bufputc(ob, text[1]);
              return 1;

          default:
              break;
          }
      }

      /* Not an escape sequence: keep the backslash instead of losing it. */
      bufputc(ob, '\\');
      return 0;
  }
  #if 0
  /*
   * Disabled (never compiled) table form of the substitutions. Each row gives
   * the trigger byte, a pattern, the replacement entity and a skip count.
   * NOTE(review): the dash rows here do not agree with smartypants_cb__dash,
   * which emits "&ndash;" for "--" and "&mdash;" for "---"; treat this table
   * as historical reference only.
   */
  static struct {
      uint8_t c0;
      const uint8_t *pattern;
      const uint8_t *entity;
      int skip;
  } smartypants_subs[] = {
      { '\'', "'s>",       "&rsquo;",  0 },
      { '\'', "'t>",       "&rsquo;",  0 },
      { '\'', "'re>",      "&rsquo;",  0 },
      { '\'', "'ll>",      "&rsquo;",  0 },
      { '\'', "'ve>",      "&rsquo;",  0 },
      { '\'', "'m>",       "&rsquo;",  0 },
      { '\'', "'d>",       "&rsquo;",  0 },
      { '-',  "--",        "&mdash;",  1 },
      { '-',  "<->",       "&ndash;",  0 },
      { '.',  "...",       "&hellip;", 2 },
      { '.',  ". . .",     "&hellip;", 4 },
      { '(',  "(c)",       "&copy;",   2 },
      { '(',  "(r)",       "&reg;",    2 },
      { '(',  "(tm)",      "&trade;",  3 },
      { '3',  "<3/4>",     "&frac34;", 2 },
      { '3',  "<3/4ths>",  "&frac34;", 2 },
      { '1',  "<1/2>",     "&frac12;", 2 },
      { '1',  "<1/4>",     "&frac14;", 2 },
      { '1',  "<1/4th>",   "&frac14;", 2 },
      { '&',  "&#0;",      0,          3 },
  };
  #endif
  304. void
  305. sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
  306. {
  307. size_t i;
  308. struct smartypants_data smrt = {0, 0};
  309. if (!text)
  310. return;
  311. bufgrow(ob, size);
  312. for (i = 0; i < size; ++i) {
  313. size_t org;
  314. uint8_t action = 0;
  315. org = i;
  316. while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
  317. i++;
  318. if (i > org)
  319. bufput(ob, text + org, i - org);
  320. if (i < size) {
  321. i += smartypants_cb_ptrs[(int)action]
  322. (ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
  323. }
  324. }
  325. }