PageRenderTime 50ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/cgi/html.c

http://clearsilver.googlecode.com/
C | 839 lines | 766 code | 29 blank | 44 comment | 244 complexity | 6b47cb857135d485fc3699f2b67f6ce2 MD5 | raw file
Possible License(s): BSD-2-Clause
  1. /*
  2. * Copyright 2001-2004 Brandon Long
  3. * All Rights Reserved.
  4. *
  5. * ClearSilver Templating System
  6. *
  7. * This code is made available under the terms of the ClearSilver License.
  8. * http://www.clearsilver.net/license.hdf
  9. *
  10. */
  11. #include "cs_config.h"
  12. #include <stdlib.h>
  13. #include <string.h>
  14. #include <sys/types.h>
  15. #include <regex.h>
  16. #include <ctype.h>
  17. #include "util/neo_misc.h"
  18. #include "util/neo_err.h"
  19. #include "util/neo_str.h"
  20. #include "util/ulocks.h"
  21. #include "html.h"
  22. #include "cgi.h"
  /*
   * Heuristic: does this plain text rely on whitespace layout?
   *
   * Scans the first slen bytes of src and returns
   *   1  as soon as a tab is seen, or when an ordinary character is reached
   *      while more than two spaces or more than two newlines are pending;
   *   2  when more than three characters from the punctuation set below occur
   *      with no ordinary character in between (treated as "ASCII art");
   *   0  otherwise.
   *
   * A space directly after a '.' is not counted, a newline resets the space
   * count, and '\r' is ignored entirely.  Runs of spaces or newlines at the
   * very end of the input are never evaluated because no ordinary character
   * follows them.
   */
  static int has_space_formatting(const char *src, int slen)
  {
    int spaces = 0;
    int returns = 0;
    int ascii_art = 0;
    int x;

    for (x = 0; x < slen; x++)
    {
      const char c = src[x];

      if (c == '\t') return 1;

      if (c == ' ')
      {
        /* Sentence spacing ("end. Next") is not layout. */
        if (!(x && (src[x-1] == '.')))
          spaces++;
      }
      else if (c == '\n')
      {
        spaces = 0;
        returns++;
      }
      /* strchr() also matches the terminating NUL of the set, so a NUL byte
       * must be excluded explicitly or it would count as punctuation. */
      else if (c != '\0' && strchr ("/\\<>:[]!@#$%^&*()|", c))
      {
        ascii_art++;
        if (ascii_art > 3) return 2;
      }
      else if (c != '\r')
      {
        /* Ordinary character: judge what was pending, then start over. */
        if (returns > 2) return 1;
        if (spaces > 2) return 1;
        returns = 0;
        spaces = 0;
        ascii_art = 0;
      }
    }
    return 0;
  }
  59. /*
  60. static int has_long_lines (char *s, int l)
  61. {
  62. char *ptr;
  63. int x = 0;
  64. while (x < l)
  65. {
  66. ptr = strchr (s + x, '\n');
  67. if (ptr == NULL)
  68. {
  69. if (l - x > 75) return 1;
  70. return 0;
  71. }
  72. if (ptr - (s + x) > 75) return 1;
  73. x = ptr - s + 1;
  74. }
  75. return 0;
  76. }
  77. */
  78. /* The first step is to find all of the URLs and email
  79. * addresses using our handy regular expressions. We then mark these,
  80. * and then go through and convert the non-special areas with straight
  81. * text->html escapes, and convert the special parts (URLs and email
  82. * addresses) into links. */
  /* One regex match found in the source text.  begin/end are byte offsets
   * into the source taken from regmatch_t rm_so/rm_eo; the code below uses
   * (end - begin) as the match length and src[end - 1] as its last byte.
   * type is one of the SC_TYPE_* values. */
  83. struct _parts {
  84. int begin;
  85. int end;
  86. int type;
  87. };
  /* Values for _parts.type.  Only SC_TYPE_URL and SC_TYPE_EMAIL are assigned
   * in the code visible here; SC_TYPE_TEXT is defined but not referenced. */
  88. #define SC_TYPE_TEXT 1
  89. #define SC_TYPE_URL 2
  90. #define SC_TYPE_EMAIL 3
  /* Pattern sources for the two matchers.  Both are compiled by
   * split_and_convert() with REG_ICASE | REG_EXTENDED.  EmailRe finds email
   * addresses; URLRe finds http/https/ftp/mailto URLs and bare "www." hosts. */
  91. static char *EmailRe = "[^][@:;<>\\\"()[:space:][:cntrl:]]+@[-+a-zA-Z0-9]+\\.[-+a-zA-Z0-9\\.]+[-+a-zA-Z0-9]";
  92. static char *URLRe = "((http|https|ftp|mailto):(//)?[^[:space:]>\"\t]*|www\\.[-a-z0-9\\.]+)[^[:space:];\t\">]*";
  93. #ifdef HAVE_PTHREADS
  94. /* In multi-threaded environments, we have to init thread safely */
  /* Held by split_and_convert() around the one-time regcomp() calls. */
  95. static pthread_mutex_t InitLock = PTHREAD_MUTEX_INITIALIZER;
  96. #endif
  /* Set to 1 by split_and_convert() after it has run the regcomp() calls;
   * read without the lock as a fast-path check.
   * NOTE(review): this is a plain int, not an atomic -- confirm that the
   * unlocked read is acceptable on the targeted platforms. */
  97. static int CompiledRe = 0;
  /* Compiled forms of EmailRe and URLRe. */
  98. static regex_t EmailRegex, UrlRegex;
  99. static NEOERR *split_and_convert (const char *src, int slen,
  100. STRING *out, HTML_CONVERT_OPTS *opts)
  101. {
  102. NEOERR *err = STATUS_OK;
  103. regmatch_t email_match, url_match;
  104. int errcode;
  105. char *ptr, *esc;
  106. char errbuf[256];
  107. struct _parts *parts;
  108. int part_count;
  109. int part;
  110. int x, i;
  111. int spaces = 0;
  112. if (!CompiledRe)
  113. {
  114. #ifdef HAVE_PTHREADS
  115. /* In threaded environments, we have to mutex lock to do this regcomp, but
  116. * we don't want to use a mutex every time to check that it was regcomp.
  117. * So, we only lock if our first test of compiled was false */
  118. err = mLock(&InitLock);
  119. if (err != STATUS_OK) return nerr_pass(err);
  120. if (CompiledRe == 0) {
  121. #endif
  122. if ((errcode = regcomp (&EmailRegex, EmailRe, REG_ICASE | REG_EXTENDED)))
  123. {
  124. regerror (errcode, &EmailRegex, errbuf, sizeof(errbuf));
  125. err = nerr_raise (NERR_PARSE, "Unable to compile EmailRE: %s", errbuf);
  126. }
  127. if ((errcode = regcomp (&UrlRegex, URLRe, REG_ICASE | REG_EXTENDED)))
  128. {
  129. regerror (errcode, &UrlRegex, errbuf, sizeof(errbuf));
  130. err = nerr_raise (NERR_PARSE, "Unable to compile URLRe: %s", errbuf);
  131. }
  132. CompiledRe = 1;
  133. #ifdef HAVE_PTHREADS
  134. }
  135. if (err) {
  136. mUnlock(&InitLock);
  137. return err;
  138. }
  139. err = mUnlock(&InitLock);
  140. if (err != STATUS_OK) return nerr_pass(err);
  141. #else
  142. if (err) {
  143. return err;
  144. }
  145. #endif
  146. }
  147. part_count = 20;
  148. parts = (struct _parts *) malloc (sizeof(struct _parts) * part_count);
  149. part = 0;
  150. x = 0;
  151. if (regexec (&EmailRegex, src+x, 1, &email_match, 0) != 0)
  152. {
  153. email_match.rm_so = -1;
  154. email_match.rm_eo = -1;
  155. }
  156. else
  157. {
  158. email_match.rm_so += x;
  159. email_match.rm_eo += x;
  160. }
  161. if (regexec (&UrlRegex, src+x, 1, &url_match, 0) != 0)
  162. {
  163. url_match.rm_so = -1;
  164. url_match.rm_eo = -1;
  165. }
  166. else
  167. {
  168. url_match.rm_so += x;
  169. url_match.rm_eo += x;
  170. }
  171. while ((x < slen) && !((email_match.rm_so == -1) && (url_match.rm_so == -1)))
  172. {
  173. if (part >= part_count)
  174. {
  175. void *new_ptr;
  176. part_count *= 2;
  177. new_ptr = realloc (parts, sizeof(struct _parts) * part_count);
  178. if (new_ptr == NULL) {
  179. free(parts);
  180. return nerr_raise (NERR_NOMEM,
  181. "Unable to increase url matcher to %d urls",
  182. part_count);
  183. }
  184. parts = (struct _parts *) new_ptr;
  185. }
  186. if ((url_match.rm_so != -1) && ((email_match.rm_so == -1) || (url_match.rm_so <= email_match.rm_so)))
  187. {
  188. parts[part].begin = url_match.rm_so;
  189. parts[part].end = url_match.rm_eo;
  190. parts[part].type = SC_TYPE_URL;
  191. x = parts[part].end + 1;
  192. part++;
  193. if (x < slen)
  194. {
  195. if (regexec (&UrlRegex, src+x, 1, &url_match, 0) != 0)
  196. {
  197. url_match.rm_so = -1;
  198. url_match.rm_eo = -1;
  199. }
  200. else
  201. {
  202. url_match.rm_so += x;
  203. url_match.rm_eo += x;
  204. }
  205. if ((email_match.rm_so != -1) && (x > email_match.rm_so))
  206. {
  207. if (regexec (&EmailRegex, src+x, 1, &email_match, 0) != 0)
  208. {
  209. email_match.rm_so = -1;
  210. email_match.rm_eo = -1;
  211. }
  212. else
  213. {
  214. email_match.rm_so += x;
  215. email_match.rm_eo += x;
  216. }
  217. }
  218. }
  219. }
  220. else
  221. {
  222. parts[part].begin = email_match.rm_so;
  223. parts[part].end = email_match.rm_eo;
  224. parts[part].type = SC_TYPE_EMAIL;
  225. x = parts[part].end + 1;
  226. part++;
  227. if (x < slen)
  228. {
  229. if (regexec (&EmailRegex, src+x, 1, &email_match, 0) != 0)
  230. {
  231. email_match.rm_so = -1;
  232. email_match.rm_eo = -1;
  233. }
  234. else
  235. {
  236. email_match.rm_so += x;
  237. email_match.rm_eo += x;
  238. }
  239. if ((url_match.rm_so != -1) && (x > url_match.rm_so))
  240. {
  241. if (regexec (&UrlRegex, src+x, 1, &url_match, 0) != 0)
  242. {
  243. url_match.rm_so = -1;
  244. url_match.rm_eo = -1;
  245. }
  246. else
  247. {
  248. url_match.rm_so += x;
  249. url_match.rm_eo += x;
  250. }
  251. }
  252. }
  253. }
  254. }
  255. i = 0;
  256. x = 0;
  257. while (x < slen)
  258. {
  259. if ((i >= part) || (x < parts[i].begin))
  260. {
  261. ptr = strpbrk(src + x, "&<>\r\n ");
  262. if (ptr == NULL)
  263. {
  264. if (spaces)
  265. {
  266. int sp;
  267. for (sp = 0; sp < spaces - 1; sp++)
  268. {
  269. err = string_append (out, "&nbsp;");
  270. if (err != STATUS_OK) break;
  271. }
  272. if (err != STATUS_OK) break;
  273. err = string_append_char (out, ' ');
  274. }
  275. spaces = 0;
  276. if (i < part)
  277. {
  278. err = string_appendn (out, src + x, parts[i].begin - x);
  279. x = parts[i].begin;
  280. }
  281. else
  282. {
  283. err = string_append (out, src + x);
  284. x = slen;
  285. }
  286. }
  287. else
  288. {
  289. if ((i >= part) || ((ptr - src) < parts[i].begin))
  290. {
  291. if (spaces)
  292. {
  293. int sp;
  294. for (sp = 0; sp < spaces - 1; sp++)
  295. {
  296. err = string_append (out, "&nbsp;");
  297. if (err != STATUS_OK) break;
  298. }
  299. if (err != STATUS_OK) break;
  300. err = string_append_char (out, ' ');
  301. }
  302. spaces = 0;
  303. err = string_appendn (out, src + x, (ptr - src) - x);
  304. if (err != STATUS_OK) break;
  305. x = ptr - src;
  306. if (src[x] == ' ')
  307. {
  308. if (opts->space_convert)
  309. {
  310. spaces++;
  311. }
  312. else
  313. err = string_append_char (out, ' ');
  314. }
  315. else
  316. {
  317. if (src[x] != '\n' && spaces)
  318. {
  319. int sp;
  320. for (sp = 0; sp < spaces - 1; sp++)
  321. {
  322. err = string_append (out, "&nbsp;");
  323. if (err != STATUS_OK) break;
  324. }
  325. if (err != STATUS_OK) break;
  326. err = string_append_char (out, ' ');
  327. }
  328. spaces = 0;
  329. if (src[x] == '&')
  330. err = string_append (out, "&amp;");
  331. else if (src[x] == '<')
  332. err = string_append (out, "&lt;");
  333. else if (src[x] == '>')
  334. err = string_append (out, "&gt;");
  335. else if (src[x] == '\n')
  336. if (opts->newlines_convert)
  337. err = string_append (out, "<br/>\n");
  338. else if (x && src[x-1] == '\n')
  339. err = string_append (out, "<p/>\n");
  340. else
  341. err = string_append_char (out, '\n');
  342. else if (src[x] != '\r')
  343. err = nerr_raise (NERR_ASSERT, "src[x] == '%c'", src[x]);
  344. }
  345. x++;
  346. }
  347. else
  348. {
  349. if (spaces)
  350. {
  351. int sp;
  352. for (sp = 0; sp < spaces - 1; sp++)
  353. {
  354. err = string_append (out, "&nbsp;");
  355. if (err != STATUS_OK) break;
  356. }
  357. if (err != STATUS_OK) break;
  358. err = string_append_char (out, ' ');
  359. }
  360. spaces = 0;
  361. err = string_appendn (out, src + x, parts[i].begin - x);
  362. x = parts[i].begin;
  363. }
  364. }
  365. }
  366. else
  367. {
  368. if (spaces)
  369. {
  370. int sp;
  371. for (sp = 0; sp < spaces - 1; sp++)
  372. {
  373. err = string_append (out, "&nbsp;");
  374. if (err != STATUS_OK) break;
  375. }
  376. if (err != STATUS_OK) break;
  377. err = string_append_char (out, ' ');
  378. }
  379. spaces = 0;
  380. if (parts[i].type == SC_TYPE_URL)
  381. {
  382. char last_char = src[parts[i].end-1];
  383. int suffix=0;
  384. if (last_char == '.' || last_char == ',') { suffix=1; }
  385. err = string_append (out, " <a ");
  386. if (err != STATUS_OK) break;
  387. if (opts->url_class)
  388. {
  389. err = string_appendf (out, "class=%s ", opts->url_class);
  390. if (err) break;
  391. }
  392. if (opts->url_target)
  393. {
  394. err = string_appendf (out, "target=\"%s\" ", opts->url_target);
  395. if (err) break;
  396. }
  397. err = string_append(out, "href=\"");
  398. if (err) break;
  399. if (opts->bounce_url)
  400. {
  401. char *url, *esc_url, *new_url;
  402. int url_len;
  403. if (!strncasecmp(src + x, "www.", 4))
  404. {
  405. url_len = 7 + parts[i].end - x - suffix;
  406. url = (char *) malloc(url_len+1);
  407. if (url == NULL)
  408. {
  409. err = nerr_raise(NERR_NOMEM,
  410. "Unable to allocate memory to convert url");
  411. break;
  412. }
  413. strcpy(url, "http://");
  414. strncat(url, src + x, parts[i].end - x - suffix);
  415. }
  416. else
  417. {
  418. url_len = parts[i].end - x - suffix;
  419. url = (char *) malloc(url_len+1);
  420. if (url == NULL)
  421. {
  422. err = nerr_raise(NERR_NOMEM,
  423. "Unable to allocate memory to convert url");
  424. break;
  425. }
  426. strncpy(url, src + x, parts[i].end - x - suffix);
  427. url[url_len] = '\0';
  428. }
  429. err = cgi_url_escape(url, &esc_url);
  430. free(url);
  431. if (err) {
  432. free(esc_url);
  433. break;
  434. }
  435. new_url = sprintf_alloc(opts->bounce_url, esc_url);
  436. free(esc_url);
  437. if (new_url == NULL)
  438. {
  439. err = nerr_raise(NERR_NOMEM, "Unable to allocate memory to convert url");
  440. break;
  441. }
  442. err = string_append (out, new_url);
  443. free(new_url);
  444. if (err) break;
  445. }
  446. else
  447. {
  448. if (!strncasecmp(src + x, "www.", 4))
  449. {
  450. err = string_append (out, "http://");
  451. if (err != STATUS_OK) break;
  452. }
  453. err = string_appendn (out, src + x, parts[i].end - x - suffix);
  454. if (err != STATUS_OK) break;
  455. }
  456. err = string_append (out, "\">");
  457. if (err != STATUS_OK) break;
  458. if (opts->link_name) {
  459. err = html_escape_alloc((opts->link_name),
  460. strlen(opts->link_name), &esc);
  461. } else {
  462. err = html_escape_alloc((src + x), parts[i].end - x - suffix, &esc);
  463. }
  464. if (err != STATUS_OK) break;
  465. err = string_append (out, esc);
  466. free(esc);
  467. if (err != STATUS_OK) break;
  468. err = string_append (out, "</a>");
  469. if (suffix) {
  470. err = string_appendn(out,src + parts[i].end - 1,1);
  471. if (err != STATUS_OK) break;
  472. }
  473. }
  474. else /* type == SC_TYPE_EMAIL */
  475. {
  476. err = string_append (out, "<a ");
  477. if (err != STATUS_OK) break;
  478. if (opts->mailto_class)
  479. {
  480. err = string_appendf (out, "class=%s ", opts->mailto_class);
  481. if (err) break;
  482. }
  483. err = string_append(out, "href=\"mailto:");
  484. if (err) break;
  485. err = string_appendn (out, src + x, parts[i].end - x);
  486. if (err != STATUS_OK) break;
  487. err = string_append (out, "\">");
  488. if (err != STATUS_OK) break;
  489. err = html_escape_alloc(src + x, parts[i].end - x, &esc);
  490. if (err != STATUS_OK) break;
  491. err = string_append (out, esc);
  492. free(esc);
  493. if (err != STATUS_OK) break;
  494. err = string_append (out, "</a>");
  495. }
  496. x = parts[i].end;
  497. i++;
  498. }
  499. if (err != STATUS_OK) break;
  500. }
  501. free (parts);
  502. return err;
  503. }
  504. static void strip_white_space_end (STRING *str)
  505. {
  506. int x = 0;
  507. int ol = str->len;
  508. char *ptr;
  509. int i;
  510. while (x < str->len)
  511. {
  512. ptr = strchr(str->buf + x, '\n');
  513. if (ptr == NULL)
  514. {
  515. /* just strip the white space at the end of the string */
  516. ol = strlen(str->buf);
  517. while (ol && isspace(str->buf[ol-1]))
  518. {
  519. str->buf[ol - 1] = '\0';
  520. ol--;
  521. }
  522. str->len = ol;
  523. return;
  524. }
  525. else
  526. {
  527. x = i = ptr - str->buf;
  528. if (x)
  529. {
  530. x--;
  531. while (x && isspace(str->buf[x]) && (str->buf[x] != '\n')) x--;
  532. if (x) x++;
  533. memmove (str->buf + x, ptr, ol - i + 1);
  534. x++;
  535. str->len -= ((i - x) + 1);
  536. str->buf[str->len] = '\0';
  537. ol = str->len;
  538. }
  539. }
  540. }
  541. }
  542. NEOERR *convert_text_html_alloc (const char *src, int slen,
  543. char **out)
  544. {
  545. return nerr_pass(convert_text_html_alloc_options(src, slen, out, NULL));
  546. }
  547. NEOERR *convert_text_html_alloc_options (const char *src, int slen,
  548. char **out,
  549. HTML_CONVERT_OPTS *opts)
  550. {
  551. NEOERR *err;
  552. STRING out_s;
  553. int formatting = 0;
  554. HTML_CONVERT_OPTS my_opts;
  555. string_init(&out_s);
  556. if (opts == NULL)
  557. {
  558. opts = &my_opts;
  559. opts->bounce_url = NULL;
  560. opts->url_class = NULL;
  561. opts->url_target = "_blank";
  562. opts->mailto_class = NULL;
  563. opts->long_lines = 0;
  564. opts->space_convert = 0;
  565. opts->newlines_convert = 1;
  566. opts->longline_width = 75; /* This hasn't been used in a while, actually */
  567. opts->check_ascii_art = 1;
  568. opts->link_name = NULL;
  569. }
  570. do
  571. {
  572. if (opts->check_ascii_art)
  573. {
  574. formatting = has_space_formatting (src, slen);
  575. if (formatting) opts->space_convert = 1;
  576. }
  577. if (formatting == 2)
  578. {
  579. /* Do <pre> formatting */
  580. opts->newlines_convert = 1;
  581. err = string_append (&out_s, "<tt>");
  582. if (err != STATUS_OK) break;
  583. err = split_and_convert(src, slen, &out_s, opts);
  584. if (err != STATUS_OK) break;
  585. err = string_append (&out_s, "</tt>");
  586. if (err != STATUS_OK) break;
  587. /* Strip white space at end of lines */
  588. strip_white_space_end (&out_s);
  589. }
  590. else
  591. {
  592. /* int nl = has_long_lines (src, slen); */
  593. err = split_and_convert(src, slen, &out_s, opts);
  594. }
  595. } while (0);
  596. if (err != STATUS_OK)
  597. {
  598. string_clear (&out_s);
  599. return nerr_pass (err);
  600. }
  601. if (out_s.buf == NULL)
  602. {
  603. *out = strdup("");
  604. }
  605. else
  606. {
  607. *out = out_s.buf;
  608. }
  609. return STATUS_OK;
  610. }
  611. NEOERR *html_escape_alloc (const char *src, int slen,
  612. char **out)
  613. {
  614. return nerr_pass(neos_html_escape(src, slen, out));
  615. }
  /* Named character entities understood by _expand_amp_8859_1_char() and the
   * ISO-8859-1 byte each one maps to. */
  static const struct amp_entity_8859_1 {
    const char *name;
    unsigned char code;
  } AmpEntities8859_1[] = {
    { "agrave", 0xe0 }, { "aacute", 0xe1 }, { "acirc",  0xe2 },
    { "atilde", 0xe3 }, { "auml",   0xe4 }, { "aring",  0xe5 },
    { "aelig",  0xe6 }, { "amp",    '&'  },
    { "ccedil", 0xe7 },
    { "egrave", 0xe8 }, { "eacute", 0xe9 }, { "ecirc",  0xea },
    { "euml",   0xeb }, { "eth",    0xf0 },
    { "igrave", 0xec }, { "iacute", 0xed }, { "icirc",  0xee },
    { "iuml",   0xef },
    { "gt",     '>'  },
    { "lt",     '<'  },
    { "ntilde", 0xf1 }, { "nbsp",   ' '  },
    { "ograve", 0xf2 }, { "oacute", 0xf3 }, { "ocirc",  0xf4 },
    { "otilde", 0xf5 }, { "ouml",   0xf6 }, { "oslash", 0xf8 },
    { "quot",   '"'  },
    { "szlig",  0xdf },
    { "thorn",  0xfe },
    { "ugrave", 0xf9 }, { "uacute", 0xfa }, { "ucirc",  0xfb },
    { "uuml",   0xfc },
    { "yacute", 0xfd }
  };

  /*
   * Map the body of a character reference (the text between '&' and ';') to
   * a single ISO-8859-1 byte.
   *
   * s  entity body: a lower-case entity name such as "amp" or "eacute", or a
   *    numeric reference "#NNN" (decimal) / "#xHH" or "#XHH" (hexadecimal).
   *
   * Returns the byte value, or 0 when the name is unknown, the string is
   * empty, or the numeric value lies outside 1..255 and therefore has no
   * ISO-8859-1 representation (such values are rejected rather than
   * truncated to their low byte).
   */
  static unsigned char _expand_amp_8859_1_char (const char *s)
  {
    size_t i;

    if (s[0] == '\0')
      return 0;

    if (s[0] == '#')
    {
      long code;

      if (s[1] == 'x' || s[1] == 'X')
        code = strtol (s + 2, NULL, 16);
      else
        code = strtol (s + 1, NULL, 10);
      if (code < 1 || code > 0xff)
        return 0;
      return (unsigned char) code;
    }

    for (i = 0; i < sizeof(AmpEntities8859_1) / sizeof(AmpEntities8859_1[0]); i++)
    {
      if (!strcmp (s, AmpEntities8859_1[i].name))
        return AmpEntities8859_1[i].code;
    }
    return 0;
  }
  /*
   * Expand the body of a character reference to text.
   *
   * amp  entity body (the text between '&' and ';').
   * buf  caller-supplied scratch space of at least two bytes.
   *
   * When _expand_amp_8859_1_char() yields a non-zero byte, that byte is
   * written to buf as a one-character string and buf is returned.  Otherwise
   * a string literal is returned: "(C)" for "copy", "" for anything else.
   * The literals must not be modified or freed by the caller.
   */
  char *html_expand_amp_8859_1(const char *amp,
                               char *buf)
  {
    unsigned char code = _expand_amp_8859_1_char(amp);

    if (code != '\0')
    {
      buf[0] = (char)code;
      buf[1] = '\0';
      return buf;
    }
    if (strcmp(amp, "copy") == 0)
      return "(C)";
    return "";
  }
  705. NEOERR *html_strip_alloc(const char *src, int slen,
  706. char **out)
  707. {
  708. NEOERR *err = STATUS_OK;
  709. STRING out_s;
  710. int x = 0;
  711. int strip_match = -1;
  712. int state = 0;
  713. char amp[10];
  714. int amp_start = 0;
  715. char buf[10];
  716. int ampl = 0;
  717. string_init(&out_s);
  718. err = string_append (&out_s, "");
  719. if (err) return nerr_pass (err);
  720. while (x < slen)
  721. {
  722. switch (state) {
  723. case 0:
  724. /* Default */
  725. if (src[x] == '&')
  726. {
  727. state = 3;
  728. ampl = 0;
  729. amp_start = x;
  730. }
  731. else if (src[x] == '<')
  732. {
  733. state = 1;
  734. }
  735. else
  736. {
  737. if (strip_match == -1)
  738. {
  739. err = string_append_char(&out_s, src[x]);
  740. if (err) break;
  741. }
  742. }
  743. x++;
  744. break;
  745. case 1:
  746. /* Starting TAG */
  747. if (src[x] == '>')
  748. {
  749. state = 0;
  750. }
  751. else if (src[x] == '/')
  752. {
  753. }
  754. else
  755. {
  756. }
  757. x++;
  758. break;
  759. case 2:
  760. /* In TAG */
  761. if (src[x] == '>')
  762. {
  763. state = 0;
  764. }
  765. x++;
  766. break;
  767. case 3:
  768. /* In AMP */
  769. if (src[x] == ';')
  770. {
  771. amp[ampl] = '\0';
  772. state = 0;
  773. err = string_append(&out_s, html_expand_amp_8859_1(amp, buf));
  774. if (err) break;
  775. }
  776. else
  777. {
  778. if (ampl < sizeof(amp)-1)
  779. amp[ampl++] = tolower(src[x]);
  780. else
  781. {
  782. /* broken html... just back up */
  783. x = amp_start;
  784. err = string_append_char(&out_s, src[x]);
  785. if (err) break;
  786. state = 0;
  787. }
  788. }
  789. x++;
  790. break;
  791. }
  792. if (err) break;
  793. }
  794. if (err)
  795. {
  796. string_clear (&out_s);
  797. return nerr_pass (err);
  798. }
  799. *out = out_s.buf;
  800. return STATUS_OK;
  801. }