PageRenderTime 53ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/util/neo_str.c

http://clearsilver.googlecode.com/
C | 957 lines | 796 code | 101 blank | 60 comment | 206 complexity | ab6dabf93eb23e6f92a0a7c249c3ae8b MD5 | raw file
Possible License(s): BSD-2-Clause
  1. /*
  2. * Copyright 2001-2004 Brandon Long
  3. * All Rights Reserved.
  4. *
  5. * ClearSilver Templating System
  6. *
  7. * This code is made available under the terms of the ClearSilver License.
  8. * http://www.clearsilver.net/license.hdf
  9. *
  10. */
  #include "cs_config.h"
  #include <unistd.h>
  #include <ctype.h>
  #include <limits.h>
  #include <stdlib.h>
  #include <stdio.h>
  #include <string.h>
  #include <stdarg.h>
  #include <regex.h>
  #include "neo_misc.h"
  #include "neo_err.h"
  #include "neo_str.h"
  #include "ulist.h"
  /* Fallback for toolchains whose <stdarg.h> does not provide va_copy:
   * use the __va_copy spelling when it is available, otherwise fall back
   * to plain assignment of the va_list. */
  #if !defined(va_copy)
  # if defined(__va_copy)
  #  define va_copy(dest,src) __va_copy(dest,src)
  # else
  #  define va_copy(dest,src) ((dest) = (src))
  # endif
  #endif
  /* Strip leading and trailing whitespace from s.
   *
   * Trailing whitespace is removed in place by overwriting it with NUL
   * bytes; leading whitespace is skipped by returning a pointer to the
   * first non-space character inside s.  The returned pointer aliases s,
   * so it must not be freed separately.  s must be a writable,
   * NUL-terminated string. */
  char *neos_strip (char *s)
  {
    size_t n = strlen(s);

    /* <ctype.h> functions require a value representable as unsigned char;
     * a negative plain char would be undefined behavior. */
    while (n > 0 && isspace((unsigned char)s[n - 1])) s[--n] = '\0';
    while (*s && isspace((unsigned char)*s)) s++;
    return s;
  }
  /* Remove trailing whitespace from s in place by overwriting it with NUL
   * bytes.  Returns s itself.  s must be a writable, NUL-terminated
   * string. */
  char *neos_rstrip (char *s)
  {
    size_t n = strlen (s);

    /* Cast to unsigned char: isspace() on a negative char is undefined. */
    while (n > 0 && isspace((unsigned char)s[n - 1]))
    {
      n--;
      s[n] = '\0';
    }
    return s;
  }
  /* Convert every character of the NUL-terminated string s to lower case,
   * in place, using tolower(). */
  void neos_lower(char *s)
  {
    /* tolower() needs an unsigned char value; a negative plain char would
     * be undefined behavior. */
    for (; *s != '\0'; s++)
      *s = (char) tolower((unsigned char)*s);
  }
  /* Duplicate at most len characters of s into a freshly malloc'd,
   * NUL-terminated buffer of len+1 bytes.
   *
   * Returns NULL when s is NULL, when len is negative, or when the
   * allocation fails.  The caller owns the result and releases it with
   * free(). */
  char *neos_strndup(const char *s, int len)
  {
    size_t n = 0;
    char *dupl;

    /* A negative len used to under-allocate and then write out of bounds. */
    if (s == NULL || len < 0) return NULL;

    /* Do the arithmetic in size_t so len == INT_MAX cannot overflow. */
    dupl = (char *) malloc((size_t)len + 1);
    if (dupl == NULL) return NULL;

    /* Copy up to len characters, stopping early at the source terminator. */
    while (n < (size_t)len && s[n] != '\0')
    {
      dupl[n] = s[n];
      n++;
    }
    dupl[n] = '\0';
    dupl[len] = '\0';
    return dupl;
  }
  70. void string_init (STRING *str)
  71. {
  72. str->buf = NULL;
  73. str->len = 0;
  74. str->max = 0;
  75. str->fixed = 0;
  76. }
  77. void string_clear (STRING *str)
  78. {
  79. if (str->buf != NULL)
  80. free(str->buf);
  81. string_init(str);
  82. }
  83. static NEOERR* string_check_length (STRING *str, int l)
  84. {
  85. if (str->buf == NULL)
  86. {
  87. if (l * 10 > 256)
  88. str->max = l * 10;
  89. else
  90. str->max = 256;
  91. str->buf = (char *) malloc (sizeof(char) * str->max);
  92. if (str->buf == NULL)
  93. return nerr_raise (NERR_NOMEM, "Unable to allocate render buf of size %d",
  94. str->max);
  95. /* ne_warn("Creating string %x at %d (%5.2fK)", str, str->max, (str->max / 1024.0)); */
  96. }
  97. else if (str->len + l >= str->max)
  98. {
  99. void *new_ptr;
  100. int new_max = str->max;
  101. /* TODO(blong): better would be to fill to size and drop the rest */
  102. if (str->fixed)
  103. return nerr_raise(NERR_ASSERT, "Length exceeds fixed size %d", str->max);
  104. do
  105. {
  106. new_max *= 2;
  107. } while (str->len + l >= new_max);
  108. new_ptr = realloc (str->buf, sizeof(char) * new_max);
  109. if (new_ptr == NULL) {
  110. return nerr_raise (NERR_NOMEM, "Unable to allocate STRING buf of size %d",
  111. new_max);
  112. }
  113. str->buf = (char *) new_ptr;
  114. str->max = new_max;
  115. /* ne_warn("Growing string %x to %d (%5.2fK)", str, str->max, (str->max / 1024.0)); */
  116. }
  117. return STATUS_OK;
  118. }
  119. NEOERR *string_set (STRING *str, const char *buf)
  120. {
  121. str->len = 0;
  122. return nerr_pass (string_append (str, buf));
  123. }
  124. NEOERR *string_append (STRING *str, const char *buf)
  125. {
  126. NEOERR *err;
  127. int l;
  128. l = strlen(buf);
  129. err = string_check_length (str, l);
  130. if (err != STATUS_OK) return nerr_pass (err);
  131. strcpy(str->buf + str->len, buf);
  132. str->len += l;
  133. return STATUS_OK;
  134. }
  135. NEOERR *string_appendn (STRING *str, const char *buf, int l)
  136. {
  137. NEOERR *err;
  138. err = string_check_length (str, l+1);
  139. if (err != STATUS_OK) return nerr_pass (err);
  140. memcpy(str->buf + str->len, buf, l);
  141. str->len += l;
  142. str->buf[str->len] = '\0';
  143. return STATUS_OK;
  144. }
  145. /* this is much more efficient with C99 snprintfs... */
  146. NEOERR *string_appendvf (STRING *str, const char *fmt, va_list ap)
  147. {
  148. NEOERR *err;
  149. char buf[4096];
  150. int bl, size;
  151. va_list tmp;
  152. va_copy(tmp, ap);
  153. /* determine length */
  154. size = sizeof (buf);
  155. bl = vsnprintf (buf, size, fmt, tmp);
  156. if (bl > -1 && bl < size)
  157. return string_appendn (str, buf, bl);
  158. /* Handle non-C99 snprintfs (requires extra malloc/free and copy) */
  159. if (bl == -1)
  160. {
  161. char *a_buf;
  162. va_copy(tmp, ap);
  163. a_buf = vnsprintf_alloc(size*2, fmt, tmp);
  164. if (a_buf == NULL)
  165. return nerr_raise(NERR_NOMEM,
  166. "Unable to allocate memory for formatted string");
  167. err = string_append(str, a_buf);
  168. free(a_buf);
  169. return nerr_pass(err);
  170. }
  171. err = string_check_length (str, bl+1);
  172. if (err != STATUS_OK) return nerr_pass (err);
  173. va_copy(tmp, ap);
  174. vsprintf (str->buf + str->len, fmt, tmp);
  175. str->len += bl;
  176. str->buf[str->len] = '\0';
  177. return STATUS_OK;
  178. }
  179. NEOERR *string_appendf (STRING *str, const char *fmt, ...)
  180. {
  181. NEOERR *err;
  182. va_list ap;
  183. va_start (ap, fmt);
  184. err = string_appendvf (str, fmt, ap);
  185. va_end (ap);
  186. return nerr_pass(err);
  187. }
  188. NEOERR *string_append_char (STRING *str, char c)
  189. {
  190. NEOERR *err;
  191. err = string_check_length (str, 1);
  192. if (err != STATUS_OK) return nerr_pass (err);
  193. str->buf[str->len] = c;
  194. str->buf[str->len + 1] = '\0';
  195. str->len += 1;
  196. return STATUS_OK;
  197. }
  198. void string_array_init (STRING_ARRAY *arr)
  199. {
  200. arr->entries = NULL;
  201. arr->count = 0;
  202. arr->max = 0;
  203. }
  204. NEOERR *string_array_split (ULIST **list, char *s, const char *sep,
  205. int max)
  206. {
  207. NEOERR *err;
  208. char *p, *n, *f;
  209. int sl;
  210. int x = 0;
  211. if (sep[0] == '\0')
  212. return nerr_raise (NERR_ASSERT, "separator must be at least one character");
  213. err = uListInit (list, 10, 0);
  214. if (err) return nerr_pass(err);
  215. sl = strlen(sep);
  216. p = (sl == 1) ? strchr (s, sep[0]) : strstr (s, sep);
  217. f = s;
  218. while (p != NULL)
  219. {
  220. if (x >= max) break;
  221. *p = '\0';
  222. n = strdup(f);
  223. *p = sep[0];
  224. if (n) err = uListAppend (*list, n);
  225. else err = nerr_raise(NERR_NOMEM,
  226. "Unable to allocate memory to split %s", s);
  227. if (err) goto split_err;
  228. f = p+sl;
  229. p = (sl == 1) ? strchr (f, sep[0]) : strstr (f, sep);
  230. x++;
  231. }
  232. /* Handle remainder */
  233. n = strdup(f);
  234. if (n) err = uListAppend (*list, n);
  235. else err = nerr_raise(NERR_NOMEM,
  236. "Unable to allocate memory to split %s", s);
  237. if (err) goto split_err;
  238. return STATUS_OK;
  239. split_err:
  240. uListDestroy(list, ULIST_FREE);
  241. return err;
  242. }
  243. void string_array_clear (STRING_ARRAY *arr)
  244. {
  245. int x;
  246. for (x = 0; x < arr->count; x++)
  247. {
  248. if (arr->entries[x] != NULL) free (arr->entries[x]);
  249. arr->entries[x] = NULL;
  250. }
  251. free (arr->entries);
  252. arr->entries = NULL;
  253. arr->count = 0;
  254. }
  /* Mostly used by vprintf_alloc for non-C99 compliant snprintfs,
   * this is like vsprintf_alloc except it takes a "suggested" size.
   *
   * Formats fmt/ap into a malloc'd buffer that starts at start_size bytes
   * (at least 1) and grows until the output fits: to exactly the reported
   * length when vsnprintf returns it, or by doubling when vsnprintf
   * returns a negative value.
   *
   * On success *buf receives the buffer (owned by the caller, release
   * with free()) and the formatted length is returned.  On allocation
   * failure, or if the size can no longer be represented in an int, *buf
   * is set to NULL and 0 is returned.  ap is only read through copies. */
  int vnisprintf_alloc (char **buf, int start_size, const char *fmt, va_list ap)
  {
    int bl, size;
    va_list tmp;

    *buf = NULL;
    /* A non-positive size could never grow by doubling. */
    size = (start_size > 0) ? start_size : 1;

    *buf = (char *) malloc (size * sizeof(char));
    if (*buf == NULL) return 0;
    while (1)
    {
      void *new_ptr;

      va_copy(tmp, ap);
      bl = vsnprintf (*buf, size, fmt, tmp);
      va_end(tmp);
      if (bl > -1 && bl < size)
        return bl;

      if (bl > -1 && bl < INT_MAX)
        size = bl + 1;
      else if (bl < 0 && size <= INT_MAX / 2)
        size *= 2;
      else
      {
        /* the next size does not fit in an int */
        free(*buf);
        *buf = NULL;
        return 0;
      }
      new_ptr = realloc (*buf, size * sizeof(char));
      if (new_ptr == NULL) {
        free(*buf);
        *buf = NULL;
        return 0;
      }
      *buf = (char *) new_ptr;
    }
  }
  /* Same as vnisprintf_alloc() but returns the allocated buffer instead
   * of the length; the result is NULL when that call left it NULL. */
  char *vnsprintf_alloc (int start_size, const char *fmt, va_list ap)
  {
    char *result;

    (void) vnisprintf_alloc(&result, start_size, fmt, ap);
    return result;
  }
  /* This works better with a C99 compliant vsnprintf, but should work ok
   * with versions that return a -1 if it overflows the buffer.
   *
   * Formats fmt/ap into a malloc'd buffer stored in *buf and returns the
   * formatted length.  Output that fits in a 4096-byte stack buffer is
   * copied out in full, including any embedded NUL bytes; larger output
   * is delegated to vnisprintf_alloc().  If the allocation on the
   * small-output path fails, *buf is NULL and 0 is returned. */
  int visprintf_alloc (char **buf, const char *fmt, va_list ap)
  {
    char ibuf[4096];
    int bl, size;
    va_list tmp;

    /* PPC doesn't like you re-using a va_list... and it might not be
     * supposed to work at all */
    va_copy(tmp, ap);
    size = sizeof (ibuf);
    bl = vsnprintf (ibuf, sizeof (ibuf), fmt, tmp);
    va_end(tmp);

    if (bl > -1 && bl < size)
    {
      *buf = (char *) malloc((size_t)bl + 1);
      if (*buf == NULL) return 0;
      /* memcpy rather than strncpy: the output may contain NUL bytes, and
       * strncpy would drop everything after the first one.  The extra
       * byte is the terminator vsnprintf wrote at ibuf[bl]. */
      memcpy(*buf, ibuf, (size_t)bl + 1);
      return bl;
    }

    if (bl > -1)
      size = bl + 1;
    else
      size *= 2;

    return vnisprintf_alloc(buf, size, fmt, ap);
  }
  /* Same as visprintf_alloc() but returns the allocated buffer instead of
   * the length. */
  char *vsprintf_alloc (const char *fmt, va_list ap)
  {
    char *result;

    (void) visprintf_alloc(&result, fmt, ap);
    return result;
  }
  /* Variadic front end for visprintf_alloc().  sprintf output can
   * technically contain NUL bytes, so the formatted length is returned
   * (like real sprintf) and the buffer is handed back through *buf. */
  int isprintf_alloc (char **buf, const char *fmt, ...)
  {
    va_list args;
    int len;

    va_start (args, fmt);
    len = visprintf_alloc (buf, fmt, args);
    va_end (args);

    return len;
  }
  /* Variadic front end for vsprintf_alloc(): format into a newly
   * allocated buffer and return it. */
  char *sprintf_alloc (const char *fmt, ...)
  {
    va_list args;
    char *result;

    va_start (args, fmt);
    result = vsprintf_alloc (fmt, args);
    va_end (args);

    return result;
  }
  /* Variadic front end for vnsprintf_alloc().
   *
   * This is mostly just here for completeness; I doubt anyone would use
   * it.  It is more efficient (time-wise) if start_size is bigger than
   * the resulting string, and less efficient than sprintf_alloc if we
   * have a C99 snprintf and the output doesn't fit in start_size.
   * BTW: If you are really worried about the efficiency of these
   * functions, maybe you shouldn't be using them in the first place... */
  char *nsprintf_alloc (int start_size, const char *fmt, ...)
  {
    va_list args;
    char *result;

    va_start (args, fmt);
    result = vnsprintf_alloc (start_size, fmt, args);
    va_end (args);

    return result;
  }
  357. BOOL reg_search (const char *re, const char *str)
  358. {
  359. regex_t search_re;
  360. int errcode;
  361. char buf[256];
  362. if ((errcode = regcomp(&search_re, re, REG_ICASE | REG_EXTENDED | REG_NOSUB)))
  363. {
  364. regerror (errcode, &search_re, buf, sizeof(buf));
  365. ne_warn ("Unable to compile regex %s: %s", re, buf);
  366. return FALSE;
  367. }
  368. errcode = regexec (&search_re, str, 0, NULL, 0);
  369. regfree (&search_re);
  370. if (errcode == 0)
  371. return TRUE;
  372. return FALSE;
  373. }
  374. NEOERR *string_readline (STRING *str, FILE *fp)
  375. {
  376. NEOERR *err;
  377. /* minimum size for a readline is 256 above current position */
  378. err = string_check_length (str, str->len + 256);
  379. if (err != STATUS_OK) return nerr_pass (err);
  380. while (fgets(str->buf + str->len, str->max - str->len, fp) != NULL)
  381. {
  382. str->len = strlen(str->buf);
  383. if (str->buf[str->len-1] == '\n') break;
  384. err = string_check_length (str, str->len + 256);
  385. if (err != STATUS_OK) return nerr_pass (err);
  386. }
  387. return STATUS_OK;
  388. }
  389. NEOERR* neos_escape(UINT8 *buf, int buflen, char esc_char, const char *escape,
  390. char **esc)
  391. {
  392. int nl = 0;
  393. int l = 0;
  394. int x = 0;
  395. char *s;
  396. int match = 0;
  397. while (l < buflen)
  398. {
  399. if (buf[l] == esc_char)
  400. {
  401. nl += 2;
  402. }
  403. else
  404. {
  405. x = 0;
  406. while (escape[x])
  407. {
  408. if (escape[x] == buf[l])
  409. {
  410. nl +=2;
  411. break;
  412. }
  413. x++;
  414. }
  415. }
  416. nl++;
  417. l++;
  418. }
  419. s = (char *) malloc (sizeof(char) * (nl + 1));
  420. if (s == NULL)
  421. return nerr_raise (NERR_NOMEM, "Unable to allocate memory to escape %s",
  422. buf);
  423. nl = 0; l = 0;
  424. while (l < buflen)
  425. {
  426. match = 0;
  427. if (buf[l] == esc_char)
  428. {
  429. match = 1;
  430. }
  431. else
  432. {
  433. x = 0;
  434. while (escape[x])
  435. {
  436. if (escape[x] == buf[l])
  437. {
  438. match = 1;
  439. break;
  440. }
  441. x++;
  442. }
  443. }
  444. if (match)
  445. {
  446. s[nl++] = esc_char;
  447. s[nl++] = "0123456789ABCDEF"[buf[l] / 16];
  448. s[nl++] = "0123456789ABCDEF"[buf[l] % 16];
  449. l++;
  450. }
  451. else
  452. {
  453. s[nl++] = buf[l++];
  454. }
  455. }
  456. s[nl] = '\0';
  457. *esc = s;
  458. return STATUS_OK;
  459. }
  460. UINT8 *neos_unescape (UINT8 *s, int buflen, char esc_char)
  461. {
  462. int i = 0, o = 0;
  463. if (s == NULL) return s;
  464. while (i < buflen)
  465. {
  466. if (s[i] == esc_char && (i+2 < buflen) &&
  467. isxdigit(s[i+1]) && isxdigit(s[i+2]))
  468. {
  469. UINT8 num;
  470. num = (s[i+1] >= 'A') ? ((s[i+1] & 0xdf) - 'A') + 10 : (s[i+1] - '0');
  471. num *= 16;
  472. num += (s[i+2] >= 'A') ? ((s[i+2] & 0xdf) - 'A') + 10 : (s[i+2] - '0');
  473. s[o++] = num;
  474. i+=3;
  475. }
  476. else {
  477. s[o++] = s[i++];
  478. }
  479. }
  480. if (i && o) s[o] = '\0';
  481. return s;
  482. }
  /* Build a quoted, C-like representation of s in a malloc'd buffer.
   *
   * The result is wrapped in double quotes.  Printable characters are
   * copied as-is; newline, tab, carriage return, '"' and '\' become
   * two-character backslash escapes; every other byte becomes a
   * backslash followed by three octal digits.  A NULL s yields the
   * unquoted text NULL.
   *
   * Returns NULL if the allocation fails; otherwise the caller owns the
   * result and releases it with free(). */
  char *repr_string_alloc (const char *s)
  {
    size_t l, x, i;
    size_t nl = 0;
    char *rs;

    if (s == NULL)
    {
      rs = (char *) malloc (sizeof("NULL"));
      if (rs != NULL) memcpy(rs, "NULL", sizeof("NULL"));
      return rs;
    }

    /* Pass 1: work out how long the representation will be. */
    l = strlen(s);
    for (x = 0; x < l; x++)
    {
      /* isprint() needs an unsigned char value; a negative plain char
       * would be undefined behavior. */
      unsigned char c = (unsigned char) s[x];

      if (isprint(c) && c != '"' && c != '\\')
        nl += 1;
      else if (c == '\n' || c == '\t' || c == '\r' || c == '"' || c == '\\')
        nl += 2;
      else
        nl += 4;
    }

    /* two quotes plus the terminating NUL */
    rs = (char *) malloc ((nl+3) * sizeof(char));
    if (rs == NULL)
      return NULL;

    /* Pass 2: emit the representation. */
    i = 0;
    rs[i++] = '"';
    for (x = 0; x < l; x++)
    {
      unsigned char c = (unsigned char) s[x];

      if (isprint(c) && c != '"' && c != '\\')
      {
        rs[i++] = (char) c;
        continue;
      }

      rs[i++] = '\\';
      switch (c)
      {
        case '\n':
          rs[i++] = 'n';
          break;
        case '\t':
          rs[i++] = 't';
          break;
        case '\r':
          rs[i++] = 'r';
          break;
        case '"':
          rs[i++] = '"';
          break;
        case '\\':
          rs[i++] = '\\';
          break;
        default:
          /* three octal digits, most significant first */
          rs[i++] = (char) ('0' + ((c >> 6) & 7));
          rs[i++] = (char) ('0' + ((c >> 3) & 7));
          rs[i++] = (char) ('0' + (c & 7));
          break;
      }
    }
    rs[i++] = '"';
    rs[i] = '\0';
    return rs;
  }
  551. NEOERR *neos_js_escape (const char *in, char **esc)
  552. {
  553. int nl = 0;
  554. int l = 0;
  555. unsigned char *buf = (unsigned char *)in;
  556. unsigned char *s;
  557. while (buf[l])
  558. {
  559. if (buf[l] == '/' || buf[l] == '"' || buf[l] == '\'' ||
  560. buf[l] == '\\' || buf[l] == '>' || buf[l] == '<' ||
  561. buf[l] == '&' || buf[l] == ';' || buf[l] < 32)
  562. {
  563. nl += 3;
  564. }
  565. nl++;
  566. l++;
  567. }
  568. s = (unsigned char *) malloc (sizeof(unsigned char) * (nl + 1));
  569. if (s == NULL)
  570. return nerr_raise (NERR_NOMEM, "Unable to allocate memory to escape %s",
  571. buf);
  572. nl = 0; l = 0;
  573. while (buf[l])
  574. {
  575. if (buf[l] == '/' || buf[l] == '"' || buf[l] == '\'' ||
  576. buf[l] == '\\' || buf[l] == '>' || buf[l] == '<' ||
  577. buf[l] == '&' || buf[l] == ';' || buf[l] < 32)
  578. {
  579. s[nl++] = '\\';
  580. s[nl++] = 'x';
  581. s[nl++] = "0123456789ABCDEF"[(buf[l] >> 4) & 0xF];
  582. s[nl++] = "0123456789ABCDEF"[buf[l] & 0xF];
  583. l++;
  584. }
  585. else
  586. {
  587. s[nl++] = buf[l++];
  588. }
  589. }
  590. s[nl] = '\0';
  591. *esc = (char *)s;
  592. return STATUS_OK;
  593. }
  /* List of all characters that must be escaped
   * List based on http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
   */
  static char QueryReservedChars[] = "$&+,/:;=?@ \"<>#%{}|\\^~[]`'";
  /* List of characters to escape in URLs inside CSS. */
  static char CssReservedChars[] = "\n\r\"'()*<>\\";
  /* Non-zero when character c occurs in the NUL-terminated list l.
   * Because this is strchr(), c == '\0' also matches (the terminator).
   * Both arguments are parenthesized so expressions can be passed
   * safely. */
  #define IN_LIST(l, c) (strchr((l), (c)) != NULL)
  601. /*
  602. * Apply URL escaping to 'in' and return result in 'esc'.
  603. * The parameters 'reserved' and 'other' indicate which characters to escape.
  604. * If 'escape_non_printable' is non zero, all characters < 0x20 and > 0x7E
  605. * will also be escaped.
  606. */
  607. static NEOERR *url_escape_helper (const char *in, char **esc, char *reserved,
  608. const char *other, int escape_non_printable)
  609. {
  610. int nl = 0;
  611. int l = 0;
  612. int x = 0;
  613. unsigned char *buf = (unsigned char *)in;
  614. unsigned char *uother = (unsigned char *)other;
  615. unsigned char *s;
  616. int match = 0;
  617. while (buf[l])
  618. {
  619. if (IN_LIST(reserved, buf[l]) ||
  620. (escape_non_printable && (buf[l] < 32 || buf[l] > 126)))
  621. {
  622. nl += 2;
  623. }
  624. else if (uother)
  625. {
  626. x = 0;
  627. while (uother[x])
  628. {
  629. if (uother[x] == buf[l])
  630. {
  631. nl +=2;
  632. break;
  633. }
  634. x++;
  635. }
  636. }
  637. nl++;
  638. l++;
  639. }
  640. s = (unsigned char *) malloc (sizeof(unsigned char) * (nl + 1));
  641. if (s == NULL)
  642. return nerr_raise (NERR_NOMEM, "Unable to allocate memory to escape %s",
  643. buf);
  644. nl = 0; l = 0;
  645. while (buf[l])
  646. {
  647. match = 0;
  648. if (buf[l] == ' ' && IN_LIST(reserved, buf[l]))
  649. {
  650. s[nl++] = '+';
  651. l++;
  652. }
  653. else
  654. {
  655. if (IN_LIST(reserved, buf[l]) ||
  656. (escape_non_printable && (buf[l] < 32 || buf[l] > 126)))
  657. {
  658. match = 1;
  659. }
  660. else if (uother)
  661. {
  662. x = 0;
  663. while (uother[x])
  664. {
  665. if (uother[x] == buf[l])
  666. {
  667. match = 1;
  668. break;
  669. }
  670. x++;
  671. }
  672. }
  673. if (match)
  674. {
  675. s[nl++] = '%';
  676. s[nl++] = "0123456789ABCDEF"[buf[l] / 16];
  677. s[nl++] = "0123456789ABCDEF"[buf[l] % 16];
  678. l++;
  679. }
  680. else
  681. {
  682. s[nl++] = buf[l++];
  683. }
  684. }
  685. }
  686. s[nl] = '\0';
  687. *esc = (char *)s;
  688. return STATUS_OK;
  689. }
  690. NEOERR *neos_url_escape (const char *in, char **esc,
  691. const char *other)
  692. {
  693. return url_escape_helper(in, esc, QueryReservedChars, other, 1);
  694. }
  695. NEOERR *neos_html_escape (const char *src, int slen,
  696. char **out)
  697. {
  698. NEOERR *err = STATUS_OK;
  699. STRING out_s;
  700. int x;
  701. char *ptr;
  702. string_init(&out_s);
  703. err = string_append (&out_s, "");
  704. if (err) return nerr_pass (err);
  705. *out = NULL;
  706. x = 0;
  707. while (x < slen)
  708. {
  709. ptr = strpbrk(src + x, "&<>\"'\r");
  710. if (ptr == NULL || (ptr-src >= slen))
  711. {
  712. err = string_appendn (&out_s, src + x, slen-x);
  713. x = slen;
  714. }
  715. else
  716. {
  717. err = string_appendn (&out_s, src + x, (ptr - src) - x);
  718. if (err != STATUS_OK) break;
  719. x = ptr - src;
  720. if (src[x] == '&')
  721. err = string_append (&out_s, "&amp;");
  722. else if (src[x] == '<')
  723. err = string_append (&out_s, "&lt;");
  724. else if (src[x] == '>')
  725. err = string_append (&out_s, "&gt;");
  726. else if (src[x] == '"')
  727. err = string_append (&out_s, "&quot;");
  728. else if (src[x] == '\'')
  729. err = string_append (&out_s, "&#39;");
  730. else if (src[x] != '\r')
  731. err = nerr_raise (NERR_ASSERT, "src[x] == '%c'", src[x]);
  732. x++;
  733. }
  734. if (err != STATUS_OK) break;
  735. }
  736. if (err)
  737. {
  738. string_clear (&out_s);
  739. return nerr_pass (err);
  740. }
  741. *out = out_s.buf;
  742. return STATUS_OK;
  743. }
  744. static NEOERR *css_url_escape(const char *in, char **esc)
  745. {
  746. return url_escape_helper(in, esc, CssReservedChars, NULL, 0);
  747. }
  748. char *URL_PROTOCOLS[] = {"http://", "https://", "ftp://", "mailto:"};
  749. /*
  750. * Helper function to validate a URL for protecting against XSS.
  751. * Ensures that the URL is a relative URL or an absolute url with a safe scheme
  752. * (currently http, https, ftp or mailto). This is to avoid
  753. * dangerous schemes like javascript. It then escapes the URL in the requested
  754. * escape_mode.
  755. */
  756. static NEOERR *url_validate(const char *in, char **esc, NEOS_ESCAPE escape_mode)
  757. {
  758. NEOERR *err = STATUS_OK;
  759. STRING out_s;
  760. int valid = 0;
  761. size_t i;
  762. size_t inlen;
  763. int num_protocols = sizeof(URL_PROTOCOLS) / sizeof(char*);
  764. void* slashpos;
  765. void* colonpos;
  766. inlen = strlen(in);
  767. /*
  768. * <a href="//b:80"> or <a href="a/b:80"> are allowed by browsers
  769. * and ":" is treated as part of the path, while
  770. * <a href="www.google.com:80"> is an invalid url
  771. * and ":" is treated as a scheme separator.
  772. *
  773. * Hence allow for ":" in the path part of a url (after /)
  774. */
  775. slashpos = memchr(in, '/', inlen);
  776. if (slashpos == NULL) {
  777. i = inlen;
  778. }
  779. else {
  780. i = (size_t)((char*)slashpos - in);
  781. }
  782. colonpos = memchr(in, ':', i);
  783. if (colonpos == NULL) {
  784. /* no scheme in 'in': so this is a relative url */
  785. valid = 1;
  786. }
  787. else {
  788. for (i = 0; i < num_protocols; i++)
  789. {
  790. if ((inlen >= strlen(URL_PROTOCOLS[i])) &&
  791. strncasecmp(in, URL_PROTOCOLS[i], strlen(URL_PROTOCOLS[i])) == 0) {
  792. /* 'in' starts with one of the allowed protocols */
  793. valid = 1;
  794. break;
  795. }
  796. }
  797. }
  798. if (valid)
  799. {
  800. if (escape_mode == NEOS_ESCAPE_HTML)
  801. {
  802. return neos_html_escape(in, inlen, esc);
  803. }
  804. else if(escape_mode == NEOS_ESCAPE_CSS_URL)
  805. {
  806. return css_url_escape(in, esc);
  807. }
  808. else
  809. {
  810. return nerr_raise(NERR_ASSERT, "Invalid escape mode: %d\n", escape_mode);
  811. }
  812. }
  813. /* 'in' contains an unsupported scheme, replace with '#' */
  814. string_init(&out_s);
  815. err = string_append (&out_s, "#");
  816. if (err) return nerr_pass (err);
  817. *esc = out_s.buf;
  818. return STATUS_OK;
  819. }
  820. NEOERR *neos_url_validate (const char *in, char **esc)
  821. {
  822. return url_validate(in, esc, NEOS_ESCAPE_HTML);
  823. }
  824. NEOERR *neos_css_url_validate (const char *in, char **esc)
  825. {
  826. return url_validate(in, esc, NEOS_ESCAPE_CSS_URL);
  827. }
  828. NEOERR *neos_var_escape (NEOS_ESCAPE context,
  829. const char *in,
  830. char **esc)
  831. {
  832. /* Just dup and return if we do nothing. */
  833. if (context == NEOS_ESCAPE_NONE ||
  834. context == NEOS_ESCAPE_FUNCTION)
  835. {
  836. *esc = strdup(in);
  837. return STATUS_OK;
  838. }
  839. /* Now we escape based on context. This is the order of precedence:
  840. * url > script > style > html
  841. */
  842. if (context & NEOS_ESCAPE_URL)
  843. return nerr_pass(neos_url_escape(in, esc, NULL));
  844. else if (context & NEOS_ESCAPE_SCRIPT)
  845. return nerr_pass(neos_js_escape(in, esc));
  846. else if (context & NEOS_ESCAPE_HTML)
  847. return nerr_pass(neos_html_escape(in, strlen(in), esc));
  848. return nerr_raise(NERR_ASSERT, "unknown escape context supplied: %d",
  849. context);
  850. }