PageRenderTime 44ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/reddish/lib/sundown/html/houdini_href_e.c

https://bitbucket.org/murarth/reddish
C | 108 lines | 59 code | 13 blank | 36 comment | 7 complexity | 327ac86078b740183d9484bf9b65b719 MD5 | raw file
  1. #include <assert.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include "houdini.h"
  5. #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
  6. /*
  7. * The following characters will not be escaped:
  8. *
  9. * -_.+!*'(),%#@?=;:/,+&$ alphanum
  10. *
  11. * Note that this character set is the addition of:
  12. *
  13. * - The characters which are safe to be in an URL
  14. * - The characters which are *not* safe to be in
  15. * an URL because they are RESERVED characters.
  16. *
  17. * We assume (lazily) that any RESERVED char that
  18. * appears inside an URL is actually meant to
  19. * have its native function (i.e. as an URL
  20. * component/separator) and hence needs no escaping.
  21. *
  22. * There are two exceptions: the characters & (amp)
  23. * and ' (single quote) do not appear in the table.
  24. * They are meant to appear in the URL as components,
  25. * yet they require special HTML-entity escaping
  26. * to generate valid HTML markup.
  27. *
  28. * All other characters will be escaped to %XX.
  29. *
  30. */
  31. static const char HREF_SAFE[] = {
  32. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  33. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  34. 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
  35. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
  36. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  37. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
  38. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  39. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  40. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  41. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  42. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  43. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  44. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  45. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  46. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  47. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  48. };
  49. void
  50. houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
  51. {
  52. static const char hex_chars[] = "0123456789ABCDEF";
  53. size_t i = 0, org;
  54. char hex_str[3];
  55. bufgrow(ob, ESCAPE_GROW_FACTOR(size));
  56. hex_str[0] = '%';
  57. while (i < size) {
  58. org = i;
  59. while (i < size && HREF_SAFE[src[i]] != 0)
  60. i++;
  61. if (i > org)
  62. bufput(ob, src + org, i - org);
  63. /* escaping */
  64. if (i >= size)
  65. break;
  66. switch (src[i]) {
  67. /* amp appears all the time in URLs, but needs
  68. * HTML-entity escaping to be inside an href */
  69. case '&':
  70. BUFPUTSL(ob, "&amp;");
  71. break;
  72. /* the single quote is a valid URL character
  73. * according to the standard; it needs HTML
  74. * entity escaping too */
  75. case '\'':
  76. BUFPUTSL(ob, "&#x27;");
  77. break;
  78. /* the space can be escaped to %20 or a plus
  79. * sign. we're going with the generic escape
  80. * for now. the plus thing is more commonly seen
  81. * when building GET strings */
  82. #if 0
  83. case ' ':
  84. bufputc(ob, '+');
  85. break;
  86. #endif
  87. /* every other character goes with a %XX escaping */
  88. default:
  89. hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
  90. hex_str[2] = hex_chars[src[i] & 0xF];
  91. bufput(ob, hex_str, 3);
  92. }
  93. i++;
  94. }
  95. }