PageRenderTime 43ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/hphp/runtime/ext/filter/sanitizing_filters.cpp

https://gitlab.com/iranjith4/hhvm
C++ | 329 lines | 283 code | 27 blank | 19 comment | 31 complexity | 5abf38c5541cf1e5313c3b6970f9b5ee MD5 | raw file
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010-2016 Facebook, Inc. (http://www.facebook.com) |
  6. | Copyright (c) 1997-2010 The PHP Group |
  7. +----------------------------------------------------------------------+
  8. | This source file is subject to version 3.01 of the PHP license, |
  9. | that is bundled with this package in the file LICENSE, and is |
  10. | available through the world-wide-web at the following url: |
  11. | http://www.php.net/license/3_01.txt |
  12. | If you did not receive a copy of the PHP license and are unable to |
  13. | obtain it through the world-wide-web, please send a note to |
  14. | license@php.net so we can mail you a copy immediately. |
  15. +----------------------------------------------------------------------+
  16. */
  17. #include "hphp/runtime/ext/filter/sanitizing_filters.h"
  18. #include "hphp/runtime/ext/filter/ext_filter.h"
  19. #include "hphp/runtime/ext/string/ext_string.h"
  20. #include "hphp/runtime/base/string-buffer.h"
  21. #include "hphp/runtime/base/zend-string.h"
  22. namespace HPHP {
  /*
   * Lookup table with one slot per possible byte value (0..255). In this
   * file a non-zero slot marks the corresponding byte as allowed.
   */
  using filter_map = unsigned long[256];
  24. static String php_filter_encode_html(const String& value,
  25. const unsigned char *chars) {
  26. int len = value.length();
  27. unsigned char *s = (unsigned char *)value.data();
  28. unsigned char *e = s + len;
  29. if (len == 0) {
  30. return empty_string();
  31. }
  32. StringBuffer str(len);
  33. while (s < e) {
  34. if (chars[*s]) {
  35. str.append("&#");
  36. str.append(static_cast<int64_t>(s[0]));
  37. str.append(';');
  38. } else {
  39. /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
  40. str.append(s[0]);
  41. }
  42. s++;
  43. }
  44. return str.detach();
  45. }
  /* Uppercase hex digits, indexed by nibble value (used for %XX escapes). */
  static const unsigned char hexchars[] = "0123456789ABCDEF";

  /* Character classes, concatenated as string literals by the filters below. */
  #define LOWALPHA "abcdefghijklmnopqrstuvwxyz"
  #define HIALPHA  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  #define DIGIT    "0123456789"

  /* Bytes that php_filter_encoded passes through without percent-encoding. */
  #define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._"
  51. static Variant php_filter_encode_url(const String& value, const unsigned char* chars,
  52. const int char_len, int high, int low,
  53. int encode_nul) {
  54. unsigned char tmp[256];
  55. unsigned char *s = (unsigned char *)chars;
  56. unsigned char *e = s + char_len;
  57. int len = value.length();
  58. if (len == 0) {
  59. return empty_string_variant();
  60. }
  61. memset(tmp, 1, sizeof(tmp)-1);
  62. while (s < e) {
  63. tmp[*s++] = 0;
  64. }
  65. StringBuffer str(len);
  66. s = (unsigned char *)value.data();
  67. e = s + len;
  68. while (s < e) {
  69. if (tmp[*s]) {
  70. str.append('%');
  71. str.append((char) hexchars[(unsigned char) *s >> 4]);
  72. str.append((char) hexchars[(unsigned char) *s & 15]);
  73. } else {
  74. str.append((char) *s);
  75. }
  76. s++;
  77. }
  78. return str.detach();
  79. }
  80. static Variant php_filter_strip(const String& value, long flags) {
  81. unsigned char *str;
  82. int i;
  83. int len = value.length();
  84. if (len == 0) {
  85. return empty_string_variant();
  86. }
  87. /* Optimization for if no strip flags are set */
  88. if (! ((flags & k_FILTER_FLAG_STRIP_LOW) ||
  89. (flags & k_FILTER_FLAG_STRIP_HIGH)) ) {
  90. return value;
  91. }
  92. str = (unsigned char *)value.data();
  93. StringBuffer buf(len);
  94. for (i = 0; i < len; i++) {
  95. if ((str[i] > 127) && (flags & k_FILTER_FLAG_STRIP_HIGH)) {
  96. } else if ((str[i] < 32) && (flags & k_FILTER_FLAG_STRIP_LOW)) {
  97. } else if ((str[i] == '`') && (flags & k_FILTER_FLAG_STRIP_BACKTICK)) {
  98. } else {
  99. buf.append((char) str[i]);
  100. }
  101. }
  102. return buf.detach();
  103. }
  104. static void filter_map_init(filter_map *map) {
  105. memset(map, 0, sizeof(filter_map));
  106. }
  107. static void filter_map_update(filter_map *map, int flag,
  108. const unsigned char *allowed_list) {
  109. int l, i;
  110. l = strlen((const char*)allowed_list);
  111. for (i = 0; i < l; ++i) {
  112. (*map)[allowed_list[i]] = flag;
  113. }
  114. }
  115. static Variant filter_map_apply(const String& value, filter_map *map) {
  116. unsigned char *str;
  117. int i;
  118. int len = value.length();
  119. if (len == 0) {
  120. return empty_string_variant();
  121. }
  122. str = (unsigned char *)value.data();
  123. StringBuffer buf(len);
  124. for (i = 0; i < len; i++) {
  125. if ((*map)[str[i]]) {
  126. buf.append((char) str[i]);
  127. }
  128. }
  129. return buf.detach();
  130. }
  /*
   * Converts `c` to unsigned char so a character literal can be used as an
   * index into the 256-entry tables below.
   */
  template <typename T>
  unsigned char uc(T c) {
    return static_cast<unsigned char>(c);
  }
  133. Variant php_filter_string(PHP_INPUT_FILTER_PARAM_DECL) {
  134. unsigned char enc[256] = {0};
  135. /* strip high/strip low ( see flags )*/
  136. auto const stripped = php_filter_strip(value, flags).toString();
  137. if (!(flags & k_FILTER_FLAG_NO_ENCODE_QUOTES)) {
  138. enc[uc('\'')] = enc[uc('"')] = 1;
  139. }
  140. if (flags & k_FILTER_FLAG_ENCODE_AMP) {
  141. enc[uc('&')] = 1;
  142. }
  143. if (flags & k_FILTER_FLAG_ENCODE_LOW) {
  144. memset(enc, 1, 32);
  145. }
  146. if (flags & k_FILTER_FLAG_ENCODE_HIGH) {
  147. memset(enc + 127, 1, sizeof(enc) - 127);
  148. }
  149. String encoded(php_filter_encode_html(stripped, enc));
  150. int len = encoded.length();
  151. auto const empty = staticEmptyString();
  152. String ret = string_strip_tags(
  153. encoded.data(), len, empty->data(), empty->size(), true
  154. );
  155. if (len == 0) {
  156. if (flags & k_FILTER_FLAG_EMPTY_STRING_NULL) {
  157. return init_null();
  158. }
  159. return empty_string_variant();
  160. }
  161. return ret;
  162. }
  163. Variant php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL) {
  164. /* apply strip_high and strip_low filters */
  165. php_filter_strip(value, flags);
  166. /* urlencode */
  167. return php_filter_encode_url(
  168. value,
  169. (unsigned char *)DEFAULT_URL_ENCODE,
  170. sizeof(DEFAULT_URL_ENCODE)-1,
  171. flags & k_FILTER_FLAG_ENCODE_HIGH,
  172. flags & k_FILTER_FLAG_ENCODE_LOW,
  173. 1
  174. );
  175. }
  176. Variant php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL) {
  177. unsigned char enc[256] = {0};
  178. php_filter_strip(value, flags);
  179. /* encodes ' " < > & \0 to numerical entities */
  180. enc[uc('\'')] = enc[uc('"')] = enc[uc('<')] = enc[uc('>')] = enc[uc('&')] = enc[0] = 1;
  181. /* if strip low is not set, then we encode them as &#xx; */
  182. memset(enc, 1, 32);
  183. if (flags & k_FILTER_FLAG_ENCODE_HIGH) {
  184. memset(enc + 127, 1, sizeof(enc) - 127);
  185. }
  186. return php_filter_encode_html(value, enc);
  187. }
  188. Variant php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL) {
  189. int quotes;
  190. if (!(flags & k_FILTER_FLAG_NO_ENCODE_QUOTES)) {
  191. quotes = k_ENT_QUOTES;
  192. } else {
  193. quotes = k_ENT_HTML_QUOTE_NONE;
  194. }
  195. return HHVM_FN(htmlentities)(value, quotes);
  196. }
  197. Variant php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL) {
  198. /* Only if no flags are set (optimization) */
  199. if (flags != 0 && value.length() > 0) {
  200. unsigned char enc[256] = {0};
  201. auto stripped = php_filter_strip(value, flags);
  202. if (!stripped.isString()) {
  203. return stripped;
  204. }
  205. if (flags & k_FILTER_FLAG_ENCODE_AMP) {
  206. enc[uc('&')] = 1;
  207. }
  208. if (flags & k_FILTER_FLAG_ENCODE_LOW) {
  209. memset(enc, 1, 32);
  210. }
  211. if (flags & k_FILTER_FLAG_ENCODE_HIGH) {
  212. memset(enc + 127, 1, sizeof(enc) - 127);
  213. }
  214. return php_filter_encode_html(stripped.toString(), enc);
  215. } else if (flags & k_FILTER_FLAG_EMPTY_STRING_NULL && value.length() == 0) {
  216. return init_null();
  217. }
  218. return value;
  219. }
  /* Character classes concatenated into the allowed list of php_filter_url. */
  #define SAFE        "$-_.+"
  #define EXTRA       "!*'(),"
  #define NATIONAL    "{}|\\^~[]`"
  #define PUNCTUATION "<>#%\""
  #define RESERVED    ";/?:@&="
  225. Variant php_filter_email(PHP_INPUT_FILTER_PARAM_DECL) {
  226. /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
  227. const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT \
  228. "!#$%&'*+-=?^_`{|}~@.[]";
  229. filter_map map;
  230. filter_map_init(&map);
  231. filter_map_update(&map, 1, allowed_list);
  232. return filter_map_apply(value, &map);
  233. }
  234. Variant php_filter_url(PHP_INPUT_FILTER_PARAM_DECL) {
  235. /* Strip all chars not part of section 5 of
  236. * http://www.faqs.org/rfcs/rfc1738.html */
  237. const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA \
  238. NATIONAL PUNCTUATION RESERVED;
  239. filter_map map;
  240. filter_map_init(&map);
  241. filter_map_update(&map, 1, allowed_list);
  242. return filter_map_apply(value, &map);
  243. }
  244. Variant php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL) {
  245. /* strip everything [^0-9+-] */
  246. const unsigned char allowed_list[] = "+-" DIGIT;
  247. filter_map map;
  248. filter_map_init(&map);
  249. filter_map_update(&map, 1, allowed_list);
  250. return filter_map_apply(value, &map);
  251. }
  252. Variant php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL) {
  253. /* strip everything [^0-9+-] */
  254. const unsigned char allowed_list[] = "+-" DIGIT;
  255. filter_map map;
  256. filter_map_init(&map);
  257. filter_map_update(&map, 1, allowed_list);
  258. /* depending on flags, strip '.', 'e', ",", "'" */
  259. if (flags & k_FILTER_FLAG_ALLOW_FRACTION) {
  260. filter_map_update(&map, 2, (const unsigned char *) ".");
  261. }
  262. if (flags & k_FILTER_FLAG_ALLOW_THOUSAND) {
  263. filter_map_update(&map, 3, (const unsigned char *) ",");
  264. }
  265. if (flags & k_FILTER_FLAG_ALLOW_SCIENTIFIC) {
  266. filter_map_update(&map, 4, (const unsigned char *) "eE");
  267. }
  268. return filter_map_apply(value, &map);
  269. }
  270. Variant php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL) {
  271. /* just call addslashes quotes */
  272. return HHVM_FN(addslashes)(value);
  273. }
  274. }