PageRenderTime 25ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/external/chromium/net/base/escape_unittest.cc

https://gitlab.com/brian0218/rk3288_r-box_android4.4.2_sdk
C++ | 437 lines | 358 code | 43 blank | 36 comment | 23 complexity | 78c55bbd99db5ec615f2fabfab66a687 MD5 | raw file
  1. // Copyright (c) 2011 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #include <algorithm>
  5. #include <string>
  6. #include "net/base/escape.h"
  7. #include "base/basictypes.h"
  8. #include "base/i18n/icu_string_conversions.h"
  9. #include "base/string_util.h"
  10. #include "base/stringprintf.h"
  11. #include "base/utf_string_conversions.h"
  12. #include "testing/gtest/include/gtest/gtest.h"
  13. namespace {
  14. static const size_t kNpos = string16::npos;
  15. struct EscapeCase {
  16. const wchar_t* input;
  17. const wchar_t* output;
  18. };
  19. struct UnescapeURLCase {
  20. const wchar_t* input;
  21. UnescapeRule::Type rules;
  22. const wchar_t* output;
  23. };
  24. struct UnescapeURLCaseASCII {
  25. const char* input;
  26. UnescapeRule::Type rules;
  27. const char* output;
  28. };
  29. struct UnescapeAndDecodeCase {
  30. const char* input;
  31. // The expected output when run through UnescapeURL.
  32. const char* url_unescaped;
  33. // The expected output when run through UnescapeQuery.
  34. const char* query_unescaped;
  35. // The expected output when run through UnescapeAndDecodeURLComponent.
  36. const wchar_t* decoded;
  37. };
  38. struct AdjustOffsetCase {
  39. const char* input;
  40. size_t input_offset;
  41. size_t output_offset;
  42. };
  43. struct EscapeForHTMLCase {
  44. const char* input;
  45. const char* expected_output;
  46. };
  47. } // namespace
  48. TEST(EscapeTest, EscapeTextForFormSubmission) {
  49. const EscapeCase escape_cases[] = {
  50. {L"foo", L"foo"},
  51. {L"foo bar", L"foo+bar"},
  52. {L"foo++", L"foo%2B%2B"}
  53. };
  54. for (size_t i = 0; i < arraysize(escape_cases); ++i) {
  55. EscapeCase value = escape_cases[i];
  56. EXPECT_EQ(WideToUTF16Hack(value.output),
  57. EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), true));
  58. }
  59. const EscapeCase escape_cases_no_plus[] = {
  60. {L"foo", L"foo"},
  61. {L"foo bar", L"foo%20bar"},
  62. {L"foo++", L"foo%2B%2B"}
  63. };
  64. for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) {
  65. EscapeCase value = escape_cases_no_plus[i];
  66. EXPECT_EQ(WideToUTF16Hack(value.output),
  67. EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), false));
  68. }
  69. // Test all the values in we're supposed to be escaping.
  70. const std::string no_escape(
  71. "abcdefghijklmnopqrstuvwxyz"
  72. "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  73. "0123456789"
  74. "!'()*-._~");
  75. for (int i = 0; i < 256; ++i) {
  76. std::string in;
  77. in.push_back(i);
  78. std::string out = EscapeQueryParamValue(in, true);
  79. if (0 == i) {
  80. EXPECT_EQ(out, std::string("%00"));
  81. } else if (32 == i) {
  82. // Spaces are plus escaped like web forms.
  83. EXPECT_EQ(out, std::string("+"));
  84. } else if (no_escape.find(in) == std::string::npos) {
  85. // Check %hex escaping
  86. std::string expected = base::StringPrintf("%%%02X", i);
  87. EXPECT_EQ(expected, out);
  88. } else {
  89. // No change for things in the no_escape list.
  90. EXPECT_EQ(out, in);
  91. }
  92. }
  93. // Check to see if EscapeQueryParamValueUTF8 is the same as
  94. // EscapeQueryParamValue(..., kCodepageUTF8,)
  95. string16 test_str;
  96. test_str.reserve(5000);
  97. for (int i = 1; i < 5000; ++i) {
  98. test_str.push_back(i);
  99. }
  100. string16 wide;
  101. EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, true,
  102. &wide));
  103. EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, true));
  104. EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, false,
  105. &wide));
  106. EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, false));
  107. }
  108. TEST(EscapeTest, EscapePath) {
  109. ASSERT_EQ(
  110. // Most of the character space we care about, un-escaped
  111. EscapePath(
  112. "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
  113. "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  114. "[\\]^_`abcdefghijklmnopqrstuvwxyz"
  115. "{|}~\x7f\x80\xff"),
  116. // Escaped
  117. "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
  118. "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  119. "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
  120. "%7B%7C%7D~%7F%80%FF");
  121. }
  122. TEST(EscapeTest, EscapeUrlEncodedData) {
  123. ASSERT_EQ(
  124. // Most of the character space we care about, un-escaped
  125. EscapeUrlEncodedData(
  126. "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
  127. "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  128. "[\\]^_`abcdefghijklmnopqrstuvwxyz"
  129. "{|}~\x7f\x80\xff"),
  130. // Escaped
  131. "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
  132. "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  133. "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
  134. "%7B%7C%7D~%7F%80%FF");
  135. }
  136. TEST(EscapeTest, UnescapeURLComponentASCII) {
  137. const UnescapeURLCaseASCII unescape_cases[] = {
  138. {"", UnescapeRule::NORMAL, ""},
  139. {"%2", UnescapeRule::NORMAL, "%2"},
  140. {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
  141. {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
  142. {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
  143. {"Some%20random text %25%2dOK", UnescapeRule::NONE,
  144. "Some%20random text %25%2dOK"},
  145. {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
  146. "Some%20random text %25-OK"},
  147. {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
  148. "Some random text %25-OK"},
  149. {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
  150. "Some%20random text %-OK"},
  151. {"Some%20random text %25%2dOK",
  152. UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
  153. "Some random text %-OK"},
  154. {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
  155. {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
  156. // Certain URL-sensitive characters should not be unescaped unless asked.
  157. {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
  158. "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
  159. {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
  160. UnescapeRule::URL_SPECIAL_CHARS,
  161. "Hello%20%13%10world ## ?? == && %% ++"},
  162. // Control characters.
  163. {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
  164. "%01%02%03%04%05%06%07%08%09 %"},
  165. {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
  166. "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
  167. {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
  168. {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
  169. };
  170. for (size_t i = 0; i < arraysize(unescape_cases); i++) {
  171. std::string str(unescape_cases[i].input);
  172. EXPECT_EQ(std::string(unescape_cases[i].output),
  173. UnescapeURLComponent(str, unescape_cases[i].rules));
  174. }
  175. // Test the NULL character unescaping (which wouldn't work above since those
  176. // are just char pointers).
  177. std::string input("Null");
  178. input.push_back(0); // Also have a NULL in the input.
  179. input.append("%00%39Test");
  180. // When we're unescaping NULLs
  181. std::string expected("Null");
  182. expected.push_back(0);
  183. expected.push_back(0);
  184. expected.append("9Test");
  185. EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
  186. // When we're not unescaping NULLs.
  187. expected = "Null";
  188. expected.push_back(0);
  189. expected.append("%009Test");
  190. EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
  191. }
  192. TEST(EscapeTest, UnescapeURLComponent) {
  193. const UnescapeURLCase unescape_cases[] = {
  194. {L"", UnescapeRule::NORMAL, L""},
  195. {L"%2", UnescapeRule::NORMAL, L"%2"},
  196. {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
  197. {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
  198. {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
  199. {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
  200. L"Some%20random text %25%2dOK"},
  201. {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
  202. L"Some%20random text %25-OK"},
  203. {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
  204. L"Some random text %25-OK"},
  205. {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
  206. L"Some%20random text %-OK"},
  207. {L"Some%20random text %25%2dOK",
  208. UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
  209. L"Some random text %-OK"},
  210. {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
  211. {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
  212. // Certain URL-sensitive characters should not be unescaped unless asked.
  213. {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
  214. L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
  215. {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
  216. UnescapeRule::URL_SPECIAL_CHARS,
  217. L"Hello%20%13%10world ## ?? == && %% ++"},
  218. // We can neither escape nor unescape '@' since some websites expect it to
  219. // be preserved as either '@' or "%40".
  220. // See http://b/996720 and http://crbug.com/23933 .
  221. {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"},
  222. // Control characters.
  223. {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
  224. L"%01%02%03%04%05%06%07%08%09 %"},
  225. {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
  226. L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
  227. {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
  228. {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
  229. L"Hello%20\x13\x10\x02"},
  230. {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
  231. L"Hello\x9824\x9827"},
  232. };
  233. for (size_t i = 0; i < arraysize(unescape_cases); i++) {
  234. string16 str(WideToUTF16(unescape_cases[i].input));
  235. EXPECT_EQ(WideToUTF16(unescape_cases[i].output),
  236. UnescapeURLComponent(str, unescape_cases[i].rules));
  237. }
  238. // Test the NULL character unescaping (which wouldn't work above since those
  239. // are just char pointers).
  240. string16 input(WideToUTF16(L"Null"));
  241. input.push_back(0); // Also have a NULL in the input.
  242. input.append(WideToUTF16(L"%00%39Test"));
  243. // When we're unescaping NULLs
  244. string16 expected(WideToUTF16(L"Null"));
  245. expected.push_back(0);
  246. expected.push_back(0);
  247. expected.append(ASCIIToUTF16("9Test"));
  248. EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
  249. // When we're not unescaping NULLs.
  250. expected = WideToUTF16(L"Null");
  251. expected.push_back(0);
  252. expected.append(WideToUTF16(L"%009Test"));
  253. EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
  254. }
  255. TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
  256. const UnescapeAndDecodeCase unescape_cases[] = {
  257. { "%",
  258. "%",
  259. "%",
  260. L"%"},
  261. { "+",
  262. "+",
  263. " ",
  264. L"+"},
  265. { "%2+",
  266. "%2+",
  267. "%2 ",
  268. L"%2+"},
  269. { "+%%%+%%%",
  270. "+%%%+%%%",
  271. " %%% %%%",
  272. L"+%%%+%%%"},
  273. { "Don't escape anything",
  274. "Don't escape anything",
  275. "Don't escape anything",
  276. L"Don't escape anything"},
  277. { "+Invalid %escape %2+",
  278. "+Invalid %escape %2+",
  279. " Invalid %escape %2 ",
  280. L"+Invalid %escape %2+"},
  281. { "Some random text %25%2dOK",
  282. "Some random text %25-OK",
  283. "Some random text %25-OK",
  284. L"Some random text %25-OK"},
  285. { "%01%02%03%04%05%06%07%08%09",
  286. "%01%02%03%04%05%06%07%08%09",
  287. "%01%02%03%04%05%06%07%08%09",
  288. L"%01%02%03%04%05%06%07%08%09"},
  289. { "%E4%BD%A0+%E5%A5%BD",
  290. "\xE4\xBD\xA0+\xE5\xA5\xBD",
  291. "\xE4\xBD\xA0 \xE5\xA5\xBD",
  292. L"\x4f60+\x597d"},
  293. { "%ED%ED", // Invalid UTF-8.
  294. "\xED\xED",
  295. "\xED\xED",
  296. L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped.
  297. };
  298. for (size_t i = 0; i < arraysize(unescape_cases); i++) {
  299. std::string unescaped = UnescapeURLComponent(unescape_cases[i].input,
  300. UnescapeRule::NORMAL);
  301. EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped);
  302. unescaped = UnescapeURLComponent(unescape_cases[i].input,
  303. UnescapeRule::REPLACE_PLUS_WITH_SPACE);
  304. EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped);
  305. // TODO: Need to test unescape_spaces and unescape_percent.
  306. string16 decoded = UnescapeAndDecodeUTF8URLComponent(
  307. unescape_cases[i].input, UnescapeRule::NORMAL, NULL);
  308. EXPECT_EQ(WideToUTF16Hack(std::wstring(unescape_cases[i].decoded)),
  309. decoded);
  310. }
  311. }
  312. TEST(EscapeTest, AdjustOffset) {
  313. const AdjustOffsetCase adjust_cases[] = {
  314. {"", 0, std::wstring::npos},
  315. {"test", 0, 0},
  316. {"test", 2, 2},
  317. {"test", 4, std::wstring::npos},
  318. {"test", std::wstring::npos, std::wstring::npos},
  319. {"%2dtest", 6, 4},
  320. {"%2dtest", 2, std::wstring::npos},
  321. {"test%2d", 2, 2},
  322. {"%E4%BD%A0+%E5%A5%BD", 9, 1},
  323. {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos},
  324. {"%ED%B0%80+%E5%A5%BD", 6, 6},
  325. };
  326. for (size_t i = 0; i < arraysize(adjust_cases); i++) {
  327. size_t offset = adjust_cases[i].input_offset;
  328. UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input,
  329. UnescapeRule::NORMAL, &offset);
  330. EXPECT_EQ(adjust_cases[i].output_offset, offset);
  331. }
  332. }
  333. TEST(EscapeTest, EscapeForHTML) {
  334. const EscapeForHTMLCase tests[] = {
  335. { "hello", "hello" },
  336. { "<hello>", "&lt;hello&gt;" },
  337. { "don\'t mess with me", "don&#39;t mess with me" },
  338. };
  339. for (size_t i = 0; i < arraysize(tests); ++i) {
  340. std::string result = EscapeForHTML(std::string(tests[i].input));
  341. EXPECT_EQ(std::string(tests[i].expected_output), result);
  342. }
  343. }
  344. TEST(EscapeTest, UnescapeForHTML) {
  345. const EscapeForHTMLCase tests[] = {
  346. { "", "" },
  347. { "&lt;hello&gt;", "<hello>" },
  348. { "don&#39;t mess with me", "don\'t mess with me" },
  349. { "&lt;&gt;&amp;&quot;&#39;", "<>&\"'" },
  350. { "& lt; &amp ; &; '", "& lt; &amp ; &; '" },
  351. { "&amp;", "&" },
  352. { "&quot;", "\"" },
  353. { "&#39;", "'" },
  354. { "&lt;", "<" },
  355. { "&gt;", ">" },
  356. { "&amp; &", "& &" },
  357. };
  358. for (size_t i = 0; i < arraysize(tests); ++i) {
  359. string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input));
  360. EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result);
  361. }
  362. }
  363. TEST(EscapeTest, AdjustEncodingOffset) {
  364. // Imagine we have strings as shown in the following cases where the
  365. // %XX's represent encoded characters
  366. // 1: abc%ECdef ==> abcXdef
  367. std::vector<size_t> offsets;
  368. for (size_t t = 0; t < 9; ++t)
  369. offsets.push_back(t);
  370. AdjustEncodingOffset::Adjustments adjustments;
  371. adjustments.push_back(3);
  372. std::for_each(offsets.begin(), offsets.end(),
  373. AdjustEncodingOffset(adjustments));
  374. size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
  375. EXPECT_EQ(offsets.size(), arraysize(expected_1));
  376. for (size_t i = 0; i < arraysize(expected_1); ++i)
  377. EXPECT_EQ(expected_1[i], offsets[i]);
  378. // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX
  379. offsets.clear();
  380. for (size_t t = 0; t < 18; ++t)
  381. offsets.push_back(t);
  382. adjustments.clear();
  383. adjustments.push_back(0);
  384. adjustments.push_back(6);
  385. adjustments.push_back(9);
  386. adjustments.push_back(15);
  387. std::for_each(offsets.begin(), offsets.end(),
  388. AdjustEncodingOffset(adjustments));
  389. size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos,
  390. kNpos, 6, 7, 8, 9, kNpos, kNpos};
  391. EXPECT_EQ(offsets.size(), arraysize(expected_2));
  392. for (size_t i = 0; i < arraysize(expected_2); ++i)
  393. EXPECT_EQ(expected_2[i], offsets[i]);
  394. }