PageRenderTime 120ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/base/string.cc

http://omaha.googlecode.com/
C++ | 3391 lines | 2233 code | 535 blank | 623 comment | 700 complexity | 87938418347e2cdb64a6bf1abd31e81d MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause
  1. // Copyright 2003-2009 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // ========================================================================
  15. #include "omaha/base/string.h"
  16. #include <wininet.h> // For INTERNET_MAX_URL_LENGTH.
  17. #include <algorithm>
  18. #include <cstdlib>
  19. #include "base/scoped_ptr.h"
  20. #include "omaha/base/commontypes.h"
  21. #include "omaha/base/debug.h"
  22. #include "omaha/base/localization.h"
  23. #include "omaha/base/logging.h"
  24. namespace omaha {
  25. namespace {
  26. // Testing shows that only the following ASCII characters are
  27. // considered spaces by GetStringTypeA: 9-13, 32, 160.
  28. // Rather than call GetStringTypeA with no locale, as we used to,
  29. // we look up the values directly in a precomputed array.
  30. SELECTANY byte spaces[256] = {
  31. 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // 0-9
  32. 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 10-19
  33. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20-29
  34. 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 30-39
  35. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40-49
  36. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50-59
  37. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60-69
  38. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70-79
  39. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80-89
  40. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 90-99
  41. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 100-109
  42. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 110-119
  43. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 120-129
  44. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 130-139
  45. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 140-149
  46. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 150-159
  47. 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 160-169
  48. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 170-179
  49. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 180-189
  50. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 190-199
  51. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 200-209
  52. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 210-219
  53. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 220-229
  54. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 230-239
  55. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 240-249
  56. 0, 0, 0, 0, 0, 1, // 250-255
  57. };
  58. } // namespace
  59. const TCHAR* const kFalse = _T("false");
  60. const TCHAR* const kTrue = _T("true");
  61. bool IsSpaceW(WCHAR c) {
  62. // GetStringTypeW considers these characters to be spaces:
  63. // 9-13, 32, 133, 160, 5760, 8192-8203, 8232, 8233, 12288
  64. if (c < 256)
  65. return (c == 133 || IsSpaceA((char) (c & 0xff)));
  66. return (c >= 8192 && c <= 8203) || c == 8232 ||
  67. c == 8233 || c == 12288;
  68. }
  69. bool IsSpaceA(char c) {
  70. return spaces[static_cast<unsigned char>(c)] == 1;
  71. }
  72. int TrimCString(CString &s) {
  73. int len = Trim(s.GetBuffer());
  74. s.ReleaseBufferSetLength(len);
  75. return len;
  76. }
  77. void MakeLowerCString(CString & s) {
  78. int len = s.GetLength();
  79. String_FastToLower(s.GetBuffer());
  80. s.ReleaseBufferSetLength(len);
  81. }
  82. int Trim(TCHAR *s) {
  83. ASSERT(s, (L""));
  84. // First find end of leading spaces
  85. TCHAR *start = s;
  86. while (*start) {
  87. if (!IsSpace(*start))
  88. break;
  89. ++start;
  90. }
  91. // Now search for the end, remembering the start of the last spaces
  92. TCHAR *end = start;
  93. TCHAR *last_space = end;
  94. while (*end) {
  95. if (!IsSpace(*end))
  96. last_space = end + 1;
  97. ++end;
  98. }
  99. // Copy the part we want
  100. int len = last_space - start;
  101. // lint -e{802} Conceivably passing a NULL pointer
  102. memmove(s, start, len * sizeof(TCHAR));
  103. // 0 terminate
  104. s[len] = 0;
  105. return len;
  106. }
  107. void TrimString(CString& s, const TCHAR* delimiters) {
  108. s = s.Trim(delimiters);
  109. }
  110. // Strip the first token from the front of argument s. A token is a
  111. // series of consecutive non-blank characters - unless the first
  112. // character is a double-quote ("), in that case the token is the full
  113. // quoted string
  114. CString StripFirstQuotedToken(const CString& s) {
  115. const int npos = -1;
  116. // Make a writeable copy
  117. CString str(s);
  118. // Trim any surrounding blanks (and tabs, for the heck of it)
  119. TrimString(str, L" \t");
  120. // Too short to have a second token
  121. if (str.GetLength() <= 1)
  122. return L"";
  123. // What kind of token are we stripping?
  124. if (str[0] == L'\"') {
  125. // Remove leading quoting string
  126. int i = str.Find(L"\"", 1);
  127. if (i != npos)
  128. i++;
  129. return str.Mid(i);
  130. } else {
  131. // Remove leading token
  132. int i = str.FindOneOf(L" \t");
  133. if (i != npos)
  134. i++;
  135. return str.Mid(i);
  136. }
  137. }
  138. // A block of text to separate lines, and back
  139. void TextToLines(const CString& text, const TCHAR* delimiter, std::vector<CString>* lines) {
  140. ASSERT(delimiter, (L""));
  141. ASSERT(lines, (L""));
  142. size_t delimiter_len = ::lstrlen(delimiter);
  143. int b = 0;
  144. int e = 0;
  145. for (b = 0; e != -1 && b < text.GetLength(); b = e + delimiter_len) {
  146. e = text.Find(delimiter, b);
  147. if (e != -1) {
  148. ASSERT1(e - b > 0);
  149. lines->push_back(text.Mid(b, e - b));
  150. } else {
  151. lines->push_back(text.Mid(b));
  152. }
  153. }
  154. }
  155. void LinesToText(const std::vector<CString>& lines, const TCHAR* delimiter, CString* text) {
  156. ASSERT(delimiter, (L""));
  157. ASSERT(text, (L""));
  158. size_t delimiter_len = ::lstrlen(delimiter);
  159. size_t len = 0;
  160. for (size_t i = 0; i < lines.size(); ++i) {
  161. len += lines[i].GetLength() + delimiter_len;
  162. }
  163. text->Empty();
  164. text->Preallocate(len);
  165. for (std::vector<CString>::size_type i = 0; i < lines.size(); ++i) {
  166. text->Append(lines[i]);
  167. if (delimiter_len) {
  168. text->Append(delimiter);
  169. }
  170. }
  171. }
  172. int CleanupWhitespaceCString(CString &s) {
  173. int len = CleanupWhitespace(s.GetBuffer());
  174. s.ReleaseBufferSetLength(len);
  175. return len;
  176. }
  177. int CleanupWhitespace(TCHAR *str) {
  178. ASSERT(str, (L""));
  179. TCHAR *src = str;
  180. TCHAR *dest = str;
  181. int spaces = 0;
  182. bool at_start = true;
  183. while (true) {
  184. // At end of string?
  185. TCHAR c = *src;
  186. if (0 == c)
  187. break;
  188. // Look for whitespace; copy it over if not whitespace
  189. if (IsSpace(c)) {
  190. ++spaces;
  191. }
  192. else {
  193. *dest++ = c;
  194. at_start = false;
  195. spaces = 0;
  196. }
  197. // Write only first consecutive space (but skip space at start)
  198. if (1 == spaces && !at_start)
  199. *dest++ = ' ';
  200. ++src;
  201. }
  202. // Remove trailing space, if any
  203. if (dest > str && *(dest - 1) == L' ')
  204. --dest;
  205. // 0-terminate
  206. *dest = 0;
  207. return dest - str;
  208. }
  209. // Take 1 single hexadecimal "digit" (as a character) and return its decimal value
  210. // Returns -1 if given invalid hex digit
  211. int HexDigitToDec(const TCHAR digit) {
  212. if (digit >= L'A' && digit <= L'F')
  213. return 10 + (digit - L'A');
  214. else if (digit >= L'a' && digit <= L'f')
  215. return 10 + (digit - L'a');
  216. else if (digit >= L'0' && digit <= L'9')
  217. return (digit - L'0');
  218. else
  219. return -1;
  220. }
  221. // Convert the 2 hex chars at positions <pos> and <pos>+1 in <s> to a char (<char_out>)
  222. // Note: scanf was giving me troubles, so here's the manual version
  223. // Extracted char gets written to <char_out>, which must be allocated by
  224. // the caller; return true on success or false if parameters are incorrect
  225. // or string does not have 2 hex digits at the specified position
  226. // NOTE: <char_out> is NOT a string, just a pointer to a char for the result
  227. bool ExtractChar(const CString & s, int pos, unsigned char * char_out) {
  228. // char_out may be NULL
  229. if (s.GetLength() < pos + 1) {
  230. return false;
  231. }
  232. if (pos < 0 || NULL == char_out) {
  233. ASSERT(0, (_T("invalid params: pos<0 or char_out is NULL")));
  234. return false;
  235. }
  236. TCHAR c1 = s.GetAt(pos);
  237. TCHAR c2 = s.GetAt(pos+1);
  238. int p1 = HexDigitToDec(c1);
  239. int p2 = HexDigitToDec(c2);
  240. if (p1 == -1 || p2 == -1) {
  241. return false;
  242. }
  243. *char_out = (unsigned char)(p1 * 16 + p2);
  244. return true;
  245. }
  246. WCHAR *ToWide (const char *s, int len) {
  247. ASSERT (s, (L""));
  248. WCHAR *w = new WCHAR [len+1]; if (!w) { return NULL; }
  249. // int rc = MultiByteToWideChar (CP_ACP, 0, s.GetString(), (int)s.GetLength()+1, w, s.GetLength()+1);
  250. // TODO(omaha): why would it ever be the case that rc > len?
  251. int rc = MultiByteToWideChar (CP_ACP, 0, s, len, w, len);
  252. if (rc > len) { delete [] w; return NULL; }
  253. // ASSERT (rc <= len, (L""));
  254. w[rc]=L'\0';
  255. return w;
  256. }
  257. const byte *BufferContains (const byte *buf, uint32 buf_len, const byte *data, uint32 data_len) {
  258. ASSERT(data, (L""));
  259. ASSERT(buf, (L""));
  260. for (uint32 i = 0; i < buf_len; i++) {
  261. uint32 j = i;
  262. uint32 k = 0;
  263. uint32 len = 0;
  264. while (j < buf_len && k < data_len && buf[j++] == data[k++]) { len++; }
  265. if (len == data_len) { return buf + i; }
  266. }
  267. return 0;
  268. }
  269. // Converting the Ansi Multibyte String into unicode string. The multibyte
  270. // string is encoded using the specified codepage.
  271. // The code is pretty much like the U2W function, except the codepage can be
  272. // any valid windows CP.
  273. BOOL AnsiToWideString(const char *from, int length, UINT codepage, CString *to) {
  274. ASSERT(from, (L""));
  275. ASSERT(to, (L""));
  276. ASSERT1(length >= -1);
  277. // Figure out how long the string is
  278. int req_chars = MultiByteToWideChar(codepage, 0, from, length, NULL, 0);
  279. if (req_chars <= 0) {
  280. UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed ")));
  281. *to = AnsiToWideString(from, length);
  282. return FALSE;
  283. }
  284. TCHAR *buffer = to->GetBufferSetLength(req_chars);
  285. int conv_chars = MultiByteToWideChar(codepage, 0, from, length, buffer, req_chars);
  286. if (conv_chars == 0) {
  287. UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed ")));
  288. to->ReleaseBuffer(0);
  289. *to = AnsiToWideString(from, length);
  290. return FALSE;
  291. }
  292. // Something truly horrible happened.
  293. ASSERT (req_chars == conv_chars, (L"MBToWide returned unexpected value: GetLastError()=%d",GetLastError()));
  294. // If length was inferred, conv_chars includes the null terminator.
  295. // Adjust the length here to remove null termination,
  296. // because we use the length-qualified CString constructor,
  297. // which automatically adds null termination given an unterminated array.
  298. if (-1 == length) { --conv_chars; }
  299. to->ReleaseBuffer(conv_chars);
  300. return TRUE;
  301. }
  302. // CStringW(const char* from) did not cast all character properly
  303. // so we write our own.
  304. CString AnsiToWideString(const char *from, int length) {
  305. ASSERT(from, (L""));
  306. ASSERT1(length >= -1);
  307. if (length < 0)
  308. length = strlen(from);
  309. CString to;
  310. TCHAR *buffer = to.GetBufferSetLength(length);
  311. for (int i = 0; i < length; ++i)
  312. buffer[i] = static_cast<UINT8>(from[i]);
  313. to.ReleaseBuffer(length);
  314. return to;
  315. }
  316. // Transform a unicode string into UTF8, as represented in an ASCII string
  317. CStringA WideToUtf8(const CString& w) {
  318. // Add a cutoff. If it's all ascii, convert it directly
  319. const TCHAR* input = static_cast<const TCHAR*>(w.GetString());
  320. int input_len = w.GetLength(), i;
  321. for (i = 0; i < input_len; ++i) {
  322. if (input[i] > 127) {
  323. break;
  324. }
  325. }
  326. // If we made it to the end without breaking, then it's all ANSI, so do a quick convert
  327. if (i == input_len) {
  328. return WideToAnsiDirect(w);
  329. }
  330. // Figure out how long the string is
  331. int req_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, NULL, 0, NULL, NULL);
  332. scoped_array<char> utf8_buffer(new char[req_bytes]);
  333. int conv_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8_buffer.get(), req_bytes, NULL, NULL);
  334. ASSERT1(req_bytes == conv_bytes);
  335. // conv_bytes includes the null terminator, when we read this in, don't read the terminator
  336. CStringA out(utf8_buffer.get(), conv_bytes - 1);
  337. return out;
  338. }
  339. CString Utf8ToWideChar(const char* utf8, uint32 num_bytes) {
  340. ASSERT1(utf8);
  341. if (num_bytes == 0) {
  342. return CString();
  343. }
  344. uint32 number_of_wide_chars = ::MultiByteToWideChar(CP_UTF8, 0, utf8, num_bytes, NULL, 0);
  345. number_of_wide_chars += 1; // make room for NULL terminator
  346. CString ret_string;
  347. TCHAR* buffer = ret_string.GetBuffer(number_of_wide_chars);
  348. DWORD number_of_characters_copied = ::MultiByteToWideChar(CP_UTF8, 0, utf8, num_bytes, buffer, number_of_wide_chars);
  349. ASSERT1(number_of_characters_copied == number_of_wide_chars - 1);
  350. buffer[number_of_wide_chars - 1] = _T('\0'); // ensure there is a NULL terminator
  351. ret_string.ReleaseBuffer();
  352. // Strip the byte order marker if there is one in the document.
  353. if (ret_string[0] == kUnicodeBom) {
  354. ret_string = ret_string.Right(ret_string.GetLength() - 1);
  355. }
  356. if (number_of_characters_copied > 0) {
  357. return ret_string;
  358. }
  359. // Failure case
  360. return CString();
  361. }
  362. CString Utf8BufferToWideChar(const std::vector<uint8>& buffer) {
  363. CString result;
  364. if (!buffer.empty()) {
  365. result = Utf8ToWideChar(
  366. reinterpret_cast<const char*>(&buffer.front()), buffer.size());
  367. }
  368. return result;
  369. }
  370. CString AbbreviateString (const CString & title, int32 max_len) {
  371. ASSERT (max_len, (L""));
  372. CString s(title);
  373. TrimCString(s); // remove whitespace at start/end
  374. if (s.GetLength() > max_len) {
  375. s = s.Left (max_len - 2);
  376. CString orig(s);
  377. // remove partial words
  378. while (s.GetLength() > 1 && !IsSpace(s[s.GetLength()-1])) { s = s.Left (s.GetLength() - 1); }
  379. // but not if it would make the string very short
  380. if (s.GetLength() < max_len / 2) { s = orig; }
  381. s += _T("..");
  382. }
  383. return s;
  384. }
  385. CString GetAbsoluteUri(const CString& uri) {
  386. int i = String_FindString(uri, _T("://"));
  387. if (i==-1) return uri;
  388. // add trailing / if none exists
  389. int j = String_FindChar(uri, L'/',i+3);
  390. if (j==-1) return (uri+NOTRANSL(_T("/")));
  391. // remove duplicate trailing slashes
  392. int len = uri.GetLength();
  393. if (len > 1 && uri.GetAt(len-1) == '/' && uri.GetAt(len-2) == '/') {
  394. CString new_uri(uri);
  395. int new_len = new_uri.GetLength();
  396. while (new_len > 1 && new_uri.GetAt(new_len-1) == '/' && new_uri.GetAt(new_len-2) == '/') {
  397. new_len--;
  398. new_uri = new_uri.Left(new_len);
  399. }
  400. return new_uri;
  401. }
  402. else return uri;
  403. }
  404. // requires that input have a PROTOCOL (http://) for proper behavior
  405. // items with the "file" protocol are returned as is (what is the hostname in that case? C: ? doesn't make sense)
  406. // TODO(omaha): loosen requirement
  407. // includes http://, e.g. http://www.google.com/
  408. CString GetUriHostName(const CString& uri, bool strip_leading) {
  409. if (String_StartsWith(uri,NOTRANSL(_T("file:")),true)) return uri;
  410. // correct any "errors"
  411. CString s(GetAbsoluteUri(uri));
  412. // Strip the leading "www."
  413. if (strip_leading)
  414. {
  415. int index_www = String_FindString(s, kStrLeadingWww);
  416. if (index_www != -1)
  417. ReplaceCString (s, kStrLeadingWww, _T(""));
  418. }
  419. int i = String_FindString(s, _T("://"));
  420. if(i==-1) return uri;
  421. int j = String_FindChar(s, L'/',i+3);
  422. if(j==-1) return uri;
  423. return s.Left(j+1);
  424. }
  425. // requires that input have a PROTOCOL (http://) for proper behavior
  426. // TODO(omaha): loosen requirement
  427. // removes the http:// and the extra slash '/' at the end.
  428. // http://www.google.com/ -> www.google.com (or google.com if strip_leading = true)
  429. CString GetUriHostNameHostOnly(const CString& uri, bool strip_leading) {
  430. CString s(GetUriHostName(uri,strip_leading));
  431. // remove protocol
  432. int i = String_FindString (s, _T("://"));
  433. if(i==-1) return s;
  434. CString ss(s.Right (s.GetLength() - i-3));
  435. // remove the last '/'
  436. int j = ss.ReverseFind('/');
  437. if (j == -1) return ss;
  438. return ss.Left(j);
  439. }
  440. CString AbbreviateUri(const CString& uri, int32 max_len) {
  441. ASSERT1(max_len);
  442. ASSERT1(!uri.IsEmpty());
  443. CString s(uri);
  444. VERIFY1(String_FindString (s, _T("://")));
  445. TrimCString(s);
  446. // SKIP_LOC_BEGIN
  447. RemoveFromStart (s, _T("ftp://"), false);
  448. RemoveFromStart (s, _T("http://"), false);
  449. RemoveFromStart (s, _T("https://"), false);
  450. RemoveFromStart (s, _T("www."), false);
  451. RemoveFromStart (s, _T("ftp."), false);
  452. RemoveFromStart (s, _T("www-"), false);
  453. RemoveFromStart (s, _T("ftp-"), false);
  454. RemoveFromEnd (s, _T(".htm"));
  455. RemoveFromEnd (s, _T(".html"));
  456. RemoveFromEnd (s, _T(".asp"));
  457. // SKIP_LOC_END
  458. if (s.GetLength() > max_len) {
  459. // try to keep the portion after the last /
  460. int32 last_slash = s.ReverseFind ((TCHAR)'/');
  461. CString after_last_slash;
  462. if (last_slash == -1) { after_last_slash = _T(""); }
  463. else { after_last_slash = s.Right (uri.GetLength() - last_slash - 1); }
  464. if (after_last_slash.GetLength() > max_len / 2) {
  465. after_last_slash = after_last_slash.Right (max_len / 2);
  466. }
  467. s = s.Left (max_len - after_last_slash.GetLength() - 2);
  468. s += "..";
  469. s += after_last_slash;
  470. }
  471. return s;
  472. }
  473. // normalized version of a URI intended to map duplicates to the same string
  474. // the normalized URI is not a valid URI
  475. CString NormalizeUri (const CString & uri) {
  476. CString s(uri);
  477. TrimCString(s);
  478. MakeLowerCString(s);
  479. // SKIP_LOC_BEGIN
  480. ReplaceCString (s, _T(":80"), _T(""));
  481. RemoveFromEnd (s, _T("/index.html"));
  482. RemoveFromEnd (s, _T("/welcome.html")); // old netscape standard
  483. RemoveFromEnd (s, _T("/"));
  484. RemoveFromStart (s, _T("ftp://"), false);
  485. RemoveFromStart (s, _T("http://"), false);
  486. RemoveFromStart (s, _T("https://"), false);
  487. RemoveFromStart (s, _T("www."), false);
  488. RemoveFromStart (s, _T("ftp."), false);
  489. RemoveFromStart (s, _T("www-"), false);
  490. RemoveFromStart (s, _T("ftp-"), false);
  491. ReplaceCString (s, _T("/./"), _T("/"));
  492. // SKIP_LOC_END
  493. // TODO(omaha):
  494. // fixup URLs like a/b/../../c
  495. // while ($s =~ m!\/\.\.\!!) {
  496. // $s =~ s!/[^/]*/\.\./!/!;
  497. // }
  498. // TODO(omaha):
  499. // unescape characters
  500. // Note from RFC1630: "Sequences which start with a percent sign
  501. // but are not followed by two hexadecimal characters are reserved
  502. // for future extension"
  503. // $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
  504. return s;
  505. }
  506. CString RemoveInternetProtocolHeader (const CString& url) {
  507. int find_colon_slash_slash = String_FindString(url, NOTRANSL(L"://"));
  508. if( find_colon_slash_slash != -1 ) {
  509. // remove PROTOCOL://
  510. return url.Right(url.GetLength() - find_colon_slash_slash - 3);
  511. } else if (String_StartsWith(url, NOTRANSL(L"mailto:"), true)) {
  512. // remove "mailto:"
  513. return url.Right(url.GetLength() - 7);
  514. } else {
  515. // return as is
  516. return url;
  517. }
  518. }
  519. HRESULT ConvertFileUriToLocalPath(const CString& uri, CString* path_out) {
  520. ASSERT1(path_out);
  521. ASSERT1(uri.GetLength() < INTERNET_MAX_URL_LENGTH);
  522. if (uri.IsEmpty()) {
  523. return E_INVALIDARG;
  524. }
  525. DWORD buf_len = MAX_PATH;
  526. HRESULT hr = ::PathCreateFromUrl(uri,
  527. CStrBuf(*path_out, MAX_PATH),
  528. &buf_len,
  529. NULL);
  530. return hr;
  531. }
  532. void RemoveFromStart (CString & s, const TCHAR* remove, bool ignore_case) {
  533. ASSERT(remove, (L""));
  534. // Remove the characters if it is the prefix
  535. if (String_StartsWith(s, remove, ignore_case))
  536. s.Delete(0, lstrlen(remove));
  537. }
  538. bool String_EndsWith(const TCHAR *str, const TCHAR *end_str, bool ignore_case) {
  539. ASSERT(end_str, (L""));
  540. ASSERT(str, (L""));
  541. int str_len = lstrlen(str);
  542. int end_len = lstrlen(end_str);
  543. // Definitely false if the suffix is longer than the string
  544. if (end_len > str_len)
  545. return false;
  546. const TCHAR *str_ptr = str + str_len;
  547. const TCHAR *end_ptr = end_str + end_len;
  548. while (end_ptr >= end_str) {
  549. // Check for matching characters
  550. TCHAR c1 = *str_ptr;
  551. TCHAR c2 = *end_ptr;
  552. if (ignore_case) {
  553. c1 = Char_ToLower(c1);
  554. c2 = Char_ToLower(c2);
  555. }
  556. if (c1 != c2)
  557. return false;
  558. --str_ptr;
  559. --end_ptr;
  560. }
  561. // if we haven't failed out, it must be ok!
  562. return true;
  563. }
  564. CString String_MakeEndWith(const TCHAR* str, const TCHAR* end_str, bool ignore_case) {
  565. if (String_EndsWith(str, end_str, ignore_case)) {
  566. return str;
  567. } else {
  568. CString r(str);
  569. r += end_str;
  570. return r;
  571. }
  572. }
  573. void RemoveFromEnd (CString & s, const TCHAR* remove) {
  574. ASSERT(remove, (L""));
  575. // If the suffix is shorter than the string, don't bother
  576. int remove_len = lstrlen(remove);
  577. if (s.GetLength() < remove_len) return;
  578. // If the suffix is equal
  579. int suffix_begin = s.GetLength() - remove_len;
  580. if (0 == lstrcmp(s.GetString() + suffix_begin, remove))
  581. s.Delete(suffix_begin, remove_len);
  582. }
  583. CString ElideIfNeeded (const CString & input_string, int max_len, int min_len) {
  584. ASSERT (min_len <= max_len, (L""));
  585. ASSERT (max_len >= TSTR_SIZE(kEllipsis)+1, (L""));
  586. ASSERT (min_len >= TSTR_SIZE(kEllipsis)+1, (L""));
  587. CString s = input_string;
  588. s.TrimRight();
  589. if (s.GetLength() > max_len) {
  590. int truncate_at = max_len - TSTR_SIZE(kEllipsis);
  591. // find first space going backwards from character one after the truncation point
  592. while (truncate_at >= min_len && !IsSpace(s.GetAt(truncate_at)))
  593. truncate_at--;
  594. // skip the space(s)
  595. while (truncate_at >= min_len && IsSpace(s.GetAt(truncate_at)))
  596. truncate_at--;
  597. truncate_at++;
  598. if (truncate_at <= min_len || truncate_at > (max_len - static_cast<int>(TSTR_SIZE(kEllipsis)))) {
  599. // we weren't able to break at a word boundary, may as well use more of the string
  600. truncate_at = max_len - TSTR_SIZE(kEllipsis);
  601. // skip space(s)
  602. while (truncate_at > 0 && IsSpace(s.GetAt(truncate_at-1)))
  603. truncate_at--;
  604. }
  605. s = s.Left(truncate_at);
  606. s += kEllipsis;
  607. }
  608. UTIL_LOG(L6, (L"elide (%d %d) %s -> %s", min_len, max_len, input_string, s));
  609. return s;
  610. }
  611. // these functions untested
  612. // UTF8 parameter supported on XP/2000 only
  613. HRESULT AnsiToUTF8 (char * src, int src_len, char * dest, int *dest_len) {
  614. ASSERT (dest_len, (L""));
  615. ASSERT (dest, (L""));
  616. ASSERT (src, (L""));
  617. // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode
  618. // then use WideCharToMultiByte to convert from Unicode to UTF8
  619. WCHAR *unicode = new WCHAR [(src_len + 1) * sizeof (TCHAR)]; ASSERT (unicode, (L""));
  620. int chars_written = MultiByteToWideChar (CP_ACP, 0, src, src_len, unicode, src_len);
  621. ASSERT (chars_written == src_len, (L""));
  622. char *unmappable = " ";
  623. BOOL unmappable_characters = false;
  624. *dest_len = WideCharToMultiByte (CP_UTF8, 0, unicode, chars_written, dest, *dest_len, unmappable, &unmappable_characters);
  625. delete [] unicode;
  626. return S_OK;
  627. }
  628. // Convert Wide to ANSI directly. Use only when it is all ANSI
  629. CStringA WideToAnsiDirect(const CString & in) {
  630. int in_len = in.GetLength();
  631. const TCHAR * in_buf = static_cast<const TCHAR*>(in.GetString());
  632. CStringA out;
  633. unsigned char * out_buf = (unsigned char *)out.GetBufferSetLength(in_len);
  634. for(int i = 0; i < in_len; ++i)
  635. out_buf[i] = static_cast<unsigned char>(in_buf[i]);
  636. out.ReleaseBuffer(in_len);
  637. return out;
  638. }
  639. HRESULT UCS2ToUTF8 (LPCWSTR src, int src_len, char * dest, int *dest_len) {
  640. ASSERT(dest_len, (L""));
  641. ASSERT(dest, (L""));
  642. *dest_len = WideCharToMultiByte (CP_UTF8, 0, src, src_len, dest, *dest_len, NULL,NULL);
  643. return S_OK;
  644. }
  645. HRESULT UTF8ToUCS2 (const char * src, int src_len, LPWSTR dest, int *dest_len) {
  646. ASSERT (dest_len, (L""));
  647. ASSERT (src, (L""));
  648. *dest_len = MultiByteToWideChar (CP_UTF8, 0, src, src_len, dest, *dest_len);
  649. ASSERT (*dest_len == src_len, (L""));
  650. return S_OK;
  651. }
  652. HRESULT UTF8ToAnsi (char * src, int, char * dest, int *dest_len) {
  653. ASSERT(dest_len, (L""));
  654. ASSERT(dest, (L""));
  655. ASSERT(src, (L""));
  656. src; dest; dest_len; // unreferenced formal parameter
  657. // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode
  658. // then use WideCharToMultiByte to convert from Unicode to ANSI
  659. return E_FAIL;
  660. }
  661. // clean up a string so it can be included within a JavaScript string
  662. // mainly involves escaping characters
  663. CString SanitizeString(const CString & in, DWORD mode) {
  664. CString out(in);
  665. if (mode & kSanHtml) {
  666. // SKIP_LOC_BEGIN
  667. ReplaceCString(out, _T("&"), _T("&amp;"));
  668. ReplaceCString(out, _T("<"), _T("&lt;"));
  669. ReplaceCString(out, _T(">"), _T("&gt;"));
  670. // SKIP_LOC_END
  671. }
  672. if ((mode & kSanXml) == kSanXml) {
  673. // SKIP_LOC_BEGIN
  674. ReplaceCString(out, _T("'"), _T("&apos;"));
  675. ReplaceCString(out, _T("\""), _T("&quot;"));
  676. // SKIP_LOC_END
  677. }
  678. // Note that this SAN_JAVASCRIPT and kSanXml should not be used together.
  679. ASSERT ((mode & (kSanJs | kSanXml)) != (kSanJs | kSanXml), (L""));
  680. if ((mode & kSanJs) == kSanJs) {
  681. // SKIP_LOC_BEGIN
  682. ReplaceCString(out, _T("\\"), _T("\\\\"));
  683. ReplaceCString(out, _T("\'"), _T("\\\'"));
  684. ReplaceCString(out, _T("\""), _T("\\\""));
  685. ReplaceCString(out, _T("\n"), _T(" "));
  686. ReplaceCString(out, _T("\t"), _T(" "));
  687. // SKIP_LOC_END
  688. }
  689. if ((mode & kSanHtmlInput) == kSanHtmlInput) {
  690. // SKIP_LOC_BEGIN
  691. ReplaceCString(out, _T("\""), _T("&quot;"));
  692. ReplaceCString(out, _T("'"), _T("&#39;"));
  693. // SKIP_LOC_END
  694. }
  695. return out;
  696. }
  697. // Bolds the periods used for abbreviation. Call this after HighlightTerms.
  698. CString BoldAbbreviationPeriods(const CString & in) {
  699. CString out(in);
  700. CString abbrev;
  701. for (int i = 0; i < kAbbreviationPeriodLength; ++i)
  702. abbrev += _T(".");
  703. ReplaceCString(out, abbrev, NOTRANSL(_T("<b>")) + abbrev + NOTRANSL(_T("</b>")));
  704. return out;
  705. }
  706. // Unescape a escaped sequence leading by a percentage symbol '%',
  707. // and converted the unescaped sequence (in UTF8) into unicode.
  708. // Inputs: src is the input string.
  709. // pos is the starting position.
  710. // Returns: true if a EOS(null) char was encounted.
  711. // out contains the unescaped and converted unicode string.
  712. // consumed_length is how many bytes in the src string have been
  713. // unescaped.
  714. // We can avoid the expensive UTF8 conversion step if there are no higher
  715. // ansi characters So if there aren't any, just convert it ANSI-to-WIDE
  716. // directly, which is cheaper.
  717. inline bool UnescapeSequence(const CString &src, int pos,
  718. CStringW *out, int *consumed_length) {
  719. ASSERT1(out);
  720. ASSERT1(consumed_length);
  721. int length = src.GetLength();
  722. // (input_len - pos) / 3 is enough for un-escaping the (%xx)+ sequences.
  723. int max_dst_length = (length - pos) / 3;
  724. scoped_array<char> unescaped(new char[max_dst_length]);
  725. char *buf = unescaped.get();
  726. if (buf == NULL) { // no enough space ???
  727. *consumed_length = 0;
  728. return false;
  729. }
  730. char *dst = buf;
  731. bool is_utf8 = false;
  732. // It is possible that there is a null character '\0' in the sequence.
  733. // Because the CStringT does't support '\0' in it, we stop
  734. // parsing the input string when it is encounted.
  735. bool eos_encounted = false;
  736. uint8 ch;
  737. int s = pos;
  738. while (s + 2 < length && src[s] == '%' && !eos_encounted &&
  739. ExtractChar(src, s + 1, &ch)) {
  740. if (ch != 0)
  741. *dst++ = ch;
  742. else
  743. eos_encounted = true;
  744. if (ch >= 128)
  745. is_utf8 = true;
  746. s += 3;
  747. }
  748. ASSERT1(dst <= buf + max_dst_length); // just to make sure
  749. *consumed_length = s - pos;
  750. if (is_utf8)
  751. AnsiToWideString(buf, dst - buf, CP_UTF8, out);
  752. else
  753. *out = AnsiToWideString(buf, dst - buf);
  754. return eos_encounted;
  755. }
  756. // There is an encoding called "URL-encoding". This function takes a URL-encoded string
  757. // and converts it back to the original representation
  758. // example: "?q=moon+doggy_%25%5E%26&" = "moon doggy_%^&"
  759. CString Unencode(const CString &input) {
  760. const int input_len = input.GetLength();
  761. const TCHAR *src = input.GetString();
  762. // input_len is enough for containing the unencoded string.
  763. CString out;
  764. TCHAR *head = out.GetBuffer(input_len);
  765. TCHAR *dst = head;
  766. int s = 0;
  767. bool eos_encounted = false;
  768. bool is_utf8 = false;
  769. CStringW fragment;
  770. int consumed_length = 0;
  771. while (s < input_len && !eos_encounted) {
  772. switch (src[s]) {
  773. case '+' :
  774. *dst++ = ' ';
  775. ASSERT1(dst <= head + input_len);
  776. ++s;
  777. break;
  778. case '%' :
  779. eos_encounted =
  780. UnescapeSequence(input, s, &fragment, &consumed_length);
  781. if (consumed_length > 0) {
  782. s += consumed_length;
  783. ASSERT1(dst + fragment.GetLength() <= head + input_len);
  784. for (int i = 0; i < fragment.GetLength(); ++i)
  785. *dst++ = fragment[i];
  786. } else {
  787. *dst++ = src[s++];
  788. ASSERT1(dst <= head + input_len);
  789. }
  790. break;
  791. default:
  792. *dst++ = src[s];
  793. ASSERT1(dst <= head + input_len);
  794. ++s;
  795. }
  796. }
  797. int out_len = dst - head;
  798. out.ReleaseBuffer(out_len);
  799. return out;
  800. }
  801. CString GetTextInbetween(const CString &input, const CString &start, const CString &end) {
  802. int start_index = String_FindString(input, start);
  803. if (start_index == -1)
  804. return L"";
  805. start_index += start.GetLength();
  806. int end_index = String_FindString(input, end, start_index);
  807. if (end_index == -1)
  808. return L"";
  809. return input.Mid(start_index, end_index - start_index);
  810. }
  811. // Given a string, get the parameter and url-unencode it
  812. CString GetParam(const CString & input, const CString & key) {
  813. CString my_key(_T("?"));
  814. my_key.Append(key);
  815. my_key += L'=';
  816. return Unencode(GetTextInbetween(input, my_key, NOTRANSL(L"?")));
  817. }
  818. // Get an xml-like field from a string
  819. CString GetField (const CString & input, const CString & field) {
  820. CString start_field(NOTRANSL(_T("<")));
  821. start_field += field;
  822. start_field += L'>';
  823. int32 start = String_FindString(input, start_field);
  824. if (start == -1) { return _T(""); }
  825. start += 2 + lstrlen (field);
  826. CString end_field(NOTRANSL(_T("</")));
  827. end_field += field;
  828. end_field += L'>';
  829. int32 end = String_FindString(input, end_field);
  830. if (end == -1) { return _T(""); }
  831. return input.Mid (start, end - start);
  832. }
  833. // ------------------------------------------------------------
  834. // Finds a whole word match in the query.
  835. // If the word has non-spaces either before or after, it will not qualify as
  836. // a match. i.e. "pie!" is not a match because of the exclamation point.
  837. // TODO(omaha): Add parameter that will consider punctuation acceptable.
  838. //
  839. // Optionally will look for a colon at the end.
  840. // If not found, return -1.
  841. int FindWholeWordMatch (const CString &query,
  842. const CString &word_to_match,
  843. const bool end_with_colon,
  844. const int index_begin) {
  845. if (word_to_match.IsEmpty()) {
  846. return -1;
  847. }
  848. int index_word_begin = index_begin;
  849. // Keep going until we find a whole word match, or the string ends.
  850. do {
  851. index_word_begin = String_FindString (query, word_to_match, index_word_begin);
  852. if (-1 == index_word_begin) {
  853. return index_word_begin;
  854. }
  855. // If it's not a whole word match, keep going.
  856. if (index_word_begin > 0 &&
  857. !IsSpaceW (query[index_word_begin - 1])) {
  858. goto LoopEnd;
  859. }
  860. if (end_with_colon) {
  861. int index_colon = String_FindChar (query, L':', index_word_begin);
  862. // If there is no colon in the string, return now.
  863. if (-1 == index_colon) {
  864. return -1;
  865. }
  866. // If there is text between the end of the word and the colon, keep going.
  867. if (index_colon - index_word_begin != word_to_match.GetLength()) {
  868. goto LoopEnd;
  869. }
  870. } else {
  871. // If there are more chars left after this word/phrase, and
  872. // they are not spaces, return.
  873. if (query.GetLength() > index_word_begin + word_to_match.GetLength() &&
  874. !IsSpaceW (query.GetAt (index_word_begin + word_to_match.GetLength()))) {
  875. goto LoopEnd;
  876. }
  877. }
  878. // It fits all the requirements, so return the index to the beginning of the word.
  879. return index_word_begin;
  880. LoopEnd:
  881. ++index_word_begin;
  882. } while (-1 != index_word_begin);
  883. return index_word_begin;
  884. }
  885. // --------------------------------------------------------
  886. // Do whole-word replacement in "str".
  887. void ReplaceWholeWord (const CString &string_to_replace,
  888. const CString &replacement,
  889. const bool trim_whitespace,
  890. CString *str) {
  891. ASSERT (str, (L"ReplaceWholeWord"));
  892. if (string_to_replace.IsEmpty() || str->IsEmpty()) {
  893. return;
  894. }
  895. int index_str = 0;
  896. do {
  897. index_str = FindWholeWordMatch (*str, string_to_replace, false, index_str);
  898. if (-1 != index_str) {
  899. // Get the strings before and after, and trim whitespace.
  900. CString str_before_word(str->Left (index_str));
  901. if (trim_whitespace) {
  902. str_before_word.TrimRight();
  903. }
  904. CString str_after_word(str->Mid (index_str + string_to_replace.GetLength()));
  905. if (trim_whitespace) {
  906. str_after_word.TrimLeft();
  907. }
  908. *str = str_before_word + replacement + str_after_word;
  909. index_str += replacement.GetLength() + 1;
  910. }
  911. } while (index_str != -1);
  912. }
  913. // --------------------------------------------------------
  914. // Reverse (big-endian<->little-endian) the shorts that make up
  915. // Unicode characters in a byte array of Unicode chars
  916. HRESULT ReverseUnicodeByteOrder(byte* unicode_string, int size_in_bytes) {
  917. ASSERT (unicode_string, (L""));
  918. // If odd # of bytes, just leave the last one alone
  919. for (int i = 0; i < size_in_bytes - 1; i += 2) {
  920. byte b = unicode_string[i];
  921. unicode_string[i] = unicode_string[i+1];
  922. unicode_string[i+1] = b;
  923. }
  924. return S_OK;
  925. }
  926. // case insensitive strstr
  927. // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c
  928. const char *stristr(const char *string, const char *pattern)
  929. {
  930. ASSERT (pattern, (L""));
  931. ASSERT (string, (L""));
  932. ASSERT (string && pattern, (L""));
  933. char *pattern_ptr, *string_ptr;
  934. const char *start;
  935. for (start = string; *start != 0; start++)
  936. {
  937. // find start of pattern in string
  938. for ( ; ((*start!=0) && (String_ToUpperA(*start) != String_ToUpperA(*pattern))); start++)
  939. ;
  940. if (0 == *start)
  941. return NULL;
  942. pattern_ptr = (char *)pattern;
  943. string_ptr = (char *)start;
  944. while (String_ToUpperA(*string_ptr) == String_ToUpperA(*pattern_ptr))
  945. {
  946. string_ptr++;
  947. pattern_ptr++;
  948. // if end of pattern then pattern was found
  949. if (0 == *pattern_ptr)
  950. return (start);
  951. }
  952. }
  953. return NULL;
  954. }
  955. // case insensitive Unicode strstr
  956. // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c
  957. const WCHAR *stristrW(const WCHAR *string, const WCHAR *pattern)
  958. {
  959. ASSERT (pattern, (L""));
  960. ASSERT (string, (L""));
  961. ASSERT (string && pattern, (L""));
  962. const WCHAR *start;
  963. for (start = string; *start != 0; start++)
  964. {
  965. // find start of pattern in string
  966. for ( ; ((*start!=0) && (String_ToUpper(*start) != String_ToUpper(*pattern))); start++)
  967. ;
  968. if (0 == *start)
  969. return NULL;
  970. const WCHAR *pattern_ptr = pattern;
  971. const WCHAR *string_ptr = start;
  972. while (String_ToUpper(*string_ptr) == String_ToUpper(*pattern_ptr))
  973. {
  974. string_ptr++;
  975. pattern_ptr++;
  976. // if end of pattern then pattern was found
  977. if (0 == *pattern_ptr)
  978. return (start);
  979. }
  980. }
  981. return NULL;
  982. }
  983. // case sensitive Unicode strstr
  984. // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c
  985. const WCHAR *strstrW(const WCHAR *string, const WCHAR *pattern)
  986. {
  987. ASSERT (pattern, (L""));
  988. ASSERT (string, (L""));
  989. ASSERT (string && pattern, (L""));
  990. const WCHAR *start;
  991. for (start = string; *start != 0; start++)
  992. {
  993. // find start of pattern in string
  994. for ( ; ((*start!=0) && (*start != *pattern)); start++)
  995. ;
  996. if (0 == *start)
  997. return NULL;
  998. const WCHAR *pattern_ptr = pattern;
  999. const WCHAR *string_ptr = start;
  1000. while (*string_ptr == *pattern_ptr)
  1001. {
  1002. string_ptr++;
  1003. pattern_ptr++;
  1004. // if end of pattern then pattern was found
  1005. if (0 == *pattern_ptr)
  1006. return (start);
  1007. }
  1008. }
  1009. return NULL;
  1010. }
  1011. // -------------------------------------------------------------------------
  1012. // Helper function
  1013. float GetLenWithWordWrap (const float len_so_far,
  1014. const float len_to_add,
  1015. const uint32 len_line) {
  1016. // lint -save -e414 Possible division by 0
  1017. ASSERT (len_line != 0, (L""));
  1018. float len_total = len_so_far + len_to_add;
  1019. // Figure out if we need to word wrap by seeing if adding the second
  1020. // string will cause us to span more lines than before.
  1021. uint32 num_lines_before = static_cast<uint32> (len_so_far / len_line);
  1022. uint32 num_lines_after = static_cast<uint32> (len_total / len_line);
  1023. // If it just barely fit onto the line, do not wrap to the next line.
  1024. if (num_lines_after > 0 && (len_total / len_line - num_lines_after == 0)) {
  1025. --num_lines_after;
  1026. }
  1027. if (num_lines_after > num_lines_before) {
  1028. // Need to word wrap.
  1029. // lint -e{790} Suspicious truncation
  1030. return num_lines_after * len_line + len_to_add;
  1031. }
  1032. else
  1033. return len_total;
  1034. // lint -restore
  1035. }
  1036. int CalculateBase64EscapedLen(int input_len, bool do_padding) {
  1037. // these formulae were copied from comments that used to go with the base64
  1038. // encoding functions
  1039. int intermediate_result = 8 * input_len + 5;
  1040. ASSERT(intermediate_result > 0,(L"")); // make sure we didn't overflow
  1041. int len = intermediate_result / 6;
  1042. if (do_padding) len = ((len + 3) / 4) * 4;
  1043. return len;
  1044. }
  // Returns the encoded length for |input_len| bytes including '=' padding.
  // Base64Escape does padding, so this calculation includes padding.
  int CalculateBase64EscapedLen(int input_len) {
    return CalculateBase64EscapedLen(input_len, true);
  }
  1049. // Base64Escape
  1050. // Largely based on b2a_base64 in google/docid_encryption.c
  1051. //
  1052. //
  1053. int Base64EscapeInternal(const char *src, int szsrc,
  1054. char *dest, int szdest, const char *base64,
  1055. bool do_padding)
  1056. {
  1057. ASSERT(base64, (L""));
  1058. ASSERT(dest, (L""));
  1059. ASSERT(src, (L""));
  1060. static const char kPad64 = '=';
  1061. if (szsrc <= 0) return 0;
  1062. char *cur_dest = dest;
  1063. const unsigned char *cur_src = reinterpret_cast<const unsigned char*>(src);
  1064. // Three bytes of data encodes to four characters of cyphertext.
  1065. // So we can pump through three-byte chunks atomically.
  1066. while (szsrc > 2) { /* keep going until we have less than 24 bits */
  1067. if( (szdest -= 4) < 0 ) return 0;
  1068. cur_dest[0] = base64[cur_src[0] >> 2];
  1069. cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];
  1070. cur_dest[2] = base64[((cur_src[1] & 0x0f) << 2) + (cur_src[2] >> 6)];
  1071. cur_dest[3] = base64[cur_src[2] & 0x3f];
  1072. cur_dest += 4;
  1073. cur_src += 3;
  1074. szsrc -= 3;
  1075. }
  1076. /* now deal with the tail (<=2 bytes) */
  1077. switch (szsrc) {
  1078. case 0:
  1079. // Nothing left; nothing more to do.
  1080. break;
  1081. case 1:
  1082. // One byte left: this encodes to two characters, and (optionally)
  1083. // two pad characters to round out the four-character cypherblock.
  1084. if( (szdest -= 2) < 0 ) return 0;
  1085. cur_dest[0] = base64[cur_src[0] >> 2];
  1086. cur_dest[1] = base64[(cur_src[0] & 0x03) << 4];
  1087. cur_dest += 2;
  1088. if (do_padding) {
  1089. if( (szdest -= 2) < 0 ) return 0;
  1090. cur_dest[0] = kPad64;
  1091. cur_dest[1] = kPad64;
  1092. cur_dest += 2;
  1093. }
  1094. break;
  1095. case 2:
  1096. // Two bytes left: this encodes to three characters, and (optionally)
  1097. // one pad character to round out the four-character cypherblock.
  1098. if( (szdest -= 3) < 0 ) return 0;
  1099. cur_dest[0] = base64[cur_src[0] >> 2];
  1100. cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];
  1101. cur_dest[2] = base64[(cur_src[1] & 0x0f) << 2];
  1102. cur_dest += 3;
  1103. if (do_padding) {
  1104. if( (szdest -= 1) < 0 ) return 0;
  1105. cur_dest[0] = kPad64;
  1106. cur_dest += 1;
  1107. }
  1108. break;
  1109. default:
  1110. // Should not be reached: blocks of 3 bytes are handled
  1111. // in the while loop before this switch statement.
  1112. ASSERT(false, (L"Logic problem? szsrc = %S",szsrc));
  1113. break;
  1114. }
  1115. return (cur_dest - dest);
  1116. }
  // Encoding alphabets passed to Base64EscapeInternal: the character at index
  // i encodes the 6-bit value i.
  // Standard alphabet (values 62 and 63 are '+' and '/').
  #define kBase64Chars "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
  // Web-safe alphabet (values 62 and 63 are '-' and '_').
  #define kWebSafeBase64Chars "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
  // Base64-encodes |szsrc| bytes of |src| into |dest| (capacity |szdest|)
  // with the standard alphabet and '=' padding. Returns the value produced by
  // Base64EscapeInternal.
  int Base64Escape(const char *src, int szsrc, char *dest, int szdest) {
    ASSERT(dest, (L""));
    ASSERT(src, (L""));
    return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true);
  }
  // Base64-encodes |szsrc| bytes of |src| into |dest| (capacity |szdest|)
  // with the web-safe alphabet; '=' padding is added only when |do_padding|
  // is set. Returns the value produced by Base64EscapeInternal.
  int WebSafeBase64Escape(const char *src, int szsrc, char *dest,
                          int szdest, bool do_padding) {
    ASSERT(dest, (L""));
    ASSERT(src, (L""));
    return Base64EscapeInternal(src, szsrc, dest, szdest,
                                kWebSafeBase64Chars, do_padding);
  }
  1131. void Base64Escape(const char *src, int szsrc,
  1132. CStringA* dest, bool do_padding)
  1133. {
  1134. ASSERT(src, (L""));
  1135. ASSERT(dest,(L""));
  1136. const int max_escaped_size = CalculateBase64EscapedLen(szsrc, do_padding);
  1137. dest->Empty();
  1138. const int escaped_len = Base64EscapeInternal(src, szsrc,
  1139. dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1,
  1140. kBase64Chars,
  1141. do_padding);
  1142. ASSERT(max_escaped_size <= escaped_len,(L""));
  1143. dest->ReleaseBuffer(escaped_len);
  1144. }
  1145. void WebSafeBase64Escape(const char *src, int szsrc,
  1146. CStringA *dest, bool do_padding)
  1147. {
  1148. ASSERT(src, (L""));
  1149. ASSERT(dest,(L""));
  1150. const int max_escaped_size =
  1151. CalculateBase64EscapedLen(szsrc, do_padding);
  1152. dest->Empty();
  1153. const int escaped_len = Base64EscapeInternal(src, szsrc,
  1154. dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1,
  1155. kWebSafeBase64Chars,
  1156. do_padding);
  1157. ASSERT(max_escaped_size <= escaped_len,(L""));
  1158. dest->ReleaseBuffer(escaped_len);
  1159. }
  1160. void WebSafeBase64Escape(const CStringA& src, CStringA* dest) {
  1161. ASSERT(dest,(L""));
  1162. int encoded_len = CalculateBase64EscapedLen(src.GetLength());
  1163. scoped_array<char> buf(new char[encoded_len]);
  1164. int len = WebSafeBase64Escape(src,src.GetLength(), buf.get(), encoded_len, false);
  1165. dest->SetString(buf.get(), len);
  1166. }
  1167. // ----------------------------------------------------------------------
  1168. // int Base64Unescape() - base64 decoder
  1169. //
  1170. // Check out
  1171. // http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for formal
  1172. // description, but what we care about is that...
  1173. // Take the encoded stuff in groups of 4 characters and turn each
  1174. // character into a code 0 to 63 thus:
  1175. // A-Z map to 0 to 25
  1176. // a-z map to 26 to 51
  1177. // 0-9 map to 52 to 61
  1178. // +(- for WebSafe) maps to 62
  1179. // /(_ for WebSafe) maps to 63
  1180. // There will be four numbers, all less than 64 which can be represented
  1181. // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
  1182. // Arrange the 6 digit binary numbers into three bytes as such:
  1183. // aaaaaabb bbbbcccc ccdddddd
  1184. // Equals signs (one or two) are used at the end of the encoded block to
  1185. // indicate that the text was not an integer multiple of three bytes long.
  1186. // ----------------------------------------------------------------------
  1187. int Base64UnescapeInternal(const char *src, int len_src,
  1188. char *dest, int len_dest, const char* unbase64) {
  1189. ASSERT (unbase64, (L""));
  1190. ASSERT (src, (L""));
  1191. static const char kPad64 = '=';
  1192. int decode;
  1193. int destidx = 0;
  1194. int state = 0;
  1195. // Used an unsigned char, since ch is used as an array index (into unbase64).
  1196. unsigned char ch = 0;
  1197. while (len_src-- && (ch = *src++) != '\0') {
  1198. if (IsSpaceA(ch)) // Skip whitespace
  1199. continue;
  1200. if (ch == kPad64)
  1201. break;
  1202. decode = unbase64[ch];
  1203. if (decode == 99) // A non-base64 character
  1204. return (-1);
  1205. // Four cyphertext characters decode to three bytes.
  1206. // Therefore we can be in one of four states.
  1207. switch (state) {
  1208. case 0:
  1209. // We're at the beginning of a four-character cyphertext block.
  1210. // This sets the high six bits of the first byte of the
  1211. // plaintext block.
  1212. if (dest) {
  1213. if (destidx >= len_dest)
  1214. return (-1);
  1215. // lint -e{734} Loss of precision
  1216. dest[destidx] = static_cast<char>(decode << 2);
  1217. }
  1218. state = 1;
  1219. break;
  1220. case 1:
  1221. // We're one character into a four-character cyphertext block.
  1222. // This sets the low two bits of the first plaintext byte,
  1223. // and the high four bits of the second plaintext byte.
  1224. // However, if this is the end of data, and those four
  1225. // bits are zero, it could be that those four bits are
  1226. // leftovers from the encoding of data that had a length
  1227. // of one mod three.
  1228. if (dest) {
  1229. if (destidx >= len_dest)
  1230. return (-1);
  1231. // lint -e{734} Loss of precision
  1232. dest[destidx] |= decode >> 4;
  1233. if (destidx + 1 >= len_dest) {
  1234. if (0 != (decode & 0x0f))
  1235. return (-1);
  1236. else
  1237. ;
  1238. } else {
  1239. // lint -e{734} Loss of precision
  1240. dest[destidx+1] = static_cast<char>((decode & 0x0f) << 4);
  1241. }
  1242. }
  1243. destidx++;
  1244. state = 2;
  1245. break;
  1246. case 2:
  1247. // We're two characters into a four-character cyphertext block.
  1248. // This sets the low four bits of the second plaintext
  1249. // byte, and the high two bits of the third plaintext byte.
  1250. // However, if this is the end of data, and those two
  1251. // bits are zero, it could be that those two bits are
  1252. // leftovers from the encoding of data that had a length
  1253. // of two mod three.
  1254. if (dest) {
  1255. if (destidx >= len_dest)
  1256. return (-1);
  1257. // lint -e{734} Loss of precision
  1258. dest[destidx] |= decode >> 2;
  1259. if (destidx +1 >= len_dest) {
  1260. if (0 != (decode & 0x03))
  1261. return (-1);
  1262. else
  1263. ;
  1264. } else {
  1265. // lint -e{734} Loss of precision
  1266. dest[destidx+1] = static_cast<char>((decode & 0x03) << 6);
  1267. }
  1268. }
  1269. destidx++;
  1270. state = 3;
  1271. break;
  1272. case 3:
  1273. // We're at the last character of a four-character cyphertext block.
  1274. // This sets the low six bits of the third plaintext byte.
  1275. if (dest) {
  1276. if (destidx >= len_dest)
  1277. return (-1);
  1278. // lint -e{734} Loss of precision
  1279. dest[destidx] |= decode;
  1280. }
  1281. destidx++;
  1282. state = 0;
  1283. break;
  1284. default:
  1285. ASSERT (false, (L""));
  1286. break;
  1287. }
  1288. }
  1289. // We are done decoding Base-64 chars. Let's see if we ended
  1290. // on a byte boundary, and/or with erroneous trailing characters.
  1291. if (ch == kPad64) { // We got a pad char
  1292. if ((state == 0) || (state == 1))
  1293. return (-1); // Invalid '=' in first or second position
  1294. if (len_src == 0) {
  1295. if (state == 2) // We run out of input but we still need another '='
  1296. return (-1);
  1297. // Otherwise, we are in state 3 and only need this '='
  1298. } else {
  1299. if (state == 2) { // need another '='
  1300. while ((ch = *src++) != '\0' && (len_src-- > 0)) {
  1301. if (!IsSpaceA(ch))
  1302. break;
  1303. }
  1304. if (ch != kPad64)
  1305. return (-1);
  1306. }
  1307. // state = 1 or 2, check if all remain padding is space
  1308. while ((ch = *src++) != '\0' && (len_src-- > 0)) {
  1309. if (!IsSpaceA(ch))
  1310. return(-1);
  1311. }
  1312. }
  1313. } else {
  1314. // We ended by seeing the end of the string. Make sure we
  1315. // have no partial bytes lying around. Note that we
  1316. // do not require trailing '=', so states 2 and 3 are okay too.
  1317. if (state == 1)
  1318. return (-1);
  1319. }
  1320. return (destidx);
  1321. }
  1322. int Base64Unescape(const char *src, int len_src, char *dest, int len_dest) {
  1323. ASSERT(dest, (L""));
  1324. ASSERT(src, (L""));
  1325. static const char UnBase64[] = {
  1326. 99, 99, 99, 99, 99, 99, 99, 99,
  1327. 99, 99, 99, 99, 99, 99, 99, 99,
  1328. 99, 99, 99, 99, 99, 99, 99, 99,
  1329. 99, 99, 99, 99, 99, 99, 99, 99,
  1330. 99, 99, 99, 99, 99, 99, 99, 99,
  1331. 99, 99, 99, 62/*+*/, 99, 99, 99, 63/*/ */,
  1332. 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  1333. 60/*8*/, 61/*9*/, 99, 99, 99, 99, 99, 99,
  1334. 99, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
  1335. 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  1336. 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  1337. 23/*X*/, 24/*Y*/, 25/*Z*/, 99, 99, 99, 99, 99,
  1338. 99, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  1339. 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  1340. 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  1341. 49/*x*/, 50/*y*/, 51/*z*/, 99, 99, 99, 99, 99,
  1342. 99, 99, 99, 99, 99, 99, 99, 99,
  1343. 99, 99, 99, 99, 99, 99, 99, 99,
  1344. 99, 99, 99, 99, 99, 99, 99, 99,
  1345. 99, 99, 99, 99, 99, 99, 99, 99,
  1346. 99, 99, 99, 99, 99, 99, 99, 99,
  1347. 99, 99, 99, 99, 99, 99, 99, 99,
  1348. 99, 99, 99, 99, 99, 99, 99, 99,
  1349. 99, 99, 99, 99, 99, 99, 99, 99,
  1350. 99, 99, 99, 99, 99, 99, 99, 99,
  1351. 99, 99, 99, 99, 99, 99, 99, 99,
  1352. 99, 99, 99, 99, 99, 99, 99, 99,
  1353. 99, 99, 99, 99, 99, 99, 99, 99,
  1354. 99, 99, 99, 99, 99, 99, 99, 99,
  1355. 99, 99, 99, 99, 99, 99, 99, 99,
  1356. 99, 99, 99, 99, 99, 99, 99, 99,
  1357. 99, 99, 99, 99, 99, 99, 99, 99
  1358. };
  1359. // The above array was generated by the following code
  1360. // #include <sys/time.h>
  1361. // #include <stdlib.h>
  1362. // #include <string.h>
  1363. // main()
  1364. // {
  1365. // static const char Base64[] =
  1366. // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  1367. // char *pos;
  1368. // int idx, i, j;
  1369. // printf(" ");
  1370. // for (i = 0; i < 255; i += 8) {
  1371. // for (j = i; j < i + 8; j++) {
  1372. // pos = strchr(Base64, j);
  1373. // if ((pos == NULL) || (j == 0))
  1374. // idx = 99;
  1375. // else
  1376. // idx = pos - Base64;
  1377. // if (idx == 99)
  1378. // printf(" %2d, ", idx);
  1379. // else
  1380. // printf(" %2d/*%c*/,", idx, j);
  1381. // }
  1382. // printf("\n ");
  1383. // }
  1384. // }
  1385. return Base64UnescapeInternal(src, len_src, dest, len_dest, UnBase64);
  1386. }
  1387. int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) {
  1388. ASSERT(dest, (L""));
  1389. ASSERT(src, (L""));
  1390. static const char UnBase64[] = {
  1391. 99, 99, 99, 99, 99, 99, 99, 99,
  1392. 99, 99, 99, 99, 99, 99, 99, 99,
  1393. 99, 99, 99, 99, 99, 99, 99, 99,
  1394. 99, 99, 99, 99, 99, 99, 99, 99,
  1395. 99, 99, 99, 99, 99, 99, 99, 99,
  1396. 99, 99, 99, 99, 99, 62/*-*/, 99, 99,
  1397. 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  1398. 60/*8*/, 61/*9*/, 99, 99, 99, 99, 99, 99,
  1399. 99, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
  1400. 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  1401. 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  1402. 23/*X*/, 24/*Y*/, 25/*Z*/, 99, 99, 99, 99, 63/*_*/,
  1403. 99, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  1404. 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  1405. 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  1406. 49/*x*/, 50/*y*/, 51/*z*/, 99, 99, 99, 99, 99,
  1407. 99, 99, 99, 99, 99, 99, 99, 99,
  1408. 99, 99, 99, 99, 99, 99, 99, 99,
  1409. 99, 99, 99, 99, 99, 99, 99, 99,
  1410. 99, 99, 99, 99, 99, 99, 99, 99,
  1411. 99, 99, 99, 99, 99, 99, 99, 99,
  1412. 99, 99, 99, 99, 99, 99, 99, 99,
  1413. 99, 99, 99, 99, 99, 99, 99, 99,
  1414. 99, 99, 99, 99, 99, 99, 99, 99,
  1415. 99, 99, 99, 99, 99, 99, 99, 99,
  1416. 99, 99, 99, 99, 99, 99, 99, 99,
  1417. 99, 99, 99, 99, 99, 99, 99, 99,
  1418. 99, 99, 99, 99, 99, 99, 99, 99,
  1419. 99, 99, 99, 99, 99, 99, 99, 99,
  1420. 99, 99, 99, 99, 99, 99, 99, 99,
  1421. 99, 99, 99, 99, 99, 99, 99, 99,
  1422. 99, 99, 99, 99, 99, 99, 99, 99
  1423. };
  1424. // The above array was generated by the following code
  1425. // #include <sys/time.h>
  1426. // #include <stdlib.h>
  1427. // #include <string.h>
  1428. // main()
  1429. // {
  1430. // static const char Base64[] =
  1431. // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
  1432. // char *pos;
  1433. // int idx, i, j;
  1434. // printf(" ");
  1435. // for (i = 0; i < 255; i += 8) {
  1436. // for (j = i; j < i + 8; j++) {
  1437. // pos = strchr(Base64, j);
  1438. // if ((pos == NULL) || (j == 0))
  1439. // idx = 99;
  1440. // else
  1441. // idx = pos - Base64;
  1442. // if (idx == 99)
  1443. // printf(" %2d, ", idx);
  1444. // else
  1445. // printf(" %2d/*%c*/,", idx, j);
  1446. // }
  1447. // printf("\n ");
  1448. // }
  1449. // }
  1450. return Base64UnescapeInternal(src, szsrc, dest, szdest, UnBase64);
  1451. }
  1452. bool IsHexDigit (WCHAR c) {
  1453. return (((c >= L'a') && (c <= L'f'))
  1454. || ((c >= L'A') && (c <= L'F'))
  1455. || ((c >= L'0') && (c <= L'9')));
  1456. }
  1457. int HexDigitToInt (WCHAR c) {
  1458. return ((c >= L'a') ? ((c - L'a') + 10) :
  1459. (c >= L'A') ? ((c - L'A') + 10) :
  1460. (c - L'0'));
  1461. }
  1462. // ----------------------------------------------------------------------
  1463. // int QuotedPrintableUnescape()
  1464. //
  1465. // Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for
  1466. // more details, only briefly implemented. But from the web...
  1467. // Quoted-printable is an encoding method defined in the MIME
  1468. // standard. It is used primarily to encode 8-bit text (such as text
  1469. // that includes foreign characters) into 7-bit US ASCII, creating a
  1470. // document that is mostly readable by humans, even in its encoded
  1471. // form. All MIME compliant applications can decode quoted-printable
  1472. // text, though they may not necessarily be able to properly display the
  1473. // document as it was originally intended. As quoted-printable encoding
  1474. // is implemented most commonly, printable ASCII characters (values 33
  1475. // through 126, excluding 61), tabs and spaces that do not appear at the
  1476. // end of lines, and end-of-line characters are not encoded. Other
  1477. // characters are represented by an equal sign (=) immediately followed
  1478. // by that character's hexadecimal value. Lines that are longer than 76
  1479. // characters are shortened by line breaks, with the equal sign marking
  1480. // where the breaks occurred.
  1481. //
  1482. // Update: we really want QuotedPrintableUnescape to conform to rfc2047,
  1483. // which expands the q encoding. In particular, it specifices that _'s are
  1484. // to be treated as spaces.
  1485. // ----------------------------------------------------------------------
  1486. int QuotedPrintableUnescape(const WCHAR *source, int slen,
  1487. WCHAR *dest, int len_dest) {
  1488. ASSERT(dest, (L""));
  1489. ASSERT(source, (L""));
  1490. WCHAR* d = dest;
  1491. const WCHAR* p = source;
  1492. while (*p != '\0' && p < source+slen && d < dest+len_dest) {
  1493. switch (*p) {
  1494. case '=':
  1495. if (p == source+slen-1) {
  1496. // End of line, no need to print the =..
  1497. return (d-dest);
  1498. }
  1499. // if its valid, convert to hex and insert
  1500. if (p < source+slen-2 && IsHexDigit(p[1]) && IsHexDigit(p[2])) {
  1501. // lint -e{734} Loss of precision
  1502. *d++ = static_cast<WCHAR>(
  1503. HexDigitToInt(p[1]) * 16 + HexDigitToInt(p[2]));
  1504. p += 3;
  1505. } else {
  1506. p++;
  1507. }
  1508. break;
  1509. case '_': // According to rfc2047, _'s are to be treated as spaces
  1510. *d++ = ' '; p++;
  1511. break;
  1512. default:
  1513. *d++ = *p++;
  1514. break;
  1515. }
  1516. }
  1517. return (d-dest);
  1518. }
  1519. // TODO(omaha): currently set not to use IsCharUpper because that is relatively slow
  1520. // this is used in the QUIB; consider if we need to use IsCharUpper or a replacement
  1521. bool String_IsUpper(TCHAR c) {
  1522. return (c >= 'A' && c <= 'Z');
  1523. // return (IsCharUpper (c));
  1524. }
  1525. // Replacement for the CRT toupper(c)
  1526. int String_ToUpper(int c) {
  1527. // If it's < 128, then convert is ourself, which is far cheaper than the system conversion
  1528. if (c < 128)
  1529. return String_ToUpperA(static_cast<char>(c));
  1530. TCHAR * p_c = reinterpret_cast<TCHAR *>(c);
  1531. int conv_c = reinterpret_cast<int>(::CharUpper(p_c));
  1532. return conv_c;
  1533. }
  // Replacement for the CRT toupper(c), limited to ASCII: 'a'..'z' are mapped
  // to 'A'..'Z' and every other character is returned unchanged.
  char String_ToUpperA(char c) {
    const bool is_ascii_lower = (c >= 'a' && c <= 'z');
    return is_ascii_lower ? static_cast<char>(c - ('a' - 'A')) : c;
  }
  // Converts the string |str| to lower case in place by handing it to
  // ::CharLower. |str| must not be NULL.
  void String_ToLower(TCHAR* str) {
    ASSERT1(str);
    ::CharLower(str);
  }
  // Converts the string |str| to upper case in place by handing it to
  // ::CharUpper. |str| must not be NULL.
  void String_ToUpper(TCHAR* str) {
    ASSERT1(str);
    ::CharUpper(str);
  }
  1547. // String comparison based on length
  1548. // Replacement for the CRT strncmp(i)
  1549. int String_StrNCmp(const TCHAR * str1, const TCHAR * str2, uint32 len, bool ignore_case) {
  1550. ASSERT(str2, (L""));
  1551. ASSERT(str1, (L""));
  1552. TCHAR c1, c2;
  1553. if (len == 0)
  1554. return 0;
  1555. // compare each char
  1556. // TODO(omaha): If we use a lot of case sensitive compares consider having 2 loops.
  1557. do {
  1558. c1 = *str1++;
  1559. c2 = *str2++;
  1560. if (ignore_case) {
  1561. c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious truncation
  1562. c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507
  1563. }
  1564. } while ( (--len) && c1 && (c1 == c2) );
  1565. return (int)(c1 - c2);
  1566. }
  1567. // TODO(omaha): Why do we introduce this behaviorial difference?
  1568. // Replacement for strncpy() - except ALWAYS ends string with null
  1569. TCHAR* String_StrNCpy(TCHAR* destination, const TCHAR* source, uint32 len) {
  1570. ASSERT (source, (L""));
  1571. ASSERT (destination, (L""));
  1572. TCHAR* result = destination;
  1573. ASSERT (0 != len, (L"")); // Too short a destination for even the null character
  1574. while (*source && len) {
  1575. *destination++ = *source++;
  1576. len--;
  1577. }
  1578. // If we ran out of space, back up one
  1579. if (0 == len) {
  1580. destination--;
  1581. }
  1582. // Null-terminate the string
  1583. *destination = _T('\0');
  1584. return result;
  1585. }
  1586. // check if a string starts with another string
  1587. bool String_StartsWith(const TCHAR *str, const TCHAR *start_str,
  1588. bool ignore_case) {
  1589. ASSERT(start_str, (L""));
  1590. ASSERT(str, (L""));
  1591. while (0 != *str) {
  1592. // Check for matching characters
  1593. TCHAR c1 = *str;
  1594. TCHAR c2 = *start_str;
  1595. // Reached the end of start_str?
  1596. if (0 == c2)
  1597. return true;
  1598. if (ignore_case) {
  1599. c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious truncation
  1600. c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507 Suspicious truncation
  1601. }
  1602. if (c1 != c2)
  1603. return false;
  1604. ++str;
  1605. ++start_str;
  1606. }
  1607. // If str is shorter than start_str, no match. If equal size, match.
  1608. return 0 == *start_str;
  1609. }
  1610. // check if a string starts with another string
  1611. bool String_StartsWithA(const char *str, const char *start_str, bool ignore_case) {
  1612. ASSERT(start_str, (L""));
  1613. ASSERT(str, (L""));
  1614. while (0 != *str) {
  1615. // Check for matching characters
  1616. char c1 = *str;
  1617. char c2 = *start_str;
  1618. // Reached the end of start_str?
  1619. if (0 == c2)
  1620. return true;
  1621. if (ignore_case) {
  1622. c1 = String_ToLowerCharAnsi(c1);
  1623. c2 = String_ToLowerCharAnsi(c2);
  1624. }
  1625. if (c1 != c2)
  1626. return false;
  1627. ++str;
  1628. ++start_str;
  1629. }
  1630. // If str is shorter than start_str, no match. If equal size, match.
  1631. return 0 == *start_str;
  1632. }
  1633. // the wrapper version below actually increased code size as of 5/31/04
  1634. // perhaps because the int64 version is larger and in some EXE/DLLs we only need the int32 version
  1635. // converts a string to an int
  1636. // Does not check for overflow
  1637. // is the direct int32 version significantly faster for our usage?
  1638. // int32 String_StringToInt(const TCHAR * str) {
  1639. // ASSERT(str, (L""));
  1640. // return static_cast<int32>(String_StringToInt64 (str));
  1641. // }
  1642. // converts a string to an int
  1643. // Does not check for overflow
  1644. int32 String_StringToInt(const TCHAR * str) {
  1645. ASSERT(str, (L""));
  1646. int c; // current char
  1647. int32 total; // current total
  1648. int sign; // if '-', then negative, otherwise positive
  1649. // remove spaces
  1650. while ( *str == _T(' '))
  1651. ++str;
  1652. c = (int)*str++;
  1653. sign = c; // save sign indication
  1654. if (c == _T('-') || c == _T('+'))
  1655. c = (int)*str++; // skip sign
  1656. total = 0;
  1657. while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1 ) {
  1658. total = 10 * total + c; // accumulate digit
  1659. c = *str++; // get next char
  1660. }
  1661. if (sign == '-')
  1662. return -total;
  1663. else
  1664. return total; // return result, negated if necessary
  1665. }
  1666. // converts a string to an int64
  1667. // Does not check for overflow
  1668. int64 String_StringToInt64(const TCHAR * str) {
  1669. ASSERT(str, (L""));
  1670. int c; // current char
  1671. int64 total; // current total
  1672. int sign;
  1673. while (*str == ' ') ++str; // skip space
  1674. c = (int)*str++;
  1675. sign = c; /* save sign indication */
  1676. if (c == '-' || c == '+')
  1677. c = (int)*str++;
  1678. total = 0;
  1679. while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1) {
  1680. total = 10 * total + c; /* accumulate digit */
  1681. c = *str++; /* get next char */
  1682. }
  1683. if (sign == '-')
  1684. return -total;
  1685. else
  1686. return total;
  1687. }
  1688. // A faster version of the ::CharLower command. We first check if all characters are in low ANSI
  1689. // If so, we can convert it ourselves [which is about 10x faster]
  1690. // Otherwise, ask the system to do it for us.
  1691. TCHAR * String_FastToLower(TCHAR * str) {
  1692. ASSERT(str, (L""));
  1693. TCHAR * p = str;
  1694. while (*p) {
  1695. // If we can't process it ourselves, then do it with the API
  1696. if (*p > 127)
  1697. return ::CharLower(str);
  1698. ++p;
  1699. }
  1700. // If we're still here, do it ourselves
  1701. p = str;
  1702. while (*p) {
  1703. // Lower case it
  1704. if (*p >= L'A' && *p <= 'Z')
  1705. *p |= 0x20;
  1706. ++p;
  1707. }
  1708. return str;
  1709. }
  1710. // Convert a size_t to a CString
  1711. CString sizet_to_str(const size_t & i) {
  1712. CString out;
  1713. out.Format(NOTRANSL(_T("%u")),i);
  1714. return out;
  1715. }
  1716. // Convert an int to a CString
  1717. CString itostr(const int i) {
  1718. return String_Int64ToString(i, 10);
  1719. }
  1720. // Convert a uint to a CString
  1721. CString itostr(const uint32 i) {
  1722. return String_Int64ToString(i, 10);
  1723. }
  1724. // converts an int to a string
  1725. // Does not check for overflow
  1726. CString String_Int64ToString(int64 value, int radix) {
  1727. ASSERT(radix > 0, (L""));
  1728. // Space big enough for it in binary, plus the sign
  1729. TCHAR temp[66];
  1730. bool negative = false;
  1731. if (value < 0) {
  1732. negative = true;
  1733. value = -value;
  1734. }
  1735. int pos = 0;
  1736. // Add digits in reverse order
  1737. do {
  1738. TCHAR digit = (TCHAR) (value % radix);
  1739. if (digit > 9)
  1740. temp[pos] = L'a' + digit - 10;
  1741. else
  1742. temp[pos] = L'0' + digit;
  1743. pos++;
  1744. value /= radix;
  1745. } while (value > 0);
  1746. if (negative)
  1747. temp[pos++] = L'-';
  1748. // Reverse it before making a CString out of it
  1749. int start = 0, end = pos - 1;
  1750. while (start < end) {
  1751. TCHAR t = temp[start];
  1752. temp[start] = temp[end];
  1753. temp[end] = t;
  1754. end--;
  1755. start++;
  1756. }
  1757. return CString(temp, pos);
  1758. }
  1759. // converts an uint64 to a string
  1760. // Does not check for overflow
  1761. CString String_Uint64ToString(uint64 value, int radix) {
  1762. ASSERT1(radix > 0);
  1763. CString ret;
  1764. const uint32 kMaxUint64Digits = 65;
  1765. // Space big enough for it in binary
  1766. TCHAR* temp = ret.GetBufferSetLength(kMaxUint64Digits);
  1767. int pos = 0;
  1768. // Add digits in reverse order
  1769. do {
  1770. TCHAR digit = static_cast<TCHAR>(value % radix);
  1771. if (digit > 9) {
  1772. temp[pos] = _T('a') + digit - 10;
  1773. } else {
  1774. temp[pos] = _T('0') + digit;
  1775. }
  1776. pos++;
  1777. value /= radix;
  1778. } while (value > 0 && pos < kMaxUint64Digits);
  1779. ret.ReleaseBuffer(pos);
  1780. // Reverse it before making a CString out of it
  1781. ret.MakeReverse();
  1782. return ret;
  1783. }
  1784. // converts an double to a string specifies the number of digits after
  1785. // the decimal point
  1786. CString String_DoubleToString(double value, int point_digits) {
  1787. int64 int_val = (int64) value;
  1788. // Deal with integer part
  1789. CString result(String_Int64ToString(int_val, 10));
  1790. if (point_digits > 0) {
  1791. result.AppendChar(L'.');
  1792. // get the fp digits
  1793. double rem_val = value - int_val;
  1794. if (rem_val < 0)
  1795. rem_val = -rem_val;
  1796. // multiply w/ the requested number of significant digits
  1797. // construct the string in place
  1798. for(int i=0; i<point_digits; i++) {
  1799. // TODO(omaha): I have seen 1.2 turn into 1.1999999999999, and generate that string.
  1800. // We should round better. For now, I'll add a quick fix to favor high
  1801. rem_val += 1e-12;
  1802. rem_val *= 10;
  1803. // Get the ones digit
  1804. int64 int_rem_dig = std::min(10LL, static_cast<int64>(rem_val));
  1805. result += static_cast<TCHAR>(int_rem_dig + L'0');
  1806. rem_val = rem_val - int_rem_dig;
  1807. }
  1808. }
  1809. return result;
  1810. }
  1811. double String_StringToDouble (const TCHAR *s) {
  1812. ASSERT(s, (L""));
  1813. double value, power;
  1814. int i = 0, sign;
  1815. while (IsSpaceW(s[i])) i++;
  1816. // get sign
  1817. sign = (s[i] == '-') ? -1 : 1;
  1818. if (s[i] == '+' || s[i] == '-') i++;
  1819. for (value = 0.0; s[i] >= '0' && s[i] <= '9'; i++)
  1820. value = 10.0 * value + (s[i] - '0');
  1821. if (s[i] == '.') i++;
  1822. for (power = 1.0; s[i] >= '0' && s[i] <= '9'; i++) {
  1823. value = 10.0 * value + (s[i] - '0');
  1824. power *= 10.0;
  1825. }
  1826. return sign * value / power;
  1827. }
  1828. // Converts a character to a digit
  1829. // if the character is not a digit return -1 (same as CRT)
  1830. int32 String_CharToDigit(const TCHAR c) {
  1831. return ((c) >= '0' && (c) <= '9' ? (c) - '0' : -1);
  1832. }
  1833. bool String_IsDigit (const TCHAR c) {
  1834. return ((c) >= '0' && (c) <= '9');
  1835. }
  1836. TCHAR String_DigitToChar(unsigned int n) {
  1837. ASSERT1(n < 10);
  1838. return static_cast<TCHAR>(_T('0') + n % 10);
  1839. }
  1840. // Returns true if an identifier character: letter, digit, or "_"
  1841. bool String_IsIdentifierChar(const TCHAR c) {
  1842. return ((c >= _T('A') && c <= _T('Z')) ||
  1843. (c >= _T('a') && c <= _T('z')) ||
  1844. (c >= _T('0') && c <= _T('9')) ||
  1845. c == _T('_'));
  1846. }
  1847. // Returns true if the string has letters in it.
  1848. // This is used by the keyword extractor to downweight numbers,
  1849. // IDs (sequences of numbers like social security numbers), etc.
  1850. bool String_HasAlphabetLetters (const TCHAR * str) {
  1851. ASSERT (str, (L""));
  1852. while (*str != '\0') {
  1853. // if (iswalpha (*str)) {
  1854. // Note that IsCharAlpha is slower but we want to avoid the CRT
  1855. if (IsCharAlpha (*str)) {
  1856. return true;
  1857. }
  1858. ++str;
  1859. }
  1860. return false;
  1861. }
  1862. CString String_LargeIntToApproximateString(uint64 value, bool base_ten, int* power) {
  1863. uint32 to_one_decimal;
  1864. uint32 gig = base_ten ? 1000000000 : (1<<30);
  1865. uint32 gig_div_10 = base_ten ? 100000000 : (1<<30)/10;
  1866. uint32 meg = base_ten ? 1000000 : (1<<20);
  1867. uint32 meg_div_10 = base_ten ? 100000 : (1<<20)/10;
  1868. uint32 kilo = base_ten ? 1000 : (1<<10);
  1869. uint32 kilo_div_10 = base_ten ? 100 : (1<<10)/10;
  1870. if (value >= gig) {
  1871. if (power) *power = 3;
  1872. to_one_decimal = static_cast<uint32>(value / gig_div_10);
  1873. } else if (value >= meg) {
  1874. if (power) *power = 2;
  1875. to_one_decimal = static_cast<uint32>(value / meg_div_10);
  1876. } else if (value >= kilo) {
  1877. if (power) *power = 1;
  1878. to_one_decimal = static_cast<uint32>(value / kilo_div_10);
  1879. } else {
  1880. if (power) *power = 0;
  1881. return String_Int64ToString(static_cast<uint32>(value), 10 /*radix*/);
  1882. }
  1883. uint32 whole_part = to_one_decimal / 10;
  1884. if (whole_part < 10)
  1885. return Show(0.1 * static_cast<double>(to_one_decimal), 1);
  1886. return String_Int64ToString(whole_part, 10 /*radix*/);
  1887. }
  1888. int String_FindString(const TCHAR *s1, const TCHAR *s2) {
  1889. ASSERT(s2, (L""));
  1890. ASSERT(s1, (L""));
  1891. // Naive implementation, but still oodles better than ATL's implementation
  1892. // (which deals with variable character widths---we don't).
  1893. const TCHAR *found = _tcsstr(s1, s2);
  1894. if (NULL == found)
  1895. return -1;
  1896. return found - s1;
  1897. }
  1898. int String_FindString(const TCHAR *s1, const TCHAR *s2, int start_pos) {
  1899. ASSERT(s2, (L""));
  1900. ASSERT(s1, (L""));
  1901. // Naive implementation, but still oodles better than ATL's implementation
  1902. // (which deals with variable character widths---we don't).
  1903. int skip = start_pos;
  1904. const TCHAR *s = s1;
  1905. while (skip && *s) {
  1906. ++s;
  1907. --skip;
  1908. }
  1909. if (!(*s))
  1910. return -1;
  1911. const TCHAR *found = _tcsstr(s, s2);
  1912. if (NULL == found)
  1913. return -1;
  1914. return found - s1;
  1915. }
  1916. int String_FindChar(const TCHAR *str, const TCHAR c) {
  1917. ASSERT (str, (L""));
  1918. const TCHAR *s = str;
  1919. while (*s) {
  1920. if (*s == c)
  1921. return s - str;
  1922. ++s;
  1923. }
  1924. return -1;
  1925. }
  1926. // taken from wcsrchr, modified to behave in the CString way
  1927. int String_ReverseFindChar(const TCHAR * str,TCHAR c) {
  1928. ASSERT (str, (L""));
  1929. TCHAR *start = (TCHAR *)str;
  1930. while (*str++) /* find end of string */
  1931. ;
  1932. /* search towards front */
  1933. while (--str != start && *str != (TCHAR)c)
  1934. ;
  1935. if (*str == (TCHAR)c) /* found ? */
  1936. return( str - start );
  1937. return -1;
  1938. }
  1939. int String_FindChar(const TCHAR *str, const TCHAR c, int start_pos) {
  1940. ASSERT (str, (L""));
  1941. int n = 0;
  1942. const TCHAR *s = str;
  1943. while (*s) {
  1944. if (n++ >= start_pos && *s == c)
  1945. return s - str;
  1946. ++s;
  1947. }
  1948. return -1;
  1949. }
  1950. bool String_Contains(const TCHAR *s1, const TCHAR *s2) {
  1951. ASSERT(s2, (L""));
  1952. ASSERT(s1, (L""));
  1953. return -1 != String_FindString(s1, s2);
  1954. }
  1955. void String_ReplaceChar(TCHAR *str, TCHAR old_char, TCHAR new_char) {
  1956. ASSERT (str, (L""));
  1957. while (*str) {
  1958. if (*str == old_char)
  1959. *str = new_char;
  1960. ++str;
  1961. }
  1962. }
  1963. void String_ReplaceChar(CString & str, TCHAR old_char, TCHAR new_char) {
  1964. String_ReplaceChar (str.GetBuffer(), old_char, new_char);
  1965. str.ReleaseBuffer();
  1966. }
  1967. int ReplaceCString (CString & src, const TCHAR *from, const TCHAR *to) {
  1968. ASSERT(to, (L""));
  1969. ASSERT(from, (L""));
  1970. return ReplaceCString(src, from, lstrlen(from), to, lstrlen(to), kRepMax);
  1971. }
  1972. // A special version of the replace function which takes advantage of CString properties
  1973. // to make it much faster when the string grows
  1974. // 1) It will resize the string in place if possible. Even if it has to 'grow' the string
  1975. // 2) It will cutoff after a maximum number of matches
  1976. // 3) It expects sizing data to be passed to it
  1977. int ReplaceCString (CString & src, const TCHAR *from, unsigned int from_len,
  1978. const TCHAR *to, unsigned int to_len,
  1979. unsigned int max_matches) {
  1980. ASSERT (from, (L""));
  1981. ASSERT (to, (L""));
  1982. ASSERT (from[0] != '\0', (L""));
  1983. int i = 0, j = 0;
  1984. unsigned int matches = 0;
  1985. // Keep track of the matches, it's easier than recalculating them
  1986. unsigned int match_pos_stack[kExpectedMaxReplaceMatches];
  1987. // We might need to dynamically allocate space for the matches
  1988. bool dynamic_allocate = false;
  1989. unsigned int * match_pos = (unsigned int*)match_pos_stack;
  1990. unsigned int max_match_size = kExpectedMaxReplaceMatches;
  1991. // Is the string getting bigger?
  1992. bool longer = to_len > from_len;
  1993. // don't compute the lengths unless we know we need to
  1994. int src_len = src.GetLength();
  1995. int cur_len = src_len;
  1996. // Trick: We temporarily add 1 extra character to the string. The first char from the from
  1997. // string. This way we can avoid searching for NULL, since we are guaranteed to find it
  1998. TCHAR * buffer = src.GetBufferSetLength(src_len+1);
  1999. const TCHAR from_0 = from[0];
  2000. buffer[src_len] = from[0];
  2001. while (i < cur_len) {
  2002. // If we have too many matches, then re-allocate to a dynamic buffer that is
  2003. // twice as big as the one we are currently using
  2004. if (longer && (matches == max_match_size)) {
  2005. // Double the buffer size, and copy it over
  2006. unsigned int * temp = new unsigned int[max_match_size * 2];
  2007. memcpy(temp, match_pos, matches * sizeof(unsigned int));
  2008. if (dynamic_allocate)
  2009. delete [] match_pos; // lint !e424 Inappropriate deallocation
  2010. match_pos = temp;
  2011. max_match_size *= 2;
  2012. dynamic_allocate = true;
  2013. }
  2014. // If we have the maximum number of matches already, then stop
  2015. if (matches >= max_matches) {
  2016. break;
  2017. }
  2018. // For each potential match
  2019. // Note: oddly enough, this is the most expensive line in the function under normal usage. So I am optimizing the heck out of it
  2020. TCHAR * buf_ptr = buffer + i;
  2021. while (*buf_ptr != from_0) { ++buf_ptr; }
  2022. i = buf_ptr - buffer;
  2023. // We're done!
  2024. if (i >= cur_len)
  2025. break;
  2026. // buffer is not NULL terminated, we replaced the NULL above
  2027. while (i < cur_len && buffer[i] && buffer[i] == from[j]) {
  2028. ++i; ++j;
  2029. if (from[j] == '\0') { // found match
  2030. if (!longer) { // modify in place
  2031. memcpy ((byte *)(buffer+i) - (sizeof (TCHAR) * from_len), (byte *)to, sizeof (TCHAR) * to_len);
  2032. // if there are often a lot of replacements, it would be faster to create a new string instead
  2033. // of using memmove
  2034. // TODO(omaha): - memmove will cause n^2 behavior in strings with multiple matches since it will be moved many times...
  2035. if (to_len < from_len) { memmove ((byte *)(buffer+i) - (sizeof (TCHAR) * (from_len - to_len)),
  2036. (byte *)(buffer+i), (src_len - i + 1) * sizeof (TCHAR)); }
  2037. i -= (from_len - to_len);
  2038. cur_len -= (from_len - to_len);
  2039. }
  2040. else
  2041. match_pos[matches] = i - from_len;
  2042. ++matches;
  2043. break;
  2044. }
  2045. }
  2046. j = 0;
  2047. }
  2048. if (to_len <= from_len)
  2049. src_len -= matches * (from_len - to_len);
  2050. // if the new string is longer we do another pass now that we know how long the new string needs to be
  2051. if (matches && to_len > from_len) {
  2052. src.ReleaseBuffer(src_len);
  2053. int new_len = src_len + matches * (to_len - from_len);
  2054. buffer = src.GetBufferSetLength(new_len);
  2055. // It's easier to assemble it backwards...
  2056. int temp_end = new_len;
  2057. for(i = matches-1; i >= 0; --i) {
  2058. // Figure out where the trailing portion isthe trailing portion
  2059. int len = src_len - match_pos[i] - from_len;
  2060. int start = match_pos[i] + from_len;
  2061. int dest = temp_end - len;
  2062. memmove(buffer+dest, buffer+start, (len) * sizeof(TCHAR));
  2063. // copy the new item
  2064. memcpy(buffer + dest - to_len, to, to_len * sizeof(TCHAR));
  2065. // Update the pointers
  2066. temp_end = dest - to_len;
  2067. src_len = match_pos[i];
  2068. }
  2069. src_len = new_len;
  2070. }
  2071. src.ReleaseBuffer(src_len);
  2072. if (dynamic_allocate)
  2073. delete [] match_pos; // lint !e673 Possibly inappropriate deallocation
  2074. return matches;
  2075. }
  2076. /*
  2077. The following 2 functions will do replacement on TCHAR* directly. They is currently unused.
  2078. Feel free to put it back if you need to.
  2079. */
  2080. int ReplaceString (TCHAR *src, const TCHAR *from, const TCHAR *to, TCHAR **out, int *out_len) {
  2081. ASSERT(out_len, (L""));
  2082. ASSERT(out, (L""));
  2083. ASSERT(to, (L""));
  2084. ASSERT(from, (L""));
  2085. ASSERT(src, (L""));
  2086. bool created_new_string;
  2087. int matches = ReplaceStringMaybeInPlace (src, from, to, out, out_len, &created_new_string);
  2088. if (!created_new_string) {
  2089. *out = new TCHAR [(*out_len)+1];
  2090. if (!(*out)) { *out = src; return 0; }
  2091. _tcscpy_s(*out, *out_len + 1, src);
  2092. }
  2093. return matches;
  2094. }
  2095. int ReplaceStringMaybeInPlace (TCHAR *src, const TCHAR *from, const TCHAR *to, TCHAR **out, int *out_len, bool *created_new_string) {
  2096. ASSERT (created_new_string, (L""));
  2097. ASSERT (out_len, (L""));
  2098. ASSERT (src, (L""));
  2099. ASSERT (from, (L""));
  2100. ASSERT (to, (L""));
  2101. ASSERT (out, (L""));
  2102. ASSERT (from[0] != '\0', (L""));
  2103. int i = 0, j = 0;
  2104. int matches = 0;
  2105. // don't compute the lengths unless we know we need to
  2106. int from_len = -1, to_len = -1, src_len = -1;
  2107. *created_new_string = false;
  2108. *out = src;
  2109. while (src[i]) {
  2110. while (src[i] && src[i] != from[0]) { i++; }
  2111. while (src[i] && src[i] == from[j]) {
  2112. i++; j++;
  2113. if (from[j] == '\0') { // found match
  2114. if (from_len == -1) { // compute lengths if not known
  2115. from_len = lstrlen (from);
  2116. to_len = lstrlen (to);
  2117. src_len = lstrlen (src);
  2118. }
  2119. matches++;
  2120. if (to_len <= from_len) { // modify in place
  2121. memcpy ((byte *)(src+i) - (sizeof (TCHAR) * from_len), (byte *)to, sizeof (TCHAR) * to_len);
  2122. // if there are often a lot of replacements, it would be faster to create a new string instead
  2123. // of using memmove
  2124. if (to_len < from_len) { memmove ((byte *)(src+i) - (sizeof (TCHAR) * (from_len - to_len)),
  2125. (byte *)(src+i), (src_len - i + 1) * sizeof (TCHAR)); }
  2126. i -= (from_len - to_len);
  2127. }
  2128. break;
  2129. }
  2130. }
  2131. j = 0;
  2132. }
  2133. *out_len = i;
  2134. // if the new string is longer we do another pass now that we know how long the new string needs to be
  2135. if (matches && to_len > from_len) {
  2136. ASSERT (src_len == i, (L""));
  2137. int new_len = src_len + matches * (to_len - from_len);
  2138. *out = new TCHAR [new_len+1];
  2139. if (!(*out)) { *out = src; *out_len = lstrlen (src); return 0; }
  2140. *created_new_string = true;
  2141. i = 0; j = 0; int k = 0;
  2142. while (src[i]) {
  2143. while (src[i] && src[i] != from[0]) {
  2144. (*out)[k++] = src[i++];
  2145. }
  2146. while (src[i] && src[i] == from[j]) {
  2147. (*out)[k++] = src[i++];
  2148. j++;
  2149. if (from[j] == '\0') { // found match
  2150. k -= from_len;
  2151. ASSERT (k >= 0, (L""));
  2152. memcpy ((byte *)((*out)+k), (byte *)to, sizeof (TCHAR) * to_len);
  2153. k += to_len;
  2154. break;
  2155. }
  2156. }
  2157. j = 0;
  2158. }
  2159. (*out)[k] = '\0';
  2160. ASSERT (k == new_len, (L""));
  2161. *out_len = new_len;
  2162. }
  2163. return matches;
  2164. }
  2165. /****************************************************************************
  2166. * wcstol, wcstoul(nptr,endptr,ibase) - Convert ascii string to long un/signed int.
  2167. *
  2168. * modified from:
  2169. *
  2170. * wcstol.c - Contains C runtimes wcstol and wcstoul
  2171. *
  2172. * Copyright (c) Microsoft Corporation. All rights reserved.
  2173. *
  2174. * Purpose:
  2175. * Convert an ascii string to a long 32-bit value. The base
  2176. * used for the caculations is supplied by the caller. The base
  2177. * must be in the range 0, 2-36. If a base of 0 is supplied, the
  2178. * ascii string must be examined to determine the base of the
  2179. * number:
  2180. * (a) First char = '0', second char = 'x' or 'X',
  2181. * use base 16.
  2182. * (b) First char = '0', use base 8
  2183. * (c) First char in range '1' - '9', use base 10.
  2184. *
  2185. * If the 'endptr' value is non-NULL, then wcstol/wcstoul places
  2186. * a pointer to the terminating character in this value.
  2187. * See ANSI standard for details
  2188. *
  2189. *Entry:
  2190. * nptr == NEAR/FAR pointer to the start of string.
  2191. * endptr == NEAR/FAR pointer to the end of the string.
  2192. * ibase == integer base to use for the calculations.
  2193. *
  2194. * string format: [whitespace] [sign] [0] [x] [digits/letters]
  2195. *
  2196. *Exit:
  2197. * Good return:
  2198. * result
  2199. *
  2200. * Overflow return:
  2201. * wcstol -- LONG_MAX or LONG_MIN
  2202. * wcstoul -- ULONG_MAX
  2203. * wcstol/wcstoul -- errno == ERANGE
  2204. *
  2205. * No digits or bad base return:
  2206. * 0
  2207. * endptr = nptr*
  2208. *
  2209. *Exceptions:
  2210. * None.
  2211. *
  2212. *******************************************************************************/
  2213. // flag values */
  2214. #define kFlUnsigned (1) // wcstoul called */
  2215. #define kFlNeg (2) // negative sign found */
  2216. #define kFlOverflow (4) // overflow occured */
  2217. #define kFlReaddigit (8) // we've read at least one correct digit */
  2218. static unsigned long __cdecl wcstoxl (const wchar_t *nptr, wchar_t **endptr, int ibase, int flags) {
  2219. ASSERT(nptr, (L""));
  2220. const wchar_t *p;
  2221. wchar_t c;
  2222. unsigned long number;
  2223. unsigned digval;
  2224. unsigned long maxval;
  2225. // #ifdef _MT
  2226. // pthreadlocinfo ptloci = _getptd()->ptlocinfo;
  2227. // if ( ptloci != __ptlocinfo )
  2228. // ptloci = __updatetlocinfo();
  2229. // #endif // _MT */
  2230. p = nptr; // p is our scanning pointer */
  2231. number = 0; // start with zero */
  2232. c = *p++; // read char */
  2233. // #ifdef _MT
  2234. // while ( __iswspace_mt(ptloci, c) )
  2235. // #else // _MT */
  2236. while (c == ' ')
  2237. // while ( iswspace(c) )
  2238. // #endif // _MT */
  2239. c = *p++; // skip whitespace */
  2240. if (c == '-') {
  2241. flags |= kFlNeg; // remember minus sign */
  2242. c = *p++;
  2243. }
  2244. else if (c == '+')
  2245. c = *p++; // skip sign */
  2246. if (ibase < 0 || ibase == 1 || ibase > 36) {
  2247. // bad base! */
  2248. if (endptr)
  2249. // store beginning of string in endptr */
  2250. *endptr = const_cast<wchar_t *>(nptr);
  2251. return 0L; // return 0 */
  2252. }
  2253. else if (ibase == 0) {
  2254. // determine base free-lance, based on first two chars of
  2255. // string */
  2256. if (String_CharToDigit(c) != 0)
  2257. ibase = 10;
  2258. else if (*p == L'x' || *p == L'X')
  2259. ibase = 16;
  2260. else
  2261. ibase = 8;
  2262. }
  2263. if (ibase == 16) {
  2264. // we might have 0x in front of number; remove if there */
  2265. if (String_CharToDigit(c) == 0 && (*p == L'x' || *p == L'X')) {
  2266. ++p;
  2267. c = *p++; // advance past prefix */
  2268. }
  2269. }
  2270. // if our number exceeds this, we will overflow on multiply */
  2271. maxval = ULONG_MAX / ibase;
  2272. for (;;) { // exit in middle of loop */
  2273. // convert c to value */
  2274. if ( (digval = String_CharToDigit(c)) != (unsigned) -1 )
  2275. ;
  2276. else if (c >= 'A' && c <= 'F') { digval = c - 'A' + 10; }
  2277. else if (c >= 'a' && c <= 'f') { digval = c - 'a' + 10; }
  2278. // else if ( __ascii_iswalpha(c))
  2279. // digval = __ascii_towupper(c) - L'A' + 10;
  2280. else
  2281. break;
  2282. if (digval >= (unsigned)ibase)
  2283. break; // exit loop if bad digit found */
  2284. // record the fact we have read one digit */
  2285. flags |= kFlReaddigit;
  2286. // we now need to compute number = number * base + digval,
  2287. // but we need to know if overflow occured. This requires
  2288. // a tricky pre-check. */
  2289. if (number < maxval || (number == maxval &&
  2290. (unsigned long)digval <= ULONG_MAX % ibase)) {
  2291. // we won't overflow, go ahead and multiply */
  2292. number = number * ibase + digval;
  2293. }
  2294. else {
  2295. // we would have overflowed -- set the overflow flag */
  2296. flags |= kFlOverflow;
  2297. }
  2298. c = *p++; // read next digit */
  2299. }
  2300. --p; // point to place that stopped scan */
  2301. if (!(flags & kFlReaddigit)) {
  2302. // no number there; return 0 and point to beginning of string */
  2303. if (endptr)
  2304. // store beginning of string in endptr later on */
  2305. p = nptr;
  2306. number = 0L; // return 0 */
  2307. }
  2308. // lint -save -e648 -e650 Overflow in -LONG_MIN
  2309. #pragma warning(push)
  2310. // C4287 : unsigned/negative constant mismatch.
  2311. // The offending expression is number > -LONG_MIN. -LONG_MIN overflows and
  2312. // technically -LONG_MIN == LONG_MIN == 0x80000000. It should actually
  2313. // result in a compiler warning, such as C4307: integral constant overflow.
  2314. // Anyway, in the expression (number > -LONG_MIN) the right operand is converted
  2315. // to unsigned long, so the expression is actually evaluated as
  2316. // number > 0x80000000UL. The code is probably correct but subtle, to say the
  2317. // least.
  2318. #pragma warning(disable : 4287)
  2319. else if ( (flags & kFlOverflow) ||
  2320. ( !(flags & kFlUnsigned) &&
  2321. ( ( (flags & kFlNeg) && (number > -LONG_MIN) ) ||
  2322. ( !(flags & kFlNeg) && (number > LONG_MAX) ) ) ) )
  2323. {
  2324. // overflow or signed overflow occurred */
  2325. // errno = ERANGE;
  2326. if ( flags & kFlUnsigned )
  2327. number = ULONG_MAX;
  2328. else if ( flags & kFlNeg )
  2329. // lint -e{648, 650} Overflow in -LONG_MIN
  2330. number = (unsigned long)(-LONG_MIN);
  2331. else
  2332. number = LONG_MAX;
  2333. }
  2334. #pragma warning(pop)
  2335. // lint -restore
  2336. if (endptr != NULL)
  2337. // store pointer to char that stopped the scan */
  2338. *endptr = const_cast<wchar_t *>(p);
  2339. if (flags & kFlNeg)
  2340. // negate result if there was a neg sign */
  2341. number = (unsigned long)(-(long)number);
  2342. return number; // done. */
  2343. }
  2344. long __cdecl Wcstol (const wchar_t *nptr, wchar_t **endptr, int ibase) {
  2345. ASSERT(endptr, (L""));
  2346. ASSERT(nptr, (L""));
  2347. return (long) wcstoxl(nptr, endptr, ibase, 0);
  2348. }
  2349. unsigned long __cdecl Wcstoul (const wchar_t *nptr, wchar_t **endptr, int ibase) {
  2350. // endptr may be NULL
  2351. ASSERT(nptr, (L""));
  2352. return wcstoxl(nptr, endptr, ibase, kFlUnsigned);
  2353. }
  2354. // Functions on arrays of strings
  2355. // Returns true iff s is in the array strings (case-insensitive compare)
  2356. bool String_MemberOf(const TCHAR* const* strings, const TCHAR* s) {
  2357. ASSERT(s, (L""));
  2358. // strings may be NULL
  2359. const int s_length = lstrlen(s);
  2360. if (strings == NULL)
  2361. return false;
  2362. for (; *strings != NULL; strings++) {
  2363. if (0 == String_StrNCmp(*strings, s, s_length, true)) {
  2364. return true; // Found equal string
  2365. }
  2366. }
  2367. return false;
  2368. }
  2369. // Returns index of s in the array of strings (or -1 for missing) (case-insensitive compare)
  2370. int String_IndexOf(const TCHAR* const* strings, const TCHAR* s) {
  2371. ASSERT(s, (L""));
  2372. // strings may be NULL
  2373. const int s_length = lstrlen(s);
  2374. if (strings == NULL)
  2375. return -1;
  2376. for (int i = 0; *strings != NULL; i++, strings++) {
  2377. if (0 == String_StrNCmp(*strings, s, s_length, true)) {
  2378. return i; // Found equal string
  2379. }
  2380. }
  2381. return -1;
  2382. }
  2383. // The internal format is a int64.
  2384. time64 StringToTime(const CString & time) {
  2385. return static_cast<time64>(String_StringToInt64(time));
  2386. }
  2387. // See above comment from StringToTime.
  2388. // Just show it as a INT64 for now
  2389. // NOTE: this will truncating it to INT64, which may lop off some times in the future
  2390. CString TimeToString(const time64 & time) {
  2391. return String_Int64ToString(static_cast<int64>(time), 10);
  2392. }
  2393. const TCHAR *FindStringASpaceStringB (const TCHAR *s, const TCHAR *a, const TCHAR *b) {
  2394. ASSERT(s, (L""));
  2395. ASSERT(a, (L""));
  2396. ASSERT(b, (L""));
  2397. const TCHAR *search_from = s;
  2398. const TCHAR *pos;
  2399. while (*search_from && (pos = stristrW (search_from, a)) != NULL) {
  2400. const TCHAR *start = pos;
  2401. pos += lstrlen(a);
  2402. search_from = pos;
  2403. while (*pos == ' ' || *pos == '\t') pos++;
  2404. if (!String_StrNCmp (pos, b, lstrlen(b), true)) return start;
  2405. }
  2406. return 0;
  2407. }
  // Returns true if c is an ASCII letter ('a'..'z' or 'A'..'Z').
  bool IsAlphaA (const char c) {
    if (c >= 'a' && c <= 'z') {
      return true;
    }
    return c >= 'A' && c <= 'Z';
  }
  // Returns true if c is an ASCII digit ('0'..'9').
  bool IsDigitA (const char c) {
    return !(c < '0' || c > '9');
  }
  2414. void SafeStrCat (TCHAR *dest, const TCHAR *src, int dest_buffer_len) {
  2415. _tcscat_s(dest, dest_buffer_len, src);
  2416. }
  2417. // extracts next float in a string
  2418. // skips any non-digit characters
  2419. // return position after end of float
  2420. const TCHAR *ExtractNextDouble (const TCHAR *s, double *f) {
  2421. ASSERT (f, (L""));
  2422. ASSERT (s, (L""));
  2423. CString num;
  2424. while (*s && !String_IsDigit (*s)) s++;
  2425. while (*s && (*s == '.' || String_IsDigit (*s))) { num += *s; s++; }
  2426. ASSERT (num.GetLength(), (L""));
  2427. *f = String_StringToDouble (num);
  2428. return s;
  2429. }
  2430. TCHAR *String_PathFindExtension(const TCHAR *path) {
  2431. ASSERT(path, (L""));
  2432. // Documentation says PathFindExtension string must be of max length
  2433. // MAX_PATH but a trusted tester hit the ASSERT and we don't really
  2434. // need it here, so commented out. We can't address where it is
  2435. // called because it's called from ATL code.
  2436. // ASSERT(lstrlen(path)<=MAX_PATH, (L""));
  2437. // point to terminating NULL
  2438. const TCHAR *ret = path + lstrlen(path);
  2439. const TCHAR *pos = ret;
  2440. while (--pos >= path) {
  2441. if (*pos == '.')
  2442. return const_cast<TCHAR *>(pos);
  2443. }
  2444. return const_cast<TCHAR *>(ret);
  2445. }
  // Converts the letters 'A'-'Z' to 'a'-'z'; any other character is returned
  // unchanged.
  char String_ToLowerCharAnsi(char c) {
    const bool is_uppercase = ('A' <= c) && (c <= 'Z');
    if (!is_uppercase) {
      return c;
    }
    // Shift by the fixed distance between the two letter ranges.
    return static_cast<char>(c + ('a' - 'A'));
  }
  2450. int String_ToLowerChar(int c) {
  2451. // If it's < 128, then convert is ourself, which is far cheaper than the system conversion
  2452. if (c < 128)
  2453. return String_ToLowerCharAnsi(static_cast<char>(c));
  2454. return Char_ToLower(static_cast<TCHAR>(c));
  2455. }
  2456. bool String_PathRemoveFileSpec(TCHAR *path) {
  2457. ASSERT (path, (L""));
  2458. int len, pos;
  2459. len = pos = lstrlen (path);
  2460. // You might think that the SHLWAPI API does not change "c:\windows" -> "c:\"
  2461. // when c:\windows is a directory, but it does.
  2462. // If we don't want to match this weird API we can use the following to check
  2463. // for directories:
  2464. // Check if we are already a directory.
  2465. WIN32_FILE_ATTRIBUTE_DATA attrs;
  2466. // Failure (if file does not exist) is OK.
  2467. BOOL success = GetFileAttributesEx(path, GetFileExInfoStandard, &attrs);
  2468. UTIL_LOG(L4, (_T("[String_PathRemoveFileSpec][path %s][success %d][dir %d]"),
  2469. path,
  2470. success,
  2471. attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY));
  2472. if (success && (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
  2473. // Remove trailing backslash, if any.
  2474. if (path[pos-1] == '\\')
  2475. path[pos-1] = '\0';
  2476. return 1;
  2477. }
  2478. // Find last backslash.
  2479. while (pos && path[pos] != '\\') pos--;
  2480. if (!pos && path[pos] != '\\') return 0;
  2481. ASSERT (pos < len, (L""));
  2482. // The documentation says it removes backslash but it doesn't for c:\.
  2483. if (!pos || path[pos-1] == ':' || (pos == 1 && path[0] == '\\'))
  2484. // Keep the backslash in this case.
  2485. path[pos+1] = '\0';
  2486. else
  2487. path[pos] = '\0';
  2488. return 1;
  2489. }
  2490. void String_EndWithChar(TCHAR *str, TCHAR c) {
  2491. ASSERT (str, (L""));
  2492. int len = lstrlen(str);
  2493. if (len == 0 || str[len - 1] != c) {
  2494. str[len] = c;
  2495. str[len + 1] = 0;
  2496. }
  2497. }
  2498. bool StartsWithBOM(const TCHAR* string) {
  2499. ASSERT(string, (L""));
  2500. wchar_t c = string[0];
  2501. if (c == 0xFFFE || c == 0xFEFF)
  2502. return true;
  2503. else
  2504. return false;
  2505. }
  2506. const TCHAR* StringAfterBOM(const TCHAR* string) {
  2507. ASSERT(string, (L""));
  2508. return &string[StartsWithBOM(string) ? 1 : 0];
  2509. }
  2510. bool String_StringToDecimalIntChecked(const TCHAR* str, int* value) {
  2511. ASSERT1(str);
  2512. ASSERT1(value);
  2513. if (_set_errno(0)) {
  2514. return false;
  2515. }
  2516. TCHAR* end_ptr = NULL;
  2517. *value = _tcstol(str, &end_ptr, 10);
  2518. ASSERT1(end_ptr);
  2519. if (errno) {
  2520. ASSERT1(ERANGE == errno);
  2521. // Overflow or underflow.
  2522. return false;
  2523. } else if (*value == 0) {
  2524. // The value returned could be an error code. tcsltol returns
  2525. // zero when it cannot convert the string. However we need to
  2526. // distinguish a real zero. Thus check to see if end_ptr is not the start
  2527. // of the string (str is not an empty string) and is pointing to a '\0'.
  2528. // If not, we have an error.
  2529. if ((str == end_ptr) || (*end_ptr != '\0')) {
  2530. return false;
  2531. }
  2532. } else if (*end_ptr != '\0') {
  2533. // The end_ptr is pointing at a character that is
  2534. // not the end of the string. Only part of the string could be converted.
  2535. return false;
  2536. }
  2537. return true;
  2538. }
  2539. bool CLSIDToCString(const GUID& guid, CString* str) {
  2540. ASSERT(str, (L""));
  2541. LPOLESTR string_guid = NULL;
  2542. if (::StringFromCLSID(guid, &string_guid) != S_OK) {
  2543. return false;
  2544. }
  2545. *str = string_guid;
  2546. ::CoTaskMemFree(string_guid);
  2547. return true;
  2548. }
  2549. HRESULT String_StringToBool(const TCHAR* str, bool* value) {
  2550. ASSERT1(str);
  2551. ASSERT1(value);
  2552. // This method now performs a case-insentitive
  2553. // culture aware compare. We should however be ok as we are only comparing
  2554. // latin characters.
  2555. if (_tcsicmp(kFalse, str) == 0) {
  2556. *value = false;
  2557. } else if (_tcsicmp(kTrue, str) == 0) {
  2558. *value = true;
  2559. } else {
  2560. // we found another string. should error out.
  2561. return E_FAIL;
  2562. }
  2563. return S_OK;
  2564. }
  2565. HRESULT String_BoolToString(bool value, CString* string) {
  2566. ASSERT1(string);
  2567. *string = value ? kTrue : kFalse;
  2568. return S_OK;
  2569. }
  2570. CString String_ReplaceIgnoreCase(const CString& string,
  2571. const CString& token,
  2572. const CString& replacement) {
  2573. int token_length = token.GetLength();
  2574. if (!token_length) {
  2575. return string;
  2576. }
  2577. CString string_lowercase(string);
  2578. CString token_lowercase(token);
  2579. string_lowercase.MakeLower();
  2580. token_lowercase.MakeLower();
  2581. CString output(string);
  2582. int replacement_length = replacement.GetLength();
  2583. int index = 0;
  2584. int output_index = 0;
  2585. for (int new_index = 0;
  2586. (new_index = string_lowercase.Find(token_lowercase, index)) != -1;
  2587. index = new_index + token_length) {
  2588. output_index += new_index - index;
  2589. output.Delete(output_index, token_length);
  2590. output.Insert(output_index, replacement);
  2591. output_index += replacement_length;
  2592. }
  2593. return output;
  2594. }
  2595. // Escape and unescape strings (shlwapi-based implementation).
  2596. // The intended usage for these APIs is escaping strings to make up
  2597. // URLs, for example building query strings.
  2598. //
  2599. // Pass false to the flag segment_only to escape the url. This will not
  2600. // cause the conversion of the # (%23), ? (%3F), and / (%2F) characters.
  2601. // Characters that must be encoded include any characters that have no
  2602. // corresponding graphic character in the US-ASCII coded character
  2603. // set (hexadecimal 80-FF, which are not used in the US-ASCII coded character
  2604. // set, and hexadecimal 00-1F and 7F, which are control characters),
  2605. // blank spaces, "%" (which is used to encode other characters),
  2606. // and unsafe characters (<, >, ", #, {, }, |, \, ^, ~, [, ], and ').
  2607. //
  2608. // The input and output strings can't be longer than INTERNET_MAX_URL_LENGTH
  2609. HRESULT StringEscape(const CString& str_in,
  2610. bool segment_only,
  2611. CString* str_out) {
  2612. ASSERT1(str_out);
  2613. ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH);
  2614. DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1;
  2615. HRESULT hr = ::UrlEscape(str_in, str_out->GetBufferSetLength(buf_len), &buf_len,
  2616. segment_only ? URL_ESCAPE_PERCENT | URL_ESCAPE_SEGMENT_ONLY : URL_ESCAPE_PERCENT);
  2617. if (SUCCEEDED(hr)) {
  2618. str_out->ReleaseBuffer();
  2619. ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);
  2620. }
  2621. return hr;
  2622. }
  2623. HRESULT StringUnescape(const CString& str_in, CString* str_out) {
  2624. ASSERT1(str_out);
  2625. ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH);
  2626. DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1;
  2627. HRESULT hr = ::UrlUnescape(const_cast<TCHAR*>(str_in.GetString()),
  2628. str_out->GetBufferSetLength(buf_len), &buf_len, 0);
  2629. if (SUCCEEDED(hr)) {
  2630. str_out->ReleaseBuffer(buf_len + 1);
  2631. ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);
  2632. }
  2633. return hr;
  2634. }
  2635. bool String_StringToTristate(const TCHAR* str, Tristate* value) {
  2636. ASSERT1(str);
  2637. ASSERT1(value);
  2638. int numerical_value = 0;
  2639. if (!String_StringToDecimalIntChecked(str, &numerical_value)) {
  2640. return false;
  2641. }
  2642. switch (numerical_value) {
  2643. case 0:
  2644. *value = TRISTATE_FALSE;
  2645. break;
  2646. case 1:
  2647. *value = TRISTATE_TRUE;
  2648. break;
  2649. case 2:
  2650. *value = TRISTATE_NONE;
  2651. break;
  2652. default:
  2653. return false;
  2654. }
  2655. return true;
  2656. }
  2657. // Extracts the name and value from a string that contains a name/value pair.
  2658. bool ParseNameValuePair(const CString& token,
  2659. TCHAR separator,
  2660. CString* name,
  2661. CString* value) {
  2662. ASSERT1(name);
  2663. ASSERT1(value);
  2664. int separator_index = token.Find(separator);
  2665. if ((separator_index == -1) || // Not a name-value pair.
  2666. (separator_index == 0) || // No name was supplied.
  2667. (separator_index == (token.GetLength() - 1))) { // No value was supplied.
  2668. return false;
  2669. }
  2670. *name = token.Left(separator_index);
  2671. *value = token.Right(token.GetLength() - separator_index - 1);
  2672. ASSERT1(token.GetLength() == name->GetLength() + value->GetLength() + 1);
  2673. // It's not possible for the name to contain the separator.
  2674. ASSERT1(-1 == name->Find(separator));
  2675. if (-1 != value->Find(separator)) {
  2676. // The value contains the separator.
  2677. return false;
  2678. }
  2679. return true;
  2680. }
  2681. bool SplitCommandLineInPlace(TCHAR *command_line,
  2682. TCHAR **first_argument_parameter,
  2683. TCHAR **remaining_arguments_parameter) {
  2684. if (!command_line ||
  2685. !first_argument_parameter ||
  2686. !remaining_arguments_parameter) {
  2687. return false;
  2688. }
  2689. TCHAR end_char;
  2690. TCHAR *&first_argument = *first_argument_parameter;
  2691. TCHAR *&remaining_arguments = *remaining_arguments_parameter;
  2692. if (_T('\"') == *command_line) {
  2693. end_char = _T('\"');
  2694. first_argument = remaining_arguments = command_line + 1;
  2695. } else {
  2696. end_char = _T(' ');
  2697. first_argument = remaining_arguments = command_line;
  2698. }
  2699. // Search for the end of the first argument
  2700. while (end_char != *remaining_arguments && '\0' != *remaining_arguments) {
  2701. ++remaining_arguments;
  2702. }
  2703. if (end_char == *remaining_arguments) {
  2704. *remaining_arguments = '\0';
  2705. do {
  2706. // Skip the spaces between the first argument and the remaining arguments.
  2707. ++remaining_arguments;
  2708. } while (_T(' ') == *remaining_arguments);
  2709. }
  2710. return true;
  2711. }
  2712. bool ContainsOnlyAsciiChars(const CString& str) {
  2713. for (int i = 0; i < str.GetLength(); ++i) {
  2714. if (str[i] > 0x7F) {
  2715. return false;
  2716. }
  2717. }
  2718. return true;
  2719. }
  2720. CString BytesToHex(const uint8* bytes, size_t num_bytes) {
  2721. CString result;
  2722. if (bytes) {
  2723. result.Preallocate(num_bytes * sizeof(TCHAR));
  2724. static const TCHAR* const kHexChars = _T("0123456789abcdef");
  2725. for (size_t i = 0; i != num_bytes; ++i) {
  2726. result.AppendChar(kHexChars[(bytes[i] >> 4)]);
  2727. result.AppendChar(kHexChars[(bytes[i] & 0xf)]);
  2728. }
  2729. }
  2730. return result;
  2731. }
  2732. CString BytesToHex(const std::vector<uint8>& bytes) {
  2733. CString result;
  2734. if (!bytes.empty()) {
  2735. result.SetString(BytesToHex(&bytes.front(), bytes.size()));
  2736. }
  2737. return result;
  2738. }
  2739. void JoinStrings(const std::vector<CString>& components,
  2740. const TCHAR* delim,
  2741. CString* result) {
  2742. ASSERT1(result);
  2743. result->Empty();
  2744. // Compute length so we can reserve memory.
  2745. size_t length = 0;
  2746. size_t delim_length = delim ? _tcslen(delim) : 0;
  2747. for (size_t i = 0; i != components.size(); ++i) {
  2748. if (i != 0) {
  2749. length += delim_length;
  2750. }
  2751. length += components[i].GetLength();
  2752. }
  2753. result->Preallocate(length);
  2754. for (size_t i = 0; i != components.size(); ++i) {
  2755. if (i != 0 && delim) {
  2756. result->Append(delim, delim_length);
  2757. }
  2758. result->Append(components[i]);
  2759. }
  2760. }
  2761. void JoinStringsInArray(const TCHAR* components[],
  2762. int num_components,
  2763. const TCHAR* delim,
  2764. CString* result) {
  2765. ASSERT1(result);
  2766. result->Empty();
  2767. for (int i = 0; i != num_components; ++i) {
  2768. if (i != 0 && delim) {
  2769. result->Append(delim);
  2770. }
  2771. if (components[i]) {
  2772. result->Append(components[i]);
  2773. }
  2774. }
  2775. }
  2776. CString FormatResourceMessage(uint32 resource_id, ...) {
  2777. CString format;
  2778. const bool is_loaded = !!format.LoadString(resource_id);
  2779. if (!is_loaded) {
  2780. return CString();
  2781. }
  2782. va_list arg_list;
  2783. va_start(arg_list, resource_id);
  2784. CString formatted;
  2785. formatted.FormatMessageV(format, &arg_list);
  2786. va_end(arg_list);
  2787. return formatted;
  2788. }
  2789. CString FormatErrorCode(DWORD error_code) {
  2790. CString error_code_string;
  2791. if (FAILED(error_code)) {
  2792. error_code_string.Format(_T("0x%08x"), error_code);
  2793. } else {
  2794. error_code_string.Format(_T("%u"), error_code);
  2795. }
  2796. return error_code_string;
  2797. }
  2798. HRESULT WideStringToUtf8UrlEncodedString(const CString& str, CString* out) {
  2799. ASSERT1(out);
  2800. out->Empty();
  2801. if (str.IsEmpty()) {
  2802. return S_OK;
  2803. }
  2804. // Utf8 encode the Utf16 string first. Next urlencode it.
  2805. CStringA utf8str = WideToUtf8(str);
  2806. ASSERT1(!utf8str.IsEmpty());
  2807. DWORD buf_len = INTERNET_MAX_URL_LENGTH;
  2808. CStringA escaped_utf8_name;
  2809. HRESULT hr = ::UrlEscapeA(utf8str,
  2810. CStrBufA(escaped_utf8_name, buf_len),
  2811. &buf_len,
  2812. 0);
  2813. ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);
  2814. ASSERT1(escaped_utf8_name.GetLength() == static_cast<int>(buf_len));
  2815. if (FAILED(hr)) {
  2816. UTIL_LOG(LE, (_T("[UrlEscapeA failed][0x%08x]"), hr));
  2817. return hr;
  2818. }
  2819. *out = CString(escaped_utf8_name);
  2820. return S_OK;
  2821. }
  2822. HRESULT Utf8UrlEncodedStringToWideString(const CString& str, CString* out) {
  2823. ASSERT1(out);
  2824. out->Empty();
  2825. if (str.IsEmpty()) {
  2826. return S_OK;
  2827. }
  2828. // The value is a utf8 encoded url escaped string that is stored as a
  2829. // unicode string. Because of this, it should contain only ascii chars.
  2830. if (!ContainsOnlyAsciiChars(str)) {
  2831. UTIL_LOG(LE, (_T("[String contains non ascii chars]")));
  2832. return E_INVALIDARG;
  2833. }
  2834. CStringA escaped_utf8_val = WideToAnsiDirect(str);
  2835. DWORD buf_len = INTERNET_MAX_URL_LENGTH;
  2836. CStringA unescaped_val;
  2837. HRESULT hr = ::UrlUnescapeA(const_cast<char*>(escaped_utf8_val.GetString()),
  2838. CStrBufA(unescaped_val, buf_len),
  2839. &buf_len,
  2840. 0);
  2841. ASSERT1(unescaped_val.GetLength() == static_cast<int>(buf_len));
  2842. if (FAILED(hr)) {
  2843. UTIL_LOG(LE, (_T("[UrlUnescapeA failed][0x%08x]"), hr));
  2844. return hr;
  2845. }
  2846. ASSERT1(buf_len == static_cast<DWORD>(unescaped_val.GetLength()));
  2847. ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);
  2848. CString app_name = Utf8ToWideChar(unescaped_val,
  2849. unescaped_val.GetLength());
  2850. if (app_name.IsEmpty()) {
  2851. return E_INVALIDARG;
  2852. }
  2853. *out = app_name;
  2854. return S_OK;
  2855. }
  2856. } // namespace omaha