PageRenderTime 74ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/xbmc/utils/StringUtils.cpp

http://github.com/xbmc/xbmc
C++ | 1623 lines | 1381 code | 153 blank | 89 comment | 316 complexity | fab5cada64da790a09272b86d12958b7 MD5 | raw file
Possible License(s): GPL-3.0, CC-BY-SA-3.0, LGPL-2.0, 0BSD, Unlicense, GPL-2.0, AGPL-1.0, BSD-3-Clause, LGPL-2.1, LGPL-3.0
  1. /*
  2. * Copyright (C) 2005-2018 Team Kodi
  3. * This file is part of Kodi - https://kodi.tv
  4. *
  5. * SPDX-License-Identifier: GPL-2.0-or-later
  6. * See LICENSES/README.md for more information.
  7. */
  8. //-----------------------------------------------------------------------
  9. //
  10. // File: StringUtils.cpp
  11. //
  12. // Purpose: ATL split string utility
  13. // Author: Paul J. Weiss
  14. //
  15. // Modified to use J O'Leary's std::string class by kraqh3d
  16. //
  17. //------------------------------------------------------------------------
  18. #ifdef HAVE_NEW_CROSSGUID
  19. #include <guid.hpp>
  20. #else
  21. #include <guid.h>
  22. #endif
  23. #if defined(TARGET_ANDROID)
  24. #include <androidjni/JNIThreading.h>
  25. #endif
  26. #include "CharsetConverter.h"
  27. #include "LangInfo.h"
  28. #include "StringUtils.h"
  29. #include "Util.h"
#include <algorithm>
#include <array>
#include <assert.h>
#include <functional>
#include <inttypes.h>
#include <iomanip>
#include <limits>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <fstrcmp.h>
#include <memory.h>
  43. // don't move or std functions end up in PCRE namespace
  44. // clang-format off
  45. #include "utils/RegExp.h"
  46. // clang-format on
  47. #define FORMAT_BLOCK_SIZE 512 // # of bytes for initial allocation for printf
  48. static constexpr const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
  49. /* empty string for use in returns by ref */
  50. const std::string StringUtils::Empty = "";
  51. // Copyright (c) Leigh Brasington 2012. All rights reserved.
  52. // This code may be used and reproduced without written permission.
  53. // http://www.leighb.com/tounicupper.htm
  54. //
  55. // The tables were constructed from
  56. // http://publib.boulder.ibm.com/infocenter/iseries/v7r1m0/index.jsp?topic=%2Fnls%2Frbagslowtoupmaptable.htm
  57. static constexpr wchar_t unicode_lowers[] = {
  58. (wchar_t)0x0061, (wchar_t)0x0062, (wchar_t)0x0063, (wchar_t)0x0064, (wchar_t)0x0065, (wchar_t)0x0066, (wchar_t)0x0067, (wchar_t)0x0068, (wchar_t)0x0069,
  59. (wchar_t)0x006A, (wchar_t)0x006B, (wchar_t)0x006C, (wchar_t)0x006D, (wchar_t)0x006E, (wchar_t)0x006F, (wchar_t)0x0070, (wchar_t)0x0071, (wchar_t)0x0072,
  60. (wchar_t)0x0073, (wchar_t)0x0074, (wchar_t)0x0075, (wchar_t)0x0076, (wchar_t)0x0077, (wchar_t)0x0078, (wchar_t)0x0079, (wchar_t)0x007A, (wchar_t)0x00E0,
  61. (wchar_t)0x00E1, (wchar_t)0x00E2, (wchar_t)0x00E3, (wchar_t)0x00E4, (wchar_t)0x00E5, (wchar_t)0x00E6, (wchar_t)0x00E7, (wchar_t)0x00E8, (wchar_t)0x00E9,
  62. (wchar_t)0x00EA, (wchar_t)0x00EB, (wchar_t)0x00EC, (wchar_t)0x00ED, (wchar_t)0x00EE, (wchar_t)0x00EF, (wchar_t)0x00F0, (wchar_t)0x00F1, (wchar_t)0x00F2,
  63. (wchar_t)0x00F3, (wchar_t)0x00F4, (wchar_t)0x00F5, (wchar_t)0x00F6, (wchar_t)0x00F8, (wchar_t)0x00F9, (wchar_t)0x00FA, (wchar_t)0x00FB, (wchar_t)0x00FC,
  64. (wchar_t)0x00FD, (wchar_t)0x00FE, (wchar_t)0x00FF, (wchar_t)0x0101, (wchar_t)0x0103, (wchar_t)0x0105, (wchar_t)0x0107, (wchar_t)0x0109, (wchar_t)0x010B,
  65. (wchar_t)0x010D, (wchar_t)0x010F, (wchar_t)0x0111, (wchar_t)0x0113, (wchar_t)0x0115, (wchar_t)0x0117, (wchar_t)0x0119, (wchar_t)0x011B, (wchar_t)0x011D,
  66. (wchar_t)0x011F, (wchar_t)0x0121, (wchar_t)0x0123, (wchar_t)0x0125, (wchar_t)0x0127, (wchar_t)0x0129, (wchar_t)0x012B, (wchar_t)0x012D, (wchar_t)0x012F,
  67. (wchar_t)0x0131, (wchar_t)0x0133, (wchar_t)0x0135, (wchar_t)0x0137, (wchar_t)0x013A, (wchar_t)0x013C, (wchar_t)0x013E, (wchar_t)0x0140, (wchar_t)0x0142,
  68. (wchar_t)0x0144, (wchar_t)0x0146, (wchar_t)0x0148, (wchar_t)0x014B, (wchar_t)0x014D, (wchar_t)0x014F, (wchar_t)0x0151, (wchar_t)0x0153, (wchar_t)0x0155,
  69. (wchar_t)0x0157, (wchar_t)0x0159, (wchar_t)0x015B, (wchar_t)0x015D, (wchar_t)0x015F, (wchar_t)0x0161, (wchar_t)0x0163, (wchar_t)0x0165, (wchar_t)0x0167,
  70. (wchar_t)0x0169, (wchar_t)0x016B, (wchar_t)0x016D, (wchar_t)0x016F, (wchar_t)0x0171, (wchar_t)0x0173, (wchar_t)0x0175, (wchar_t)0x0177, (wchar_t)0x017A,
  71. (wchar_t)0x017C, (wchar_t)0x017E, (wchar_t)0x0183, (wchar_t)0x0185, (wchar_t)0x0188, (wchar_t)0x018C, (wchar_t)0x0192, (wchar_t)0x0199, (wchar_t)0x01A1,
  72. (wchar_t)0x01A3, (wchar_t)0x01A5, (wchar_t)0x01A8, (wchar_t)0x01AD, (wchar_t)0x01B0, (wchar_t)0x01B4, (wchar_t)0x01B6, (wchar_t)0x01B9, (wchar_t)0x01BD,
  73. (wchar_t)0x01C6, (wchar_t)0x01C9, (wchar_t)0x01CC, (wchar_t)0x01CE, (wchar_t)0x01D0, (wchar_t)0x01D2, (wchar_t)0x01D4, (wchar_t)0x01D6, (wchar_t)0x01D8,
  74. (wchar_t)0x01DA, (wchar_t)0x01DC, (wchar_t)0x01DF, (wchar_t)0x01E1, (wchar_t)0x01E3, (wchar_t)0x01E5, (wchar_t)0x01E7, (wchar_t)0x01E9, (wchar_t)0x01EB,
  75. (wchar_t)0x01ED, (wchar_t)0x01EF, (wchar_t)0x01F3, (wchar_t)0x01F5, (wchar_t)0x01FB, (wchar_t)0x01FD, (wchar_t)0x01FF, (wchar_t)0x0201, (wchar_t)0x0203,
  76. (wchar_t)0x0205, (wchar_t)0x0207, (wchar_t)0x0209, (wchar_t)0x020B, (wchar_t)0x020D, (wchar_t)0x020F, (wchar_t)0x0211, (wchar_t)0x0213, (wchar_t)0x0215,
  77. (wchar_t)0x0217, (wchar_t)0x0253, (wchar_t)0x0254, (wchar_t)0x0257, (wchar_t)0x0258, (wchar_t)0x0259, (wchar_t)0x025B, (wchar_t)0x0260, (wchar_t)0x0263,
  78. (wchar_t)0x0268, (wchar_t)0x0269, (wchar_t)0x026F, (wchar_t)0x0272, (wchar_t)0x0275, (wchar_t)0x0283, (wchar_t)0x0288, (wchar_t)0x028A, (wchar_t)0x028B,
  79. (wchar_t)0x0292, (wchar_t)0x03AC, (wchar_t)0x03AD, (wchar_t)0x03AE, (wchar_t)0x03AF, (wchar_t)0x03B1, (wchar_t)0x03B2, (wchar_t)0x03B3, (wchar_t)0x03B4,
  80. (wchar_t)0x03B5, (wchar_t)0x03B6, (wchar_t)0x03B7, (wchar_t)0x03B8, (wchar_t)0x03B9, (wchar_t)0x03BA, (wchar_t)0x03BB, (wchar_t)0x03BC, (wchar_t)0x03BD,
  81. (wchar_t)0x03BE, (wchar_t)0x03BF, (wchar_t)0x03C0, (wchar_t)0x03C1, (wchar_t)0x03C3, (wchar_t)0x03C4, (wchar_t)0x03C5, (wchar_t)0x03C6, (wchar_t)0x03C7,
  82. (wchar_t)0x03C8, (wchar_t)0x03C9, (wchar_t)0x03CA, (wchar_t)0x03CB, (wchar_t)0x03CC, (wchar_t)0x03CD, (wchar_t)0x03CE, (wchar_t)0x03E3, (wchar_t)0x03E5,
  83. (wchar_t)0x03E7, (wchar_t)0x03E9, (wchar_t)0x03EB, (wchar_t)0x03ED, (wchar_t)0x03EF, (wchar_t)0x0430, (wchar_t)0x0431, (wchar_t)0x0432, (wchar_t)0x0433,
  84. (wchar_t)0x0434, (wchar_t)0x0435, (wchar_t)0x0436, (wchar_t)0x0437, (wchar_t)0x0438, (wchar_t)0x0439, (wchar_t)0x043A, (wchar_t)0x043B, (wchar_t)0x043C,
  85. (wchar_t)0x043D, (wchar_t)0x043E, (wchar_t)0x043F, (wchar_t)0x0440, (wchar_t)0x0441, (wchar_t)0x0442, (wchar_t)0x0443, (wchar_t)0x0444, (wchar_t)0x0445,
  86. (wchar_t)0x0446, (wchar_t)0x0447, (wchar_t)0x0448, (wchar_t)0x0449, (wchar_t)0x044A, (wchar_t)0x044B, (wchar_t)0x044C, (wchar_t)0x044D, (wchar_t)0x044E,
  87. (wchar_t)0x044F, (wchar_t)0x0451, (wchar_t)0x0452, (wchar_t)0x0453, (wchar_t)0x0454, (wchar_t)0x0455, (wchar_t)0x0456, (wchar_t)0x0457, (wchar_t)0x0458,
  88. (wchar_t)0x0459, (wchar_t)0x045A, (wchar_t)0x045B, (wchar_t)0x045C, (wchar_t)0x045E, (wchar_t)0x045F, (wchar_t)0x0461, (wchar_t)0x0463, (wchar_t)0x0465,
  89. (wchar_t)0x0467, (wchar_t)0x0469, (wchar_t)0x046B, (wchar_t)0x046D, (wchar_t)0x046F, (wchar_t)0x0471, (wchar_t)0x0473, (wchar_t)0x0475, (wchar_t)0x0477,
  90. (wchar_t)0x0479, (wchar_t)0x047B, (wchar_t)0x047D, (wchar_t)0x047F, (wchar_t)0x0481, (wchar_t)0x0491, (wchar_t)0x0493, (wchar_t)0x0495, (wchar_t)0x0497,
  91. (wchar_t)0x0499, (wchar_t)0x049B, (wchar_t)0x049D, (wchar_t)0x049F, (wchar_t)0x04A1, (wchar_t)0x04A3, (wchar_t)0x04A5, (wchar_t)0x04A7, (wchar_t)0x04A9,
  92. (wchar_t)0x04AB, (wchar_t)0x04AD, (wchar_t)0x04AF, (wchar_t)0x04B1, (wchar_t)0x04B3, (wchar_t)0x04B5, (wchar_t)0x04B7, (wchar_t)0x04B9, (wchar_t)0x04BB,
  93. (wchar_t)0x04BD, (wchar_t)0x04BF, (wchar_t)0x04C2, (wchar_t)0x04C4, (wchar_t)0x04C8, (wchar_t)0x04CC, (wchar_t)0x04D1, (wchar_t)0x04D3, (wchar_t)0x04D5,
  94. (wchar_t)0x04D7, (wchar_t)0x04D9, (wchar_t)0x04DB, (wchar_t)0x04DD, (wchar_t)0x04DF, (wchar_t)0x04E1, (wchar_t)0x04E3, (wchar_t)0x04E5, (wchar_t)0x04E7,
  95. (wchar_t)0x04E9, (wchar_t)0x04EB, (wchar_t)0x04EF, (wchar_t)0x04F1, (wchar_t)0x04F3, (wchar_t)0x04F5, (wchar_t)0x04F9, (wchar_t)0x0561, (wchar_t)0x0562,
  96. (wchar_t)0x0563, (wchar_t)0x0564, (wchar_t)0x0565, (wchar_t)0x0566, (wchar_t)0x0567, (wchar_t)0x0568, (wchar_t)0x0569, (wchar_t)0x056A, (wchar_t)0x056B,
  97. (wchar_t)0x056C, (wchar_t)0x056D, (wchar_t)0x056E, (wchar_t)0x056F, (wchar_t)0x0570, (wchar_t)0x0571, (wchar_t)0x0572, (wchar_t)0x0573, (wchar_t)0x0574,
  98. (wchar_t)0x0575, (wchar_t)0x0576, (wchar_t)0x0577, (wchar_t)0x0578, (wchar_t)0x0579, (wchar_t)0x057A, (wchar_t)0x057B, (wchar_t)0x057C, (wchar_t)0x057D,
  99. (wchar_t)0x057E, (wchar_t)0x057F, (wchar_t)0x0580, (wchar_t)0x0581, (wchar_t)0x0582, (wchar_t)0x0583, (wchar_t)0x0584, (wchar_t)0x0585, (wchar_t)0x0586,
  100. (wchar_t)0x10D0, (wchar_t)0x10D1, (wchar_t)0x10D2, (wchar_t)0x10D3, (wchar_t)0x10D4, (wchar_t)0x10D5, (wchar_t)0x10D6, (wchar_t)0x10D7, (wchar_t)0x10D8,
  101. (wchar_t)0x10D9, (wchar_t)0x10DA, (wchar_t)0x10DB, (wchar_t)0x10DC, (wchar_t)0x10DD, (wchar_t)0x10DE, (wchar_t)0x10DF, (wchar_t)0x10E0, (wchar_t)0x10E1,
  102. (wchar_t)0x10E2, (wchar_t)0x10E3, (wchar_t)0x10E4, (wchar_t)0x10E5, (wchar_t)0x10E6, (wchar_t)0x10E7, (wchar_t)0x10E8, (wchar_t)0x10E9, (wchar_t)0x10EA,
  103. (wchar_t)0x10EB, (wchar_t)0x10EC, (wchar_t)0x10ED, (wchar_t)0x10EE, (wchar_t)0x10EF, (wchar_t)0x10F0, (wchar_t)0x10F1, (wchar_t)0x10F2, (wchar_t)0x10F3,
  104. (wchar_t)0x10F4, (wchar_t)0x10F5, (wchar_t)0x1E01, (wchar_t)0x1E03, (wchar_t)0x1E05, (wchar_t)0x1E07, (wchar_t)0x1E09, (wchar_t)0x1E0B, (wchar_t)0x1E0D,
  105. (wchar_t)0x1E0F, (wchar_t)0x1E11, (wchar_t)0x1E13, (wchar_t)0x1E15, (wchar_t)0x1E17, (wchar_t)0x1E19, (wchar_t)0x1E1B, (wchar_t)0x1E1D, (wchar_t)0x1E1F,
  106. (wchar_t)0x1E21, (wchar_t)0x1E23, (wchar_t)0x1E25, (wchar_t)0x1E27, (wchar_t)0x1E29, (wchar_t)0x1E2B, (wchar_t)0x1E2D, (wchar_t)0x1E2F, (wchar_t)0x1E31,
  107. (wchar_t)0x1E33, (wchar_t)0x1E35, (wchar_t)0x1E37, (wchar_t)0x1E39, (wchar_t)0x1E3B, (wchar_t)0x1E3D, (wchar_t)0x1E3F, (wchar_t)0x1E41, (wchar_t)0x1E43,
  108. (wchar_t)0x1E45, (wchar_t)0x1E47, (wchar_t)0x1E49, (wchar_t)0x1E4B, (wchar_t)0x1E4D, (wchar_t)0x1E4F, (wchar_t)0x1E51, (wchar_t)0x1E53, (wchar_t)0x1E55,
  109. (wchar_t)0x1E57, (wchar_t)0x1E59, (wchar_t)0x1E5B, (wchar_t)0x1E5D, (wchar_t)0x1E5F, (wchar_t)0x1E61, (wchar_t)0x1E63, (wchar_t)0x1E65, (wchar_t)0x1E67,
  110. (wchar_t)0x1E69, (wchar_t)0x1E6B, (wchar_t)0x1E6D, (wchar_t)0x1E6F, (wchar_t)0x1E71, (wchar_t)0x1E73, (wchar_t)0x1E75, (wchar_t)0x1E77, (wchar_t)0x1E79,
  111. (wchar_t)0x1E7B, (wchar_t)0x1E7D, (wchar_t)0x1E7F, (wchar_t)0x1E81, (wchar_t)0x1E83, (wchar_t)0x1E85, (wchar_t)0x1E87, (wchar_t)0x1E89, (wchar_t)0x1E8B,
  112. (wchar_t)0x1E8D, (wchar_t)0x1E8F, (wchar_t)0x1E91, (wchar_t)0x1E93, (wchar_t)0x1E95, (wchar_t)0x1EA1, (wchar_t)0x1EA3, (wchar_t)0x1EA5, (wchar_t)0x1EA7,
  113. (wchar_t)0x1EA9, (wchar_t)0x1EAB, (wchar_t)0x1EAD, (wchar_t)0x1EAF, (wchar_t)0x1EB1, (wchar_t)0x1EB3, (wchar_t)0x1EB5, (wchar_t)0x1EB7, (wchar_t)0x1EB9,
  114. (wchar_t)0x1EBB, (wchar_t)0x1EBD, (wchar_t)0x1EBF, (wchar_t)0x1EC1, (wchar_t)0x1EC3, (wchar_t)0x1EC5, (wchar_t)0x1EC7, (wchar_t)0x1EC9, (wchar_t)0x1ECB,
  115. (wchar_t)0x1ECD, (wchar_t)0x1ECF, (wchar_t)0x1ED1, (wchar_t)0x1ED3, (wchar_t)0x1ED5, (wchar_t)0x1ED7, (wchar_t)0x1ED9, (wchar_t)0x1EDB, (wchar_t)0x1EDD,
  116. (wchar_t)0x1EDF, (wchar_t)0x1EE1, (wchar_t)0x1EE3, (wchar_t)0x1EE5, (wchar_t)0x1EE7, (wchar_t)0x1EE9, (wchar_t)0x1EEB, (wchar_t)0x1EED, (wchar_t)0x1EEF,
  117. (wchar_t)0x1EF1, (wchar_t)0x1EF3, (wchar_t)0x1EF5, (wchar_t)0x1EF7, (wchar_t)0x1EF9, (wchar_t)0x1F00, (wchar_t)0x1F01, (wchar_t)0x1F02, (wchar_t)0x1F03,
  118. (wchar_t)0x1F04, (wchar_t)0x1F05, (wchar_t)0x1F06, (wchar_t)0x1F07, (wchar_t)0x1F10, (wchar_t)0x1F11, (wchar_t)0x1F12, (wchar_t)0x1F13, (wchar_t)0x1F14,
  119. (wchar_t)0x1F15, (wchar_t)0x1F20, (wchar_t)0x1F21, (wchar_t)0x1F22, (wchar_t)0x1F23, (wchar_t)0x1F24, (wchar_t)0x1F25, (wchar_t)0x1F26, (wchar_t)0x1F27,
  120. (wchar_t)0x1F30, (wchar_t)0x1F31, (wchar_t)0x1F32, (wchar_t)0x1F33, (wchar_t)0x1F34, (wchar_t)0x1F35, (wchar_t)0x1F36, (wchar_t)0x1F37, (wchar_t)0x1F40,
  121. (wchar_t)0x1F41, (wchar_t)0x1F42, (wchar_t)0x1F43, (wchar_t)0x1F44, (wchar_t)0x1F45, (wchar_t)0x1F51, (wchar_t)0x1F53, (wchar_t)0x1F55, (wchar_t)0x1F57,
  122. (wchar_t)0x1F60, (wchar_t)0x1F61, (wchar_t)0x1F62, (wchar_t)0x1F63, (wchar_t)0x1F64, (wchar_t)0x1F65, (wchar_t)0x1F66, (wchar_t)0x1F67, (wchar_t)0x1F80,
  123. (wchar_t)0x1F81, (wchar_t)0x1F82, (wchar_t)0x1F83, (wchar_t)0x1F84, (wchar_t)0x1F85, (wchar_t)0x1F86, (wchar_t)0x1F87, (wchar_t)0x1F90, (wchar_t)0x1F91,
  124. (wchar_t)0x1F92, (wchar_t)0x1F93, (wchar_t)0x1F94, (wchar_t)0x1F95, (wchar_t)0x1F96, (wchar_t)0x1F97, (wchar_t)0x1FA0, (wchar_t)0x1FA1, (wchar_t)0x1FA2,
  125. (wchar_t)0x1FA3, (wchar_t)0x1FA4, (wchar_t)0x1FA5, (wchar_t)0x1FA6, (wchar_t)0x1FA7, (wchar_t)0x1FB0, (wchar_t)0x1FB1, (wchar_t)0x1FD0, (wchar_t)0x1FD1,
  126. (wchar_t)0x1FE0, (wchar_t)0x1FE1, (wchar_t)0x24D0, (wchar_t)0x24D1, (wchar_t)0x24D2, (wchar_t)0x24D3, (wchar_t)0x24D4, (wchar_t)0x24D5, (wchar_t)0x24D6,
  127. (wchar_t)0x24D7, (wchar_t)0x24D8, (wchar_t)0x24D9, (wchar_t)0x24DA, (wchar_t)0x24DB, (wchar_t)0x24DC, (wchar_t)0x24DD, (wchar_t)0x24DE, (wchar_t)0x24DF,
  128. (wchar_t)0x24E0, (wchar_t)0x24E1, (wchar_t)0x24E2, (wchar_t)0x24E3, (wchar_t)0x24E4, (wchar_t)0x24E5, (wchar_t)0x24E6, (wchar_t)0x24E7, (wchar_t)0x24E8,
  129. (wchar_t)0x24E9, (wchar_t)0xFF41, (wchar_t)0xFF42, (wchar_t)0xFF43, (wchar_t)0xFF44, (wchar_t)0xFF45, (wchar_t)0xFF46, (wchar_t)0xFF47, (wchar_t)0xFF48,
  130. (wchar_t)0xFF49, (wchar_t)0xFF4A, (wchar_t)0xFF4B, (wchar_t)0xFF4C, (wchar_t)0xFF4D, (wchar_t)0xFF4E, (wchar_t)0xFF4F, (wchar_t)0xFF50, (wchar_t)0xFF51,
  131. (wchar_t)0xFF52, (wchar_t)0xFF53, (wchar_t)0xFF54, (wchar_t)0xFF55, (wchar_t)0xFF56, (wchar_t)0xFF57, (wchar_t)0xFF58, (wchar_t)0xFF59, (wchar_t)0xFF5A
  132. };
  133. static const wchar_t unicode_uppers[] = {
  134. (wchar_t)0x0041, (wchar_t)0x0042, (wchar_t)0x0043, (wchar_t)0x0044, (wchar_t)0x0045, (wchar_t)0x0046, (wchar_t)0x0047, (wchar_t)0x0048, (wchar_t)0x0049,
  135. (wchar_t)0x004A, (wchar_t)0x004B, (wchar_t)0x004C, (wchar_t)0x004D, (wchar_t)0x004E, (wchar_t)0x004F, (wchar_t)0x0050, (wchar_t)0x0051, (wchar_t)0x0052,
  136. (wchar_t)0x0053, (wchar_t)0x0054, (wchar_t)0x0055, (wchar_t)0x0056, (wchar_t)0x0057, (wchar_t)0x0058, (wchar_t)0x0059, (wchar_t)0x005A, (wchar_t)0x00C0,
  137. (wchar_t)0x00C1, (wchar_t)0x00C2, (wchar_t)0x00C3, (wchar_t)0x00C4, (wchar_t)0x00C5, (wchar_t)0x00C6, (wchar_t)0x00C7, (wchar_t)0x00C8, (wchar_t)0x00C9,
  138. (wchar_t)0x00CA, (wchar_t)0x00CB, (wchar_t)0x00CC, (wchar_t)0x00CD, (wchar_t)0x00CE, (wchar_t)0x00CF, (wchar_t)0x00D0, (wchar_t)0x00D1, (wchar_t)0x00D2,
  139. (wchar_t)0x00D3, (wchar_t)0x00D4, (wchar_t)0x00D5, (wchar_t)0x00D6, (wchar_t)0x00D8, (wchar_t)0x00D9, (wchar_t)0x00DA, (wchar_t)0x00DB, (wchar_t)0x00DC,
  140. (wchar_t)0x00DD, (wchar_t)0x00DE, (wchar_t)0x0178, (wchar_t)0x0100, (wchar_t)0x0102, (wchar_t)0x0104, (wchar_t)0x0106, (wchar_t)0x0108, (wchar_t)0x010A,
  141. (wchar_t)0x010C, (wchar_t)0x010E, (wchar_t)0x0110, (wchar_t)0x0112, (wchar_t)0x0114, (wchar_t)0x0116, (wchar_t)0x0118, (wchar_t)0x011A, (wchar_t)0x011C,
  142. (wchar_t)0x011E, (wchar_t)0x0120, (wchar_t)0x0122, (wchar_t)0x0124, (wchar_t)0x0126, (wchar_t)0x0128, (wchar_t)0x012A, (wchar_t)0x012C, (wchar_t)0x012E,
  143. (wchar_t)0x0049, (wchar_t)0x0132, (wchar_t)0x0134, (wchar_t)0x0136, (wchar_t)0x0139, (wchar_t)0x013B, (wchar_t)0x013D, (wchar_t)0x013F, (wchar_t)0x0141,
  144. (wchar_t)0x0143, (wchar_t)0x0145, (wchar_t)0x0147, (wchar_t)0x014A, (wchar_t)0x014C, (wchar_t)0x014E, (wchar_t)0x0150, (wchar_t)0x0152, (wchar_t)0x0154,
  145. (wchar_t)0x0156, (wchar_t)0x0158, (wchar_t)0x015A, (wchar_t)0x015C, (wchar_t)0x015E, (wchar_t)0x0160, (wchar_t)0x0162, (wchar_t)0x0164, (wchar_t)0x0166,
  146. (wchar_t)0x0168, (wchar_t)0x016A, (wchar_t)0x016C, (wchar_t)0x016E, (wchar_t)0x0170, (wchar_t)0x0172, (wchar_t)0x0174, (wchar_t)0x0176, (wchar_t)0x0179,
  147. (wchar_t)0x017B, (wchar_t)0x017D, (wchar_t)0x0182, (wchar_t)0x0184, (wchar_t)0x0187, (wchar_t)0x018B, (wchar_t)0x0191, (wchar_t)0x0198, (wchar_t)0x01A0,
  148. (wchar_t)0x01A2, (wchar_t)0x01A4, (wchar_t)0x01A7, (wchar_t)0x01AC, (wchar_t)0x01AF, (wchar_t)0x01B3, (wchar_t)0x01B5, (wchar_t)0x01B8, (wchar_t)0x01BC,
  149. (wchar_t)0x01C4, (wchar_t)0x01C7, (wchar_t)0x01CA, (wchar_t)0x01CD, (wchar_t)0x01CF, (wchar_t)0x01D1, (wchar_t)0x01D3, (wchar_t)0x01D5, (wchar_t)0x01D7,
  150. (wchar_t)0x01D9, (wchar_t)0x01DB, (wchar_t)0x01DE, (wchar_t)0x01E0, (wchar_t)0x01E2, (wchar_t)0x01E4, (wchar_t)0x01E6, (wchar_t)0x01E8, (wchar_t)0x01EA,
  151. (wchar_t)0x01EC, (wchar_t)0x01EE, (wchar_t)0x01F1, (wchar_t)0x01F4, (wchar_t)0x01FA, (wchar_t)0x01FC, (wchar_t)0x01FE, (wchar_t)0x0200, (wchar_t)0x0202,
  152. (wchar_t)0x0204, (wchar_t)0x0206, (wchar_t)0x0208, (wchar_t)0x020A, (wchar_t)0x020C, (wchar_t)0x020E, (wchar_t)0x0210, (wchar_t)0x0212, (wchar_t)0x0214,
  153. (wchar_t)0x0216, (wchar_t)0x0181, (wchar_t)0x0186, (wchar_t)0x018A, (wchar_t)0x018E, (wchar_t)0x018F, (wchar_t)0x0190, (wchar_t)0x0193, (wchar_t)0x0194,
  154. (wchar_t)0x0197, (wchar_t)0x0196, (wchar_t)0x019C, (wchar_t)0x019D, (wchar_t)0x019F, (wchar_t)0x01A9, (wchar_t)0x01AE, (wchar_t)0x01B1, (wchar_t)0x01B2,
  155. (wchar_t)0x01B7, (wchar_t)0x0386, (wchar_t)0x0388, (wchar_t)0x0389, (wchar_t)0x038A, (wchar_t)0x0391, (wchar_t)0x0392, (wchar_t)0x0393, (wchar_t)0x0394,
  156. (wchar_t)0x0395, (wchar_t)0x0396, (wchar_t)0x0397, (wchar_t)0x0398, (wchar_t)0x0399, (wchar_t)0x039A, (wchar_t)0x039B, (wchar_t)0x039C, (wchar_t)0x039D,
  157. (wchar_t)0x039E, (wchar_t)0x039F, (wchar_t)0x03A0, (wchar_t)0x03A1, (wchar_t)0x03A3, (wchar_t)0x03A4, (wchar_t)0x03A5, (wchar_t)0x03A6, (wchar_t)0x03A7,
  158. (wchar_t)0x03A8, (wchar_t)0x03A9, (wchar_t)0x03AA, (wchar_t)0x03AB, (wchar_t)0x038C, (wchar_t)0x038E, (wchar_t)0x038F, (wchar_t)0x03E2, (wchar_t)0x03E4,
  159. (wchar_t)0x03E6, (wchar_t)0x03E8, (wchar_t)0x03EA, (wchar_t)0x03EC, (wchar_t)0x03EE, (wchar_t)0x0410, (wchar_t)0x0411, (wchar_t)0x0412, (wchar_t)0x0413,
  160. (wchar_t)0x0414, (wchar_t)0x0415, (wchar_t)0x0416, (wchar_t)0x0417, (wchar_t)0x0418, (wchar_t)0x0419, (wchar_t)0x041A, (wchar_t)0x041B, (wchar_t)0x041C,
  161. (wchar_t)0x041D, (wchar_t)0x041E, (wchar_t)0x041F, (wchar_t)0x0420, (wchar_t)0x0421, (wchar_t)0x0422, (wchar_t)0x0423, (wchar_t)0x0424, (wchar_t)0x0425,
  162. (wchar_t)0x0426, (wchar_t)0x0427, (wchar_t)0x0428, (wchar_t)0x0429, (wchar_t)0x042A, (wchar_t)0x042B, (wchar_t)0x042C, (wchar_t)0x042D, (wchar_t)0x042E,
  163. (wchar_t)0x042F, (wchar_t)0x0401, (wchar_t)0x0402, (wchar_t)0x0403, (wchar_t)0x0404, (wchar_t)0x0405, (wchar_t)0x0406, (wchar_t)0x0407, (wchar_t)0x0408,
  164. (wchar_t)0x0409, (wchar_t)0x040A, (wchar_t)0x040B, (wchar_t)0x040C, (wchar_t)0x040E, (wchar_t)0x040F, (wchar_t)0x0460, (wchar_t)0x0462, (wchar_t)0x0464,
  165. (wchar_t)0x0466, (wchar_t)0x0468, (wchar_t)0x046A, (wchar_t)0x046C, (wchar_t)0x046E, (wchar_t)0x0470, (wchar_t)0x0472, (wchar_t)0x0474, (wchar_t)0x0476,
  166. (wchar_t)0x0478, (wchar_t)0x047A, (wchar_t)0x047C, (wchar_t)0x047E, (wchar_t)0x0480, (wchar_t)0x0490, (wchar_t)0x0492, (wchar_t)0x0494, (wchar_t)0x0496,
  167. (wchar_t)0x0498, (wchar_t)0x049A, (wchar_t)0x049C, (wchar_t)0x049E, (wchar_t)0x04A0, (wchar_t)0x04A2, (wchar_t)0x04A4, (wchar_t)0x04A6, (wchar_t)0x04A8,
  168. (wchar_t)0x04AA, (wchar_t)0x04AC, (wchar_t)0x04AE, (wchar_t)0x04B0, (wchar_t)0x04B2, (wchar_t)0x04B4, (wchar_t)0x04B6, (wchar_t)0x04B8, (wchar_t)0x04BA,
  169. (wchar_t)0x04BC, (wchar_t)0x04BE, (wchar_t)0x04C1, (wchar_t)0x04C3, (wchar_t)0x04C7, (wchar_t)0x04CB, (wchar_t)0x04D0, (wchar_t)0x04D2, (wchar_t)0x04D4,
  170. (wchar_t)0x04D6, (wchar_t)0x04D8, (wchar_t)0x04DA, (wchar_t)0x04DC, (wchar_t)0x04DE, (wchar_t)0x04E0, (wchar_t)0x04E2, (wchar_t)0x04E4, (wchar_t)0x04E6,
  171. (wchar_t)0x04E8, (wchar_t)0x04EA, (wchar_t)0x04EE, (wchar_t)0x04F0, (wchar_t)0x04F2, (wchar_t)0x04F4, (wchar_t)0x04F8, (wchar_t)0x0531, (wchar_t)0x0532,
  172. (wchar_t)0x0533, (wchar_t)0x0534, (wchar_t)0x0535, (wchar_t)0x0536, (wchar_t)0x0537, (wchar_t)0x0538, (wchar_t)0x0539, (wchar_t)0x053A, (wchar_t)0x053B,
  173. (wchar_t)0x053C, (wchar_t)0x053D, (wchar_t)0x053E, (wchar_t)0x053F, (wchar_t)0x0540, (wchar_t)0x0541, (wchar_t)0x0542, (wchar_t)0x0543, (wchar_t)0x0544,
  174. (wchar_t)0x0545, (wchar_t)0x0546, (wchar_t)0x0547, (wchar_t)0x0548, (wchar_t)0x0549, (wchar_t)0x054A, (wchar_t)0x054B, (wchar_t)0x054C, (wchar_t)0x054D,
  175. (wchar_t)0x054E, (wchar_t)0x054F, (wchar_t)0x0550, (wchar_t)0x0551, (wchar_t)0x0552, (wchar_t)0x0553, (wchar_t)0x0554, (wchar_t)0x0555, (wchar_t)0x0556,
  176. (wchar_t)0x10A0, (wchar_t)0x10A1, (wchar_t)0x10A2, (wchar_t)0x10A3, (wchar_t)0x10A4, (wchar_t)0x10A5, (wchar_t)0x10A6, (wchar_t)0x10A7, (wchar_t)0x10A8,
  177. (wchar_t)0x10A9, (wchar_t)0x10AA, (wchar_t)0x10AB, (wchar_t)0x10AC, (wchar_t)0x10AD, (wchar_t)0x10AE, (wchar_t)0x10AF, (wchar_t)0x10B0, (wchar_t)0x10B1,
  178. (wchar_t)0x10B2, (wchar_t)0x10B3, (wchar_t)0x10B4, (wchar_t)0x10B5, (wchar_t)0x10B6, (wchar_t)0x10B7, (wchar_t)0x10B8, (wchar_t)0x10B9, (wchar_t)0x10BA,
  179. (wchar_t)0x10BB, (wchar_t)0x10BC, (wchar_t)0x10BD, (wchar_t)0x10BE, (wchar_t)0x10BF, (wchar_t)0x10C0, (wchar_t)0x10C1, (wchar_t)0x10C2, (wchar_t)0x10C3,
  180. (wchar_t)0x10C4, (wchar_t)0x10C5, (wchar_t)0x1E00, (wchar_t)0x1E02, (wchar_t)0x1E04, (wchar_t)0x1E06, (wchar_t)0x1E08, (wchar_t)0x1E0A, (wchar_t)0x1E0C,
  181. (wchar_t)0x1E0E, (wchar_t)0x1E10, (wchar_t)0x1E12, (wchar_t)0x1E14, (wchar_t)0x1E16, (wchar_t)0x1E18, (wchar_t)0x1E1A, (wchar_t)0x1E1C, (wchar_t)0x1E1E,
  182. (wchar_t)0x1E20, (wchar_t)0x1E22, (wchar_t)0x1E24, (wchar_t)0x1E26, (wchar_t)0x1E28, (wchar_t)0x1E2A, (wchar_t)0x1E2C, (wchar_t)0x1E2E, (wchar_t)0x1E30,
  183. (wchar_t)0x1E32, (wchar_t)0x1E34, (wchar_t)0x1E36, (wchar_t)0x1E38, (wchar_t)0x1E3A, (wchar_t)0x1E3C, (wchar_t)0x1E3E, (wchar_t)0x1E40, (wchar_t)0x1E42,
  184. (wchar_t)0x1E44, (wchar_t)0x1E46, (wchar_t)0x1E48, (wchar_t)0x1E4A, (wchar_t)0x1E4C, (wchar_t)0x1E4E, (wchar_t)0x1E50, (wchar_t)0x1E52, (wchar_t)0x1E54,
  185. (wchar_t)0x1E56, (wchar_t)0x1E58, (wchar_t)0x1E5A, (wchar_t)0x1E5C, (wchar_t)0x1E5E, (wchar_t)0x1E60, (wchar_t)0x1E62, (wchar_t)0x1E64, (wchar_t)0x1E66,
  186. (wchar_t)0x1E68, (wchar_t)0x1E6A, (wchar_t)0x1E6C, (wchar_t)0x1E6E, (wchar_t)0x1E70, (wchar_t)0x1E72, (wchar_t)0x1E74, (wchar_t)0x1E76, (wchar_t)0x1E78,
  187. (wchar_t)0x1E7A, (wchar_t)0x1E7C, (wchar_t)0x1E7E, (wchar_t)0x1E80, (wchar_t)0x1E82, (wchar_t)0x1E84, (wchar_t)0x1E86, (wchar_t)0x1E88, (wchar_t)0x1E8A,
  188. (wchar_t)0x1E8C, (wchar_t)0x1E8E, (wchar_t)0x1E90, (wchar_t)0x1E92, (wchar_t)0x1E94, (wchar_t)0x1EA0, (wchar_t)0x1EA2, (wchar_t)0x1EA4, (wchar_t)0x1EA6,
  189. (wchar_t)0x1EA8, (wchar_t)0x1EAA, (wchar_t)0x1EAC, (wchar_t)0x1EAE, (wchar_t)0x1EB0, (wchar_t)0x1EB2, (wchar_t)0x1EB4, (wchar_t)0x1EB6, (wchar_t)0x1EB8,
  190. (wchar_t)0x1EBA, (wchar_t)0x1EBC, (wchar_t)0x1EBE, (wchar_t)0x1EC0, (wchar_t)0x1EC2, (wchar_t)0x1EC4, (wchar_t)0x1EC6, (wchar_t)0x1EC8, (wchar_t)0x1ECA,
  191. (wchar_t)0x1ECC, (wchar_t)0x1ECE, (wchar_t)0x1ED0, (wchar_t)0x1ED2, (wchar_t)0x1ED4, (wchar_t)0x1ED6, (wchar_t)0x1ED8, (wchar_t)0x1EDA, (wchar_t)0x1EDC,
  192. (wchar_t)0x1EDE, (wchar_t)0x1EE0, (wchar_t)0x1EE2, (wchar_t)0x1EE4, (wchar_t)0x1EE6, (wchar_t)0x1EE8, (wchar_t)0x1EEA, (wchar_t)0x1EEC, (wchar_t)0x1EEE,
  193. (wchar_t)0x1EF0, (wchar_t)0x1EF2, (wchar_t)0x1EF4, (wchar_t)0x1EF6, (wchar_t)0x1EF8, (wchar_t)0x1F08, (wchar_t)0x1F09, (wchar_t)0x1F0A, (wchar_t)0x1F0B,
  194. (wchar_t)0x1F0C, (wchar_t)0x1F0D, (wchar_t)0x1F0E, (wchar_t)0x1F0F, (wchar_t)0x1F18, (wchar_t)0x1F19, (wchar_t)0x1F1A, (wchar_t)0x1F1B, (wchar_t)0x1F1C,
  195. (wchar_t)0x1F1D, (wchar_t)0x1F28, (wchar_t)0x1F29, (wchar_t)0x1F2A, (wchar_t)0x1F2B, (wchar_t)0x1F2C, (wchar_t)0x1F2D, (wchar_t)0x1F2E, (wchar_t)0x1F2F,
  196. (wchar_t)0x1F38, (wchar_t)0x1F39, (wchar_t)0x1F3A, (wchar_t)0x1F3B, (wchar_t)0x1F3C, (wchar_t)0x1F3D, (wchar_t)0x1F3E, (wchar_t)0x1F3F, (wchar_t)0x1F48,
  197. (wchar_t)0x1F49, (wchar_t)0x1F4A, (wchar_t)0x1F4B, (wchar_t)0x1F4C, (wchar_t)0x1F4D, (wchar_t)0x1F59, (wchar_t)0x1F5B, (wchar_t)0x1F5D, (wchar_t)0x1F5F,
  198. (wchar_t)0x1F68, (wchar_t)0x1F69, (wchar_t)0x1F6A, (wchar_t)0x1F6B, (wchar_t)0x1F6C, (wchar_t)0x1F6D, (wchar_t)0x1F6E, (wchar_t)0x1F6F, (wchar_t)0x1F88,
  199. (wchar_t)0x1F89, (wchar_t)0x1F8A, (wchar_t)0x1F8B, (wchar_t)0x1F8C, (wchar_t)0x1F8D, (wchar_t)0x1F8E, (wchar_t)0x1F8F, (wchar_t)0x1F98, (wchar_t)0x1F99,
  200. (wchar_t)0x1F9A, (wchar_t)0x1F9B, (wchar_t)0x1F9C, (wchar_t)0x1F9D, (wchar_t)0x1F9E, (wchar_t)0x1F9F, (wchar_t)0x1FA8, (wchar_t)0x1FA9, (wchar_t)0x1FAA,
  201. (wchar_t)0x1FAB, (wchar_t)0x1FAC, (wchar_t)0x1FAD, (wchar_t)0x1FAE, (wchar_t)0x1FAF, (wchar_t)0x1FB8, (wchar_t)0x1FB9, (wchar_t)0x1FD8, (wchar_t)0x1FD9,
  202. (wchar_t)0x1FE8, (wchar_t)0x1FE9, (wchar_t)0x24B6, (wchar_t)0x24B7, (wchar_t)0x24B8, (wchar_t)0x24B9, (wchar_t)0x24BA, (wchar_t)0x24BB, (wchar_t)0x24BC,
  203. (wchar_t)0x24BD, (wchar_t)0x24BE, (wchar_t)0x24BF, (wchar_t)0x24C0, (wchar_t)0x24C1, (wchar_t)0x24C2, (wchar_t)0x24C3, (wchar_t)0x24C4, (wchar_t)0x24C5,
  204. (wchar_t)0x24C6, (wchar_t)0x24C7, (wchar_t)0x24C8, (wchar_t)0x24C9, (wchar_t)0x24CA, (wchar_t)0x24CB, (wchar_t)0x24CC, (wchar_t)0x24CD, (wchar_t)0x24CE,
  205. (wchar_t)0x24CF, (wchar_t)0xFF21, (wchar_t)0xFF22, (wchar_t)0xFF23, (wchar_t)0xFF24, (wchar_t)0xFF25, (wchar_t)0xFF26, (wchar_t)0xFF27, (wchar_t)0xFF28,
  206. (wchar_t)0xFF29, (wchar_t)0xFF2A, (wchar_t)0xFF2B, (wchar_t)0xFF2C, (wchar_t)0xFF2D, (wchar_t)0xFF2E, (wchar_t)0xFF2F, (wchar_t)0xFF30, (wchar_t)0xFF31,
  207. (wchar_t)0xFF32, (wchar_t)0xFF33, (wchar_t)0xFF34, (wchar_t)0xFF35, (wchar_t)0xFF36, (wchar_t)0xFF37, (wchar_t)0xFF38, (wchar_t)0xFF39, (wchar_t)0xFF3A
  208. };
  209. std::string StringUtils::FormatV(const char *fmt, va_list args)
  210. {
  211. if (!fmt || !fmt[0])
  212. return "";
  213. int size = FORMAT_BLOCK_SIZE;
  214. va_list argCopy;
  215. while (true)
  216. {
  217. char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  218. if (!cstr)
  219. return "";
  220. va_copy(argCopy, args);
  221. int nActual = vsnprintf(cstr, size, fmt, argCopy);
  222. va_end(argCopy);
  223. if (nActual > -1 && nActual < size) // We got a valid result
  224. {
  225. std::string str(cstr, nActual);
  226. free(cstr);
  227. return str;
  228. }
  229. free(cstr);
  230. #ifndef TARGET_WINDOWS
  231. if (nActual > -1) // Exactly what we will need (glibc 2.1)
  232. size = nActual + 1;
  233. else // Let's try to double the size (glibc 2.0)
  234. size *= 2;
  235. #else // TARGET_WINDOWS
  236. va_copy(argCopy, args);
  237. size = _vscprintf(fmt, argCopy);
  238. va_end(argCopy);
  239. if (size < 0)
  240. return "";
  241. else
  242. size++; // increment for null-termination
  243. #endif // TARGET_WINDOWS
  244. }
  245. return ""; // unreachable
  246. }
  247. std::wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
  248. {
  249. if (!fmt || !fmt[0])
  250. return L"";
  251. int size = FORMAT_BLOCK_SIZE;
  252. va_list argCopy;
  253. while (true)
  254. {
  255. wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
  256. if (!cstr)
  257. return L"";
  258. va_copy(argCopy, args);
  259. int nActual = vswprintf(cstr, size, fmt, argCopy);
  260. va_end(argCopy);
  261. if (nActual > -1 && nActual < size) // We got a valid result
  262. {
  263. std::wstring str(cstr, nActual);
  264. free(cstr);
  265. return str;
  266. }
  267. free(cstr);
  268. #ifndef TARGET_WINDOWS
  269. if (nActual > -1) // Exactly what we will need (glibc 2.1)
  270. size = nActual + 1;
  271. else // Let's try to double the size (glibc 2.0)
  272. size *= 2;
  273. #else // TARGET_WINDOWS
  274. va_copy(argCopy, args);
  275. size = _vscwprintf(fmt, argCopy);
  276. va_end(argCopy);
  277. if (size < 0)
  278. return L"";
  279. else
  280. size++; // increment for null-termination
  281. #endif // TARGET_WINDOWS
  282. }
  283. return L"";
  284. }
  285. int compareWchar (const void* a, const void* b)
  286. {
  287. if (*(const wchar_t*)a < *(const wchar_t*)b)
  288. return -1;
  289. else if (*(const wchar_t*)a > *(const wchar_t*)b)
  290. return 1;
  291. return 0;
  292. }
  293. wchar_t tolowerUnicode(const wchar_t& c)
  294. {
  295. wchar_t* p = (wchar_t*) bsearch (&c, unicode_uppers, sizeof(unicode_uppers) / sizeof(wchar_t), sizeof(wchar_t), compareWchar);
  296. if (p)
  297. return *(unicode_lowers + (p - unicode_uppers));
  298. return c;
  299. }
  300. wchar_t toupperUnicode(const wchar_t& c)
  301. {
  302. wchar_t* p = (wchar_t*) bsearch (&c, unicode_lowers, sizeof(unicode_lowers) / sizeof(wchar_t), sizeof(wchar_t), compareWchar);
  303. if (p)
  304. return *(unicode_uppers + (p - unicode_lowers));
  305. return c;
  306. }
  307. void StringUtils::ToUpper(std::string &str)
  308. {
  309. std::transform(str.begin(), str.end(), str.begin(), ::toupper);
  310. }
  311. void StringUtils::ToUpper(std::wstring &str)
  312. {
  313. transform(str.begin(), str.end(), str.begin(), toupperUnicode);
  314. }
  315. void StringUtils::ToLower(std::string &str)
  316. {
  317. transform(str.begin(), str.end(), str.begin(), ::tolower);
  318. }
  319. void StringUtils::ToLower(std::wstring &str)
  320. {
  321. transform(str.begin(), str.end(), str.begin(), tolowerUnicode);
  322. }
  323. void StringUtils::ToCapitalize(std::string &str)
  324. {
  325. std::wstring wstr;
  326. g_charsetConverter.utf8ToW(str, wstr);
  327. ToCapitalize(wstr);
  328. g_charsetConverter.wToUTF8(wstr, str);
  329. }
  330. void StringUtils::ToCapitalize(std::wstring &str)
  331. {
  332. const std::locale& loc = g_langInfo.GetSystemLocale();
  333. bool isFirstLetter = true;
  334. for (std::wstring::iterator it = str.begin(); it < str.end(); ++it)
  335. {
  336. // capitalize after spaces and punctuation characters (except apostrophes)
  337. if (std::isspace(*it, loc) || (std::ispunct(*it, loc) && *it != '\''))
  338. isFirstLetter = true;
  339. else if (isFirstLetter)
  340. {
  341. *it = std::toupper(*it, loc);
  342. isFirstLetter = false;
  343. }
  344. }
  345. }
  346. bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
  347. {
  348. // before we do the char-by-char comparison, first compare sizes of both strings.
  349. // This led to a 33% improvement in benchmarking on average. (size() just returns a member of std::string)
  350. if (str1.size() != str2.size())
  351. return false;
  352. return EqualsNoCase(str1.c_str(), str2.c_str());
  353. }
  354. bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
  355. {
  356. return EqualsNoCase(str1.c_str(), s2);
  357. }
  358. bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
  359. {
  360. char c2; // we need only one char outside the loop
  361. do
  362. {
  363. const char c1 = *s1++; // const local variable should help compiler to optimize
  364. c2 = *s2++;
  365. if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
  366. return false;
  367. } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
  368. return true;
  369. }
  370. int StringUtils::CompareNoCase(const std::string& str1, const std::string& str2, size_t n /* = 0 */)
  371. {
  372. return CompareNoCase(str1.c_str(), str2.c_str(), n);
  373. }
  374. int StringUtils::CompareNoCase(const char* s1, const char* s2, size_t n /* = 0 */)
  375. {
  376. char c2; // we need only one char outside the loop
  377. size_t index = 0;
  378. do
  379. {
  380. const char c1 = *s1++; // const local variable should help compiler to optimize
  381. c2 = *s2++;
  382. index++;
  383. if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
  384. return ::tolower(c1) - ::tolower(c2);
  385. } while (c2 != '\0' &&
  386. index != n); // At this point, we know c1 == c2, so there's no need to test them both.
  387. return 0;
  388. }
  389. std::string StringUtils::Left(const std::string &str, size_t count)
  390. {
  391. count = std::max((size_t)0, std::min(count, str.size()));
  392. return str.substr(0, count);
  393. }
  394. std::string StringUtils::Mid(const std::string &str, size_t first, size_t count /* = string::npos */)
  395. {
  396. if (first + count > str.size())
  397. count = str.size() - first;
  398. if (first > str.size())
  399. return std::string();
  400. assert(first + count <= str.size());
  401. return str.substr(first, count);
  402. }
  403. std::string StringUtils::Right(const std::string &str, size_t count)
  404. {
  405. count = std::max((size_t)0, std::min(count, str.size()));
  406. return str.substr(str.size() - count);
  407. }
  408. std::string& StringUtils::Trim(std::string &str)
  409. {
  410. TrimLeft(str);
  411. return TrimRight(str);
  412. }
  413. std::string& StringUtils::Trim(std::string &str, const char* const chars)
  414. {
  415. TrimLeft(str, chars);
  416. return TrimRight(str, chars);
  417. }
  418. // hack to check only first byte of UTF-8 character
  419. // without this hack "TrimX" functions failed on Win32 and OS X with UTF-8 strings
  420. static int isspace_c(char c)
  421. {
  422. return (c & 0x80) == 0 && ::isspace(c);
  423. }
  424. std::string& StringUtils::TrimLeft(std::string &str)
  425. {
  426. str.erase(str.begin(), std::find_if(str.begin(), str.end(), std::not1(std::function<int(char)>(isspace_c))));
  427. return str;
  428. }
  429. std::string& StringUtils::TrimLeft(std::string &str, const char* const chars)
  430. {
  431. size_t nidx = str.find_first_not_of(chars);
  432. str.erase(0, nidx);
  433. return str;
  434. }
  435. std::string& StringUtils::TrimRight(std::string &str)
  436. {
  437. str.erase(std::find_if(str.rbegin(), str.rend(), std::not1(std::function<int(char)>(isspace_c))).base(), str.end());
  438. return str;
  439. }
  440. std::string& StringUtils::TrimRight(std::string &str, const char* const chars)
  441. {
  442. size_t nidx = str.find_last_not_of(chars);
  443. str.erase(str.npos == nidx ? 0 : ++nidx);
  444. return str;
  445. }
  446. int StringUtils::ReturnDigits(const std::string& str)
  447. {
  448. std::stringstream ss;
  449. for (const auto& character : str)
  450. {
  451. if (isdigit(character))
  452. ss << character;
  453. }
  454. return atoi(ss.str().c_str());
  455. }
  456. std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
  457. {
  458. std::string::iterator it = str.begin();
  459. bool onSpace = false;
  460. while(it != str.end())
  461. {
  462. if (*it == '\t')
  463. *it = ' ';
  464. if (*it == ' ')
  465. {
  466. if (onSpace)
  467. {
  468. it = str.erase(it);
  469. continue;
  470. }
  471. else
  472. onSpace = true;
  473. }
  474. else
  475. onSpace = false;
  476. ++it;
  477. }
  478. return str;
  479. }
  480. int StringUtils::Replace(std::string &str, char oldChar, char newChar)
  481. {
  482. int replacedChars = 0;
  483. for (std::string::iterator it = str.begin(); it != str.end(); ++it)
  484. {
  485. if (*it == oldChar)
  486. {
  487. *it = newChar;
  488. replacedChars++;
  489. }
  490. }
  491. return replacedChars;
  492. }
  493. int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
  494. {
  495. if (oldStr.empty())
  496. return 0;
  497. int replacedChars = 0;
  498. size_t index = 0;
  499. while (index < str.size() && (index = str.find(oldStr, index)) != std::string::npos)
  500. {
  501. str.replace(index, oldStr.size(), newStr);
  502. index += newStr.size();
  503. replacedChars++;
  504. }
  505. return replacedChars;
  506. }
  507. int StringUtils::Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr)
  508. {
  509. if (oldStr.empty())
  510. return 0;
  511. int replacedChars = 0;
  512. size_t index = 0;
  513. while (index < str.size() && (index = str.find(oldStr, index)) != std::string::npos)
  514. {
  515. str.replace(index, oldStr.size(), newStr);
  516. index += newStr.size();
  517. replacedChars++;
  518. }
  519. return replacedChars;
  520. }
  521. bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
  522. {
  523. return str1.compare(0, str2.size(), str2) == 0;
  524. }
  525. bool StringUtils::StartsWith(const std::string &str1, const char *s2)
  526. {
  527. return StartsWith(str1.c_str(), s2);
  528. }
  529. bool StringUtils::StartsWith(const char *s1, const char *s2)
  530. {
  531. while (*s2 != '\0')
  532. {
  533. if (*s1 != *s2)
  534. return false;
  535. s1++;
  536. s2++;
  537. }
  538. return true;
  539. }
  540. bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
  541. {
  542. return StartsWithNoCase(str1.c_str(), str2.c_str());
  543. }
  544. bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
  545. {
  546. return StartsWithNoCase(str1.c_str(), s2);
  547. }
  548. bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
  549. {
  550. while (*s2 != '\0')
  551. {
  552. if (::tolower(*s1) != ::tolower(*s2))
  553. return false;
  554. s1++;
  555. s2++;
  556. }
  557. return true;
  558. }
  559. bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
  560. {
  561. if (str1.size() < str2.size())
  562. return false;
  563. return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
  564. }
  565. bool StringUtils::EndsWith(const std::string &str1, const char *s2)
  566. {
  567. size_t len2 = strlen(s2);
  568. if (str1.size() < len2)
  569. return false;
  570. return str1.compare(str1.size() - len2, len2, s2) == 0;
  571. }
  572. bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
  573. {
  574. if (str1.size() < str2.size())
  575. return false;
  576. const char *s1 = str1.c_str() + str1.size() - str2.size();
  577. const char *s2 = str2.c_str();
  578. while (*s2 != '\0')
  579. {
  580. if (::tolower(*s1) != ::tolower(*s2))
  581. return false;
  582. s1++;
  583. s2++;
  584. }
  585. return true;
  586. }
  587. bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
  588. {
  589. size_t len2 = strlen(s2);
  590. if (str1.size() < len2)
  591. return false;
  592. const char *s1 = str1.c_str() + str1.size() - len2;
  593. while (*s2 != '\0')
  594. {
  595. if (::tolower(*s1) != ::tolower(*s2))
  596. return false;
  597. s1++;
  598. s2++;
  599. }
  600. return true;
  601. }
  602. std::vector<std::string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings)
  603. {
  604. std::vector<std::string> result;
  605. SplitTo(std::back_inserter(result), input, delimiter, iMaxStrings);
  606. return result;
  607. }
  608. std::vector<std::string> StringUtils::Split(const std::string& input, const char delimiter, size_t iMaxStrings)
  609. {
  610. std::vector<std::string> result;
  611. SplitTo(std::back_inserter(result), input, delimiter, iMaxStrings);
  612. return result;
  613. }
  614. std::vector<std::string> StringUtils::Split(const std::string& input, const std::vector<std::string>& delimiters)
  615. {
  616. std::vector<std::string> result;
  617. SplitTo(std::back_inserter(result), input, delimiters);
  618. return result;
  619. }
  620. std::vector<std::string> StringUtils::SplitMulti(const std::vector<std::string> &input, const std::vector<std::string> &delimiters, unsigned int iMaxStrings /* = 0 */)
  621. {
  622. if (input.empty())
  623. return std::vector<std::string>();
  624. std::vector<std::string> results(input);
  625. if (delimiters.empty() || (iMaxStrings > 0 && iMaxStrings <= input.size()))
  626. return results;
  627. std::vector<std::string> strings1;
  628. if (iMaxStrings == 0)
  629. {
  630. for (size_t di = 0; di < delimiters.size(); di++)
  631. {
  632. for (size_t i = 0; i < results.size(); i++)
  633. {
  634. std::vector<std::string> substrings = StringUtils::Split(results[i], delimiters[di]);
  635. for (size_t j = 0; j < substrings.size(); j++)
  636. strings1.push_back(substrings[j]);
  637. }
  638. results = strings1;
  639. strings1.clear();
  640. }
  641. return results;
  642. }
  643. // Control the number of strings input is split into, keeping the original strings.
  644. // Note iMaxStrings > input.size()
  645. int iNew = iMaxStrings - results.size();
  646. for (size_t di = 0; di < delimiters.size(); di++)
  647. {
  648. for (size_t i = 0; i < results.size(); i++)
  649. {
  650. if (iNew > 0)
  651. {
  652. std::vector<std::string> substrings = StringUtils::Split(results[i], delimiters[di], iNew + 1);
  653. iNew = iNew - substrings.size() + 1;
  654. for (size_t j = 0; j < substrings.size(); j++)
  655. strings1.push_back(substrings[j]);
  656. }
  657. else
  658. strings1.push_back(results[i]);
  659. }
  660. results = strings1;
  661. iNew = iMaxStrings - results.size();
  662. strings1.clear();
  663. if ((iNew <= 0))
  664. break; //Stop trying any more delimiters
  665. }
  666. return results;
  667. }
  668. // returns the number of occurrences of strFind in strInput.
  669. int StringUtils::FindNumber(const std::string& strInput, const std::string &strFind)
  670. {
  671. size_t pos = strInput.find(strFind, 0);
  672. int numfound = 0;
  673. while (pos != std::string::npos)
  674. {
  675. numfound++;
  676. pos = strInput.find(strFind, pos + 1);
  677. }
  678. return numfound;
  679. }
  680. // Plane maps for MySQL utf8_general_ci (now known as utf8mb3_general_ci) collation
  681. // Derived from https://github.com/MariaDB/server/blob/10.5/strings/ctype-utf8.c
  682. // clang format off
// Table "plane00": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+0000..U+00FF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. E.g. entry 0x61 holds 0x0041 and entry 0xE0 holds
// 0x0041.
static const uint16_t plane00[] = {
  0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
  0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
  0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
  0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
  0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
  0x0060, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
  0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
  0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x039C, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
  0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00D7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0053,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
  0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00F7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0059
};
// Table "plane01": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+0100..U+01FF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. E.g. entries 0x00 and 0x01 both hold 0x0041.
static const uint16_t plane01[] = {
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0044, 0x0044,
  0x0110, 0x0110, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0047, 0x0047, 0x0047, 0x0047,
  0x0047, 0x0047, 0x0047, 0x0047, 0x0048, 0x0048, 0x0126, 0x0126, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049,
  0x0049, 0x0049, 0x0132, 0x0132, 0x004A, 0x004A, 0x004B, 0x004B, 0x0138, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x013F,
  0x013F, 0x0141, 0x0141, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x0149, 0x014A, 0x014A, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x0152, 0x0152, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053,
  0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0166, 0x0166, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
  0x0055, 0x0055, 0x0055, 0x0055, 0x0057, 0x0057, 0x0059, 0x0059, 0x0059, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0053,
  0x0180, 0x0181, 0x0182, 0x0182, 0x0184, 0x0184, 0x0186, 0x0187, 0x0187, 0x0189, 0x018A, 0x018B, 0x018B, 0x018D, 0x018E, 0x018F,
  0x0190, 0x0191, 0x0191, 0x0193, 0x0194, 0x01F6, 0x0196, 0x0197, 0x0198, 0x0198, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F,
  0x004F, 0x004F, 0x01A2, 0x01A2, 0x01A4, 0x01A4, 0x01A6, 0x01A7, 0x01A7, 0x01A9, 0x01AA, 0x01AB, 0x01AC, 0x01AC, 0x01AE, 0x0055,
  0x0055, 0x01B1, 0x01B2, 0x01B3, 0x01B3, 0x01B5, 0x01B5, 0x01B7, 0x01B8, 0x01B8, 0x01BA, 0x01BB, 0x01BC, 0x01BC, 0x01BE, 0x01F7,
  0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x01C4, 0x01C4, 0x01C4, 0x01C7, 0x01C7, 0x01C7, 0x01CA, 0x01CA, 0x01CA, 0x0041, 0x0041, 0x0049,
  0x0049, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x018E, 0x0041, 0x0041,
  0x0041, 0x0041, 0x00C6, 0x00C6, 0x01E4, 0x01E4, 0x0047, 0x0047, 0x004B, 0x004B, 0x004F, 0x004F, 0x004F, 0x004F, 0x01B7, 0x01B7,
  0x004A, 0x01F1, 0x01F1, 0x01F1, 0x0047, 0x0047, 0x01F6, 0x01F7, 0x004E, 0x004E, 0x0041, 0x0041, 0x00C6, 0x00C6, 0x00D8, 0x00D8
};
// Table "plane02": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+0200..U+02FF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. E.g. entry 0x00 holds 0x0041 and entry 0x53 holds
// 0x0181; most entries from 0x34 upwards hold their own code point.
static const uint16_t plane02[] = {
  0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
  0x0052, 0x0052, 0x0052, 0x0052, 0x0055, 0x0055, 0x0055, 0x0055, 0x0053, 0x0053, 0x0054, 0x0054, 0x021C, 0x021C, 0x0048, 0x0048,
  0x0220, 0x0221, 0x0222, 0x0222, 0x0224, 0x0224, 0x0041, 0x0041, 0x0045, 0x0045, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x0059, 0x0059, 0x0234, 0x0235, 0x0236, 0x0237, 0x0238, 0x0239, 0x023A, 0x023B, 0x023C, 0x023D, 0x023E, 0x023F,
  0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 0x0248, 0x0249, 0x024A, 0x024B, 0x024C, 0x024D, 0x024E, 0x024F,
  0x0250, 0x0251, 0x0252, 0x0181, 0x0186, 0x0255, 0x0189, 0x018A, 0x0258, 0x018F, 0x025A, 0x0190, 0x025C, 0x025D, 0x025E, 0x025F,
  0x0193, 0x0261, 0x0262, 0x0194, 0x0264, 0x0265, 0x0266, 0x0267, 0x0197, 0x0196, 0x026A, 0x026B, 0x026C, 0x026D, 0x026E, 0x019C,
  0x0270, 0x0271, 0x019D, 0x0273, 0x0274, 0x019F, 0x0276, 0x0277, 0x0278, 0x0279, 0x027A, 0x027B, 0x027C, 0x027D, 0x027E, 0x027F,
  0x01A6, 0x0281, 0x0282, 0x01A9, 0x0284, 0x0285, 0x0286, 0x0287, 0x01AE, 0x0289, 0x01B1, 0x01B2, 0x028C, 0x028D, 0x028E, 0x028F,
  0x0290, 0x0291, 0x01B7, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F,
  0x02A0, 0x02A1, 0x02A2, 0x02A3, 0x02A4, 0x02A5, 0x02A6, 0x02A7, 0x02A8, 0x02A9, 0x02AA, 0x02AB, 0x02AC, 0x02AD, 0x02AE, 0x02AF,
  0x02B0, 0x02B1, 0x02B2, 0x02B3, 0x02B4, 0x02B5, 0x02B6, 0x02B7, 0x02B8, 0x02B9, 0x02BA, 0x02BB, 0x02BC, 0x02BD, 0x02BE, 0x02BF,
  0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7, 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF,
  0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7, 0x02D8, 0x02D9, 0x02DA, 0x02DB, 0x02DC, 0x02DD, 0x02DE, 0x02DF,
  0x02E0, 0x02E1, 0x02E2, 0x02E3, 0x02E4, 0x02E5, 0x02E6, 0x02E7, 0x02E8, 0x02E9, 0x02EA, 0x02EB, 0x02EC, 0x02ED, 0x02EE, 0x02EF,
  0x02F0, 0x02F1, 0x02F2, 0x02F3, 0x02F4, 0x02F5, 0x02F6, 0x02F7, 0x02F8, 0x02F9, 0x02FA, 0x02FB, 0x02FC, 0x02FD, 0x02FE, 0x02FF
};
// Table "plane03": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+0300..U+03FF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. E.g. entry 0xB1 holds 0x0391; the first four rows
// hold their own code points.
static const uint16_t plane03[] = {
  0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F,
  0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F,
  0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F,
  0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F,
  0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0399, 0x0346, 0x0347, 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F,
  0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F,
  0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
  0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377, 0x0378, 0x0379, 0x037A, 0x037B, 0x037C, 0x037D, 0x037E, 0x037F,
  0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0391, 0x0387, 0x0395, 0x0397, 0x0399, 0x038B, 0x039F, 0x038D, 0x03A5, 0x03A9,
  0x0399, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
  0x03A0, 0x03A1, 0x03A2, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x0391, 0x0395, 0x0397, 0x0399,
  0x03A5, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
  0x03A0, 0x03A1, 0x03A3, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x039F, 0x03A5, 0x03A9, 0x03CF,
  0x0392, 0x0398, 0x03D2, 0x03D2, 0x03D2, 0x03A6, 0x03A0, 0x03D7, 0x03D8, 0x03D9, 0x03DA, 0x03DA, 0x03DC, 0x03DC, 0x03DE, 0x03DE,
  0x03E0, 0x03E0, 0x03E2, 0x03E2, 0x03E4, 0x03E4, 0x03E6, 0x03E6, 0x03E8, 0x03E8, 0x03EA, 0x03EA, 0x03EC, 0x03EC, 0x03EE, 0x03EE,
  0x039A, 0x03A1, 0x03A3, 0x03F3, 0x03F4, 0x03F5, 0x03F6, 0x03F7, 0x03F8, 0x03F9, 0x03FA, 0x03FB, 0x03FC, 0x03FD, 0x03FE, 0x03FF
};
// Table "plane04": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+0400..U+04FF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. E.g. entry 0x30 holds 0x0410, the same value as
// entry 0x10.
static const uint16_t plane04[] = {
  0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
  0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
  0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
  0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
  0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
  0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
  0x0460, 0x0460, 0x0462, 0x0462, 0x0464, 0x0464, 0x0466, 0x0466, 0x0468, 0x0468, 0x046A, 0x046A, 0x046C, 0x046C, 0x046E, 0x046E,
  0x0470, 0x0470, 0x0472, 0x0472, 0x0474, 0x0474, 0x0474, 0x0474, 0x0478, 0x0478, 0x047A, 0x047A, 0x047C, 0x047C, 0x047E, 0x047E,
  0x0480, 0x0480, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0488, 0x0489, 0x048A, 0x048B, 0x048C, 0x048C, 0x048E, 0x048E,
  0x0490, 0x0490, 0x0492, 0x0492, 0x0494, 0x0494, 0x0496, 0x0496, 0x0498, 0x0498, 0x049A, 0x049A, 0x049C, 0x049C, 0x049E, 0x049E,
  0x04A0, 0x04A0, 0x04A2, 0x04A2, 0x04A4, 0x04A4, 0x04A6, 0x04A6, 0x04A8, 0x04A8, 0x04AA, 0x04AA, 0x04AC, 0x04AC, 0x04AE, 0x04AE,
  0x04B0, 0x04B0, 0x04B2, 0x04B2, 0x04B4, 0x04B4, 0x04B6, 0x04B6, 0x04B8, 0x04B8, 0x04BA, 0x04BA, 0x04BC, 0x04BC, 0x04BE, 0x04BE,
  0x04C0, 0x0416, 0x0416, 0x04C3, 0x04C3, 0x04C5, 0x04C6, 0x04C7, 0x04C7, 0x04C9, 0x04CA, 0x04CB, 0x04CB, 0x04CD, 0x04CE, 0x04CF,
  0x0410, 0x0410, 0x0410, 0x0410, 0x04D4, 0x04D4, 0x0415, 0x0415, 0x04D8, 0x04D8, 0x04D8, 0x04D8, 0x0416, 0x0416, 0x0417, 0x0417,
  0x04E0, 0x04E0, 0x0418, 0x0418, 0x0418, 0x0418, 0x041E, 0x041E, 0x04E8, 0x04E8, 0x04E8, 0x04E8, 0x042D, 0x042D, 0x0423, 0x0423,
  0x0423, 0x0423, 0x0423, 0x0423, 0x0427, 0x0427, 0x04F6, 0x04F7, 0x042B, 0x042B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF
};
// Table "plane05": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+0500..U+05FF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. Only entries 0x61..0x86 differ from their own
// code point; they hold 0x0531..0x0556.
static const uint16_t plane05[] = {
  0x0500, 0x0501, 0x0502, 0x0503, 0x0504, 0x0505, 0x0506, 0x0507, 0x0508, 0x0509, 0x050A, 0x050B, 0x050C, 0x050D, 0x050E, 0x050F,
  0x0510, 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0516, 0x0517, 0x0518, 0x0519, 0x051A, 0x051B, 0x051C, 0x051D, 0x051E, 0x051F,
  0x0520, 0x0521, 0x0522, 0x0523, 0x0524, 0x0525, 0x0526, 0x0527, 0x0528, 0x0529, 0x052A, 0x052B, 0x052C, 0x052D, 0x052E, 0x052F,
  0x0530, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
  0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
  0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0557, 0x0558, 0x0559, 0x055A, 0x055B, 0x055C, 0x055D, 0x055E, 0x055F,
  0x0560, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
  0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
  0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0587, 0x0588, 0x0589, 0x058A, 0x058B, 0x058C, 0x058D, 0x058E, 0x058F,
  0x0590, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597, 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
  0x05A0, 0x05A1, 0x05A2, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7, 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF,
  0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
  0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x05C5, 0x05C6, 0x05C7, 0x05C8, 0x05C9, 0x05CA, 0x05CB, 0x05CC, 0x05CD, 0x05CE, 0x05CF,
  0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
  0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05EB, 0x05EC, 0x05ED, 0x05EE, 0x05EF,
  0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x05F5, 0x05F6, 0x05F7, 0x05F8, 0x05F9, 0x05FA, 0x05FB, 0x05FC, 0x05FD, 0x05FE, 0x05FF
};
// Table "plane1E": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+1E00..U+1EFF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. Most entries hold a value in 0x0041..0x005A,
// e.g. entries 0x00 and 0x01 both hold 0x0041.
static const uint16_t plane1E[] = {
  0x0041, 0x0041, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0043, 0x0043, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044,
  0x0044, 0x0044, 0x0044, 0x0044, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0046, 0x0046,
  0x0047, 0x0047, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0049, 0x0049, 0x0049, 0x0049,
  0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004D, 0x004D,
  0x004D, 0x004D, 0x004D, 0x004D, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x004F, 0x004F, 0x0050, 0x0050, 0x0050, 0x0050, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052,
  0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054,
  0x0054, 0x0054, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0056, 0x0056, 0x0056, 0x0056,
  0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0058, 0x0058, 0x0058, 0x0058, 0x0059, 0x0059,
  0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0048, 0x0054, 0x0057, 0x0059, 0x1E9A, 0x0053, 0x1E9C, 0x1E9D, 0x1E9E, 0x1E9F,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045,
  0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
  0x0055, 0x0055, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x1EFA, 0x1EFB, 0x1EFC, 0x1EFD, 0x1EFE, 0x1EFF
};
// Table "plane1F": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+1F00..U+1FFF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. Most entries hold a value in 0x0391..0x03A9,
// e.g. the whole first row holds 0x0391.
static const uint16_t plane1F[] = {
  0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
  0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F16, 0x1F17, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F1E, 0x1F1F,
  0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
  0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399,
  0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F46, 0x1F47, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F4E, 0x1F4F,
  0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1F58, 0x03A5, 0x1F5A, 0x03A5, 0x1F5C, 0x03A5, 0x1F5E, 0x03A5,
  0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
  0x0391, 0x1FBB, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0399, 0x1FDB, 0x039F, 0x1FF9, 0x03A5, 0x1FEB, 0x03A9, 0x1FFB, 0x1F7E, 0x1F7F,
  0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
  0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
  0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
  0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FB5, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FBB, 0x0391, 0x1FBD, 0x0399, 0x1FBF,
  0x1FC0, 0x1FC1, 0x0397, 0x0397, 0x0397, 0x1FC5, 0x0397, 0x0397, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0397, 0x1FCD, 0x1FCE, 0x1FCF,
  0x0399, 0x0399, 0x0399, 0x1FD3, 0x1FD4, 0x1FD5, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF,
  0x03A5, 0x03A5, 0x03A5, 0x1FE3, 0x03A1, 0x03A1, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1FEB, 0x03A1, 0x1FED, 0x1FEE, 0x1FEF,
  0x1FF0, 0x1FF1, 0x03A9, 0x03A9, 0x03A9, 0x1FF5, 0x03A9, 0x03A9, 0x039F, 0x1FF9, 0x03A9, 0x1FFB, 0x03A9, 0x1FFD, 0x1FFE, 0x1FFF
};
// Table "plane21": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+2100..U+21FF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. Only entries 0x70..0x7F differ from their own
// code point; they hold 0x2160..0x216F.
static const uint16_t plane21[] = {
  0x2100, 0x2101, 0x2102, 0x2103, 0x2104, 0x2105, 0x2106, 0x2107, 0x2108, 0x2109, 0x210A, 0x210B, 0x210C, 0x210D, 0x210E, 0x210F,
  0x2110, 0x2111, 0x2112, 0x2113, 0x2114, 0x2115, 0x2116, 0x2117, 0x2118, 0x2119, 0x211A, 0x211B, 0x211C, 0x211D, 0x211E, 0x211F,
  0x2120, 0x2121, 0x2122, 0x2123, 0x2124, 0x2125, 0x2126, 0x2127, 0x2128, 0x2129, 0x212A, 0x212B, 0x212C, 0x212D, 0x212E, 0x212F,
  0x2130, 0x2131, 0x2132, 0x2133, 0x2134, 0x2135, 0x2136, 0x2137, 0x2138, 0x2139, 0x213A, 0x213B, 0x213C, 0x213D, 0x213E, 0x213F,
  0x2140, 0x2141, 0x2142, 0x2143, 0x2144, 0x2145, 0x2146, 0x2147, 0x2148, 0x2149, 0x214A, 0x214B, 0x214C, 0x214D, 0x214E, 0x214F,
  0x2150, 0x2151, 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D, 0x215E, 0x215F,
  0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
  0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
  0x2180, 0x2181, 0x2182, 0x2183, 0x2184, 0x2185, 0x2186, 0x2187, 0x2188, 0x2189, 0x218A, 0x218B, 0x218C, 0x218D, 0x218E, 0x218F,
  0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199, 0x219A, 0x219B, 0x219C, 0x219D, 0x219E, 0x219F,
  0x21A0, 0x21A1, 0x21A2, 0x21A3, 0x21A4, 0x21A5, 0x21A6, 0x21A7, 0x21A8, 0x21A9, 0x21AA, 0x21AB, 0x21AC, 0x21AD, 0x21AE, 0x21AF,
  0x21B0, 0x21B1, 0x21B2, 0x21B3, 0x21B4, 0x21B5, 0x21B6, 0x21B7, 0x21B8, 0x21B9, 0x21BA, 0x21BB, 0x21BC, 0x21BD, 0x21BE, 0x21BF,
  0x21C0, 0x21C1, 0x21C2, 0x21C3, 0x21C4, 0x21C5, 0x21C6, 0x21C7, 0x21C8, 0x21C9, 0x21CA, 0x21CB, 0x21CC, 0x21CD, 0x21CE, 0x21CF,
  0x21D0, 0x21D1, 0x21D2, 0x21D3, 0x21D4, 0x21D5, 0x21D6, 0x21D7, 0x21D8, 0x21D9, 0x21DA, 0x21DB, 0x21DC, 0x21DD, 0x21DE, 0x21DF,
  0x21E0, 0x21E1, 0x21E2, 0x21E3, 0x21E4, 0x21E5, 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0x21EC, 0x21ED, 0x21EE, 0x21EF,
  0x21F0, 0x21F1, 0x21F2, 0x21F3, 0x21F4, 0x21F5, 0x21F6, 0x21F7, 0x21F8, 0x21F9, 0x21FA, 0x21FB, 0x21FC, 0x21FD, 0x21FE, 0x21FF
};
// Table "plane24": 256 entries (16 rows of 16 values).
// Presumably indexed by the low byte of a code point in U+2400..U+24FF and
// yielding the code point it is compared as — TODO confirm against the code
// that reads these tables. Only entries 0xD0..0xE9 differ from their own
// code point; they hold 0x24B6..0x24CF.
static const uint16_t plane24[] = {
  0x2400, 0x2401, 0x2402, 0x2403, 0x2404, 0x2405, 0x2406, 0x2407, 0x2408, 0x2409, 0x240A, 0x240B, 0x240C, 0x240D, 0x240E, 0x240F,
  0x2410, 0x2411, 0x2412, 0x2413, 0x2414, 0x2415, 0x2416, 0x2417, 0x2418, 0x2419, 0x241A, 0x241B, 0x241C, 0x241D, 0x241E, 0x241F,
  0x2420, 0x2421, 0x2422, 0x2423, 0x2424, 0x2425, 0x2426, 0x2427, 0x2428, 0x2429, 0x242A, 0x242B, 0x242C, 0x242D, 0x242E, 0x242F,
  0x2430, 0x2431, 0x2432, 0x2433, 0x2434, 0x2435, 0x2436, 0x2437, 0x2438, 0x2439, 0x243A, 0x243B, 0x243C, 0x243D, 0x243E, 0x243F,
  0x2440, 0x2441, 0x2442, 0x2443, 0x2444, 0x2445, 0x2446, 0x2447, 0x2448, 0x2449, 0x244A, 0x244B, 0x244C, 0x244D, 0x244E, 0x244F,
  0x2450, 0x2451, 0x2452, 0x2453, 0x2454, 0x2455, 0x2456, 0x2457, 0x2458, 0x2459, 0x245A, 0x245B, 0x245C, 0x245D, 0x245E, 0x245F,
  0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, 0x246F,
  0x2470, 0x2471, 0x2472, 0x2473, 0x2474, 0x2475, 0x2476, 0x2477, 0x2478, 0x2479, 0x247A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F,
  0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 0x2487, 0x2488, 0x2489, 0x248A, 0x248B, 0x248C, 0x248D, 0x248E, 0x248F,
  0x2490, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 0x2497, 0x2498, 0x2499, 0x249A, 0x249B, 0x249C, 0x249D, 0x249E, 0x249F,
  0x24A0, 0x24A1, 0x24A2, 0x24A3, 0x24A4, 0x24A5, 0x24A6, 0x24A7, 0x24A8, 0x24A9, 0x24AA, 0x24AB, 0x24AC, 0x24AD, 0x24AE, 0x24AF,
  0x24B0, 0x24B1, 0x24B2, 0x24B3, 0x24B4, 0x24B5, 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF,
  0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5, 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF,
  0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF, 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5,
  0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF, 0x24EA, 0x24EB, 0x24EC, 0x24ED, 0x24EE, 0x24EF,
  0x24F0, 0x24F1, 0x24F2, 0x24F3, 0x24F4, 0x24F5, 0x24F6, 0x24F7, 0x24F8, 0x24F9, 0x24FA, 0x24FB, 0x24FC, 0x24FD, 0x24FE, 0x24FF
};
// Collation weights for the code points U+FF00 - U+FFFF (reached through
// planemap[0xFF]). Entry i holds the weight of U+FF00 + i. Every character maps
// to itself except the fullwidth small letters U+FF41 - U+FF5A, which are folded
// onto the fullwidth capital letters U+FF21 - U+FF3A.
static const uint16_t planeFF[] = {
  0xFF00, 0xFF01, 0xFF02, 0xFF03, 0xFF04, 0xFF05, 0xFF06, 0xFF07, 0xFF08, 0xFF09, 0xFF0A, 0xFF0B, 0xFF0C, 0xFF0D, 0xFF0E, 0xFF0F,
  0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19, 0xFF1A, 0xFF1B, 0xFF1C, 0xFF1D, 0xFF1E, 0xFF1F,
  0xFF20, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
  0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF3B, 0xFF3C, 0xFF3D, 0xFF3E, 0xFF3F,
  // U+FF41 onwards: fullwidth small letters take the weight of their capital form
  0xFF40, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
  0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF5B, 0xFF5C, 0xFF5D, 0xFF5E, 0xFF5F,
  0xFF60, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
  0xFF70, 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF76, 0xFF77, 0xFF78, 0xFF79, 0xFF7A, 0xFF7B, 0xFF7C, 0xFF7D, 0xFF7E, 0xFF7F,
  0xFF80, 0xFF81, 0xFF82, 0xFF83, 0xFF84, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8B, 0xFF8C, 0xFF8D, 0xFF8E, 0xFF8F,
  0xFF90, 0xFF91, 0xFF92, 0xFF93, 0xFF94, 0xFF95, 0xFF96, 0xFF97, 0xFF98, 0xFF99, 0xFF9A, 0xFF9B, 0xFF9C, 0xFF9D, 0xFF9E, 0xFF9F,
  0xFFA0, 0xFFA1, 0xFFA2, 0xFFA3, 0xFFA4, 0xFFA5, 0xFFA6, 0xFFA7, 0xFFA8, 0xFFA9, 0xFFAA, 0xFFAB, 0xFFAC, 0xFFAD, 0xFFAE, 0xFFAF,
  0xFFB0, 0xFFB1, 0xFFB2, 0xFFB3, 0xFFB4, 0xFFB5, 0xFFB6, 0xFFB7, 0xFFB8, 0xFFB9, 0xFFBA, 0xFFBB, 0xFFBC, 0xFFBD, 0xFFBE, 0xFFBF,
  0xFFC0, 0xFFC1, 0xFFC2, 0xFFC3, 0xFFC4, 0xFFC5, 0xFFC6, 0xFFC7, 0xFFC8, 0xFFC9, 0xFFCA, 0xFFCB, 0xFFCC, 0xFFCD, 0xFFCE, 0xFFCF,
  0xFFD0, 0xFFD1, 0xFFD2, 0xFFD3, 0xFFD4, 0xFFD5, 0xFFD6, 0xFFD7, 0xFFD8, 0xFFD9, 0xFFDA, 0xFFDB, 0xFFDC, 0xFFDD, 0xFFDE, 0xFFDF,
  0xFFE0, 0xFFE1, 0xFFE2, 0xFFE3, 0xFFE4, 0xFFE5, 0xFFE6, 0xFFE7, 0xFFE8, 0xFFE9, 0xFFEA, 0xFFEB, 0xFFEC, 0xFFED, 0xFFEE, 0xFFEF,
  0xFFF0, 0xFFF1, 0xFFF2, 0xFFF3, 0xFFF4, 0xFFF5, 0xFFF6, 0xFFF7, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF
};
  881. static const uint16_t* const planemap[256] = {
  882. plane00, plane01, plane02, plane03, plane04, plane05, NULL, NULL, NULL, NULL, NULL,
  883. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  884. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, plane1E, plane1F, NULL,
  885. plane21, NULL, NULL, plane24, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  886. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  887. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  888. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  889. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  890. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  891. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  892. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  893. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  894. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  895. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  896. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  897. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  898. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  899. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  900. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  901. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  902. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  903. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  904. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  905. NULL, NULL, planeFF
  906. };
// clang-format on
  908. static wchar_t GetCollationWeight(const wchar_t& r)
  909. {
  910. // Lookup the "weight" of a UFT8 char, equivalent lowercase ascii letter, in the plane map,
  911. // the character comparison value used by using "accent folding" collation utf8_general_ci
  912. // in MySQL (AKA utf8mb3_general_ci in MariaDB 10)
  913. auto index = r >> 8;
  914. if (index > 255)
  915. return 0xFFFD;
  916. auto plane = planemap[index];
  917. if (plane == nullptr)
  918. return r;
  919. return static_cast<wchar_t>(plane[r & 0xFF]);
  920. }
  921. // Compares separately the numeric and alphabetic parts of a string.
  922. // returns negative if left < right, positive if left > right
  923. // and 0 if they are identical
  924. int64_t StringUtils::AlphaNumericCompare(const wchar_t* left, const wchar_t* right)
  925. {
  926. const wchar_t *l = left;
  927. const wchar_t *r = right;
  928. const wchar_t *ld, *rd;
  929. wchar_t lc, rc;
  930. int64_t lnum, rnum;
  931. bool lsym, rsym;
  932. while (*l != 0 && *r != 0)
  933. {
  934. // check if we have a numerical value
  935. if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
  936. {
  937. ld = l;
  938. lnum = 0;
  939. while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
  940. { // compare only up to 15 digits
  941. lnum *= 10;
  942. lnum += *ld++ - L'0';
  943. }
  944. rd = r;
  945. rnum = 0;
  946. while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
  947. { // compare only up to 15 digits
  948. rnum *= 10;
  949. rnum += *rd++ - L'0';
  950. }
  951. // do we have numbers?
  952. if (lnum != rnum)
  953. { // yes - and they're different!
  954. return lnum - rnum;
  955. }
  956. l = ld;
  957. r = rd;
  958. continue;
  959. }
  960. lc = *l;
  961. rc = *r;
  962. if (!g_langInfo.UseLocaleCollation())
  963. {
  964. // Apply case sensitive accent folding collation to non-ascii chars.
  965. // This mimics utf8_general_ci collation, and provides simple collation of LATIN-1 chars
  966. // for any platformthat doesn't have a language specific collate facet implemented
  967. if (lc > 128)
  968. lc = GetCollationWeight(lc);
  969. if (rc > 128)
  970. rc = GetCollationWeight(rc);
  971. }
  972. // Do case less comparison, convert ascii upper case to lower case
  973. if (lc >= L'A' && lc <= L'Z')
  974. lc += L'a' - L'A';
  975. if (rc >= L'A' && rc <= L'Z')
  976. rc += L'a' - L'A';
  977. if (lc != rc)
  978. {
  979. // Put ascii punctuation and symbols e.g. !#$&()*+,-./:;<=>?@[\]^_ `{|}~ above the other
  980. // alphanumeric ascii, rather than some being mixed between the numbers and letters, and
  981. // above all other unicode letters, symbols and punctuation.
  982. // (Locale collation of these chars varies across platforms)
  983. lsym = lc < 128 && !(lc >= L'a' && lc <= L'z') && !(lc >= L'0' && lc <= L'9');
  984. rsym = rc < 128 && !(rc >= L'a' && rc <= L'z') && !(rc >= L'0' && rc <= L'9');
  985. if (lsym && !rsym)
  986. return -1;
  987. if (!lsym && rsym)
  988. return 1;
  989. // Either both or neither are ascii symbols or punctuation marks
  990. if (!g_langInfo.UseLocaleCollation())
  991. {
  992. // Compare unicode (having applied accent folding collation to non-ascii chars).
  993. int i = wcsncmp(&lc, &rc, 1);
  994. return i;
  995. }
  996. else
  997. {
  998. // Fetch collation facet from locale to do comparison of wide char although on some
  999. // platforms this is not langauge specific but just compares unicode
  1000. const std::collate<wchar_t>& coll =
  1001. std::use_facet<std::collate<wchar_t>>(g_langInfo.GetSystemLocale());
  1002. int cmp_res = 0;
  1003. cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1);
  1004. if (cmp_res != 0)
  1005. return cmp_res;
  1006. }
  1007. }
  1008. l++; r++;
  1009. }
  1010. if (*r)
  1011. { // r is longer
  1012. return -1;
  1013. }
  1014. else if (*l)
  1015. { // l is longer
  1016. return 1;
  1017. }
  1018. return 0; // files are the same
  1019. }
  1020. int StringUtils::DateStringToYYYYMMDD(const std::string &dateString)
  1021. {
  1022. std::vector<std::string> days = StringUtils::Split(dateString, '-');
  1023. if (days.size() == 1)
  1024. return atoi(days[0].c_str());
  1025. else if (days.size() == 2)
  1026. return atoi(days[0].c_str())*100+atoi(days[1].c_str());
  1027. else if (days.size() == 3)
  1028. return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
  1029. else
  1030. return -1;
  1031. }
  1032. std::string StringUtils::ISODateToLocalizedDate(const std::string& strIsoDate)
  1033. {
  1034. // Convert ISO8601 date strings YYYY, YYYY-MM, or YYYY-MM-DD to (partial) localized date strings
  1035. CDateTime date;
  1036. std::string formattedDate = strIsoDate;
  1037. if (formattedDate.size() == 10)
  1038. {
  1039. date.SetFromDBDate(strIsoDate);
  1040. formattedDate = date.GetAsLocalizedDate();
  1041. }
  1042. else if (formattedDate.size() == 7)
  1043. {
  1044. std::string strFormat = date.GetAsLocalizedDate(false);
  1045. std::string tempdate;
  1046. // find which date separator we are using. Can be -./
  1047. size_t pos = strFormat.find_first_of("-./");
  1048. if (pos != std::string::npos)
  1049. {
  1050. bool yearFirst = strFormat.find("1601") == 0; // true if year comes first
  1051. std::string sep = strFormat.substr(pos, 1);
  1052. if (yearFirst)
  1053. { // build formatted date with year first, then separator and month
  1054. tempdate = formattedDate.substr(0, 4);
  1055. tempdate += sep;
  1056. tempdate += formattedDate.substr(5, 2);
  1057. }
  1058. else
  1059. {
  1060. tempdate = formattedDate.substr(5, 2);
  1061. tempdate += sep;
  1062. tempdate += formattedDate.substr(0, 4);
  1063. }
  1064. formattedDate = tempdate;
  1065. }
  1066. // return either just the year or the locally formatted version of the ISO date
  1067. }
  1068. return formattedDate;
  1069. }
  1070. long StringUtils::TimeStringToSeconds(const std::string &timeString)
  1071. {
  1072. std::string strCopy(timeString);
  1073. StringUtils::Trim(strCopy);
  1074. if(StringUtils::EndsWithNoCase(strCopy, " min"))
  1075. {
  1076. // this is imdb format of "XXX min"
  1077. return 60 * atoi(strCopy.c_str());
  1078. }
  1079. else
  1080. {
  1081. std::vector<std::string> secs = StringUtils::Split(strCopy, ':');
  1082. int timeInSecs = 0;
  1083. for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
  1084. {
  1085. timeInSecs *= 60;
  1086. timeInSecs += atoi(secs[i].c_str());
  1087. }
  1088. return timeInSecs;
  1089. }
  1090. }
  1091. std::string StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
  1092. {
  1093. bool isNegative = lSeconds < 0;
  1094. lSeconds = std::abs(lSeconds);
  1095. std::string strHMS;
  1096. if (format == TIME_FORMAT_SECS)
  1097. strHMS = StringUtils::Format("%i", lSeconds);
  1098. else if (format == TIME_FORMAT_MINS)
  1099. strHMS = StringUtils::Format("%i", lrintf(static_cast<float>(lSeconds) / 60.0f));
  1100. else if (format == TIME_FORMAT_HOURS)
  1101. strHMS = StringUtils::Format("%i", lrintf(static_cast<float>(lSeconds) / 3600.0f));
  1102. else if (format & TIME_FORMAT_M)
  1103. strHMS += StringUtils::Format("%i", lSeconds % 3600 / 60);
  1104. else
  1105. {
  1106. int hh = lSeconds / 3600;
  1107. lSeconds = lSeconds % 3600;
  1108. int mm = lSeconds / 60;
  1109. int ss = lSeconds % 60;
  1110. if (format == TIME_FORMAT_GUESS)
  1111. format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
  1112. if (format & TIME_FORMAT_HH)
  1113. strHMS += StringUtils::Format("%2.2i", hh);
  1114. else if (format & TIME_FORMAT_H)
  1115. strHMS += StringUtils::Format("%i", hh);
  1116. if (format & TIME_FORMAT_MM)
  1117. strHMS += StringUtils::Format(strHMS.empty() ? "%2.2i" : ":%2.2i", mm);
  1118. if (format & TIME_FORMAT_SS)
  1119. strHMS += StringUtils::Format(strHMS.empty() ? "%2.2i" : ":%2.2i", ss);
  1120. }
  1121. if (isNegative)
  1122. strHMS = "-" + strHMS;
  1123. return strHMS;
  1124. }
  1125. bool StringUtils::IsNaturalNumber(const std::string& str)
  1126. {
  1127. size_t i = 0, n = 0;
  1128. // allow whitespace,digits,whitespace
  1129. while (i < str.size() && isspace((unsigned char) str[i]))
  1130. i++;
  1131. while (i < str.size() && isdigit((unsigned char) str[i]))
  1132. {
  1133. i++; n++;
  1134. }
  1135. while (i < str.size() && isspace((unsigned char) str[i]))
  1136. i++;
  1137. return i == str.size() && n > 0;
  1138. }
  1139. bool StringUtils::IsInteger(const std::string& str)
  1140. {
  1141. size_t i = 0, n = 0;
  1142. // allow whitespace,-,digits,whitespace
  1143. while (i < str.size() && isspace((unsigned char) str[i]))
  1144. i++;
  1145. if (i < str.size() && str[i] == '-')
  1146. i++;
  1147. while (i < str.size() && isdigit((unsigned char) str[i]))
  1148. {
  1149. i++; n++;
  1150. }
  1151. while (i < str.size() && isspace((unsigned char) str[i]))
  1152. i++;
  1153. return i == str.size() && n > 0;
  1154. }
  1155. int StringUtils::asciidigitvalue(char chr)
  1156. {
  1157. if (!isasciidigit(chr))
  1158. return -1;
  1159. return chr - '0';
  1160. }
  1161. int StringUtils::asciixdigitvalue(char chr)
  1162. {
  1163. int v = asciidigitvalue(chr);
  1164. if (v >= 0)
  1165. return v;
  1166. if (chr >= 'a' && chr <= 'f')
  1167. return chr - 'a' + 10;
  1168. if (chr >= 'A' && chr <= 'F')
  1169. return chr - 'A' + 10;
  1170. return -1;
  1171. }
// Strips line ending characters from the end of the line, in place, by
// right-trimming the characters '\n' and '\r' with TrimRight.
void StringUtils::RemoveCRLF(std::string& strLine)
{
  StringUtils::TrimRight(strLine, "\n\r");
}
  1176. std::string StringUtils::SizeToString(int64_t size)
  1177. {
  1178. std::string strLabel;
  1179. const char prefixes[] = {' ', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
  1180. unsigned int i = 0;
  1181. double s = (double)size;
  1182. while (i < ARRAY_SIZE(prefixes) && s >= 1000.0)
  1183. {
  1184. s /= 1024.0;
  1185. i++;
  1186. }
  1187. if (!i)
  1188. strLabel = StringUtils::Format("%.lf B", s);
  1189. else if (i == ARRAY_SIZE(prefixes))
  1190. {
  1191. if (s >= 1000.0)
  1192. strLabel = StringUtils::Format(">999.99 %cB", prefixes[i - 1]);
  1193. else
  1194. strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i - 1]);
  1195. }
  1196. else if (s >= 100.0)
  1197. strLabel = StringUtils::Format("%.1lf %cB", s, prefixes[i]);
  1198. else
  1199. strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i]);
  1200. return strLabel;
  1201. }
  1202. std::string StringUtils::BinaryStringToString(const std::string& in)
  1203. {
  1204. std::string out;
  1205. out.reserve(in.size() / 2);
  1206. for (const char *cur = in.c_str(), *end = cur + in.size(); cur != end; ++cur) {
  1207. if (*cur == '\\') {
  1208. ++cur;
  1209. if (cur == end) {
  1210. break;
  1211. }
  1212. if (isdigit(*cur)) {
  1213. char* end;
  1214. unsigned long num = strtol(cur, &end, 10);
  1215. cur = end - 1;
  1216. out.push_back(num);
  1217. continue;
  1218. }
  1219. }
  1220. out.push_back(*cur);
  1221. }
  1222. return out;
  1223. }
  1224. std::string StringUtils::ToHexadecimal(const std::string& in)
  1225. {
  1226. std::ostringstream ss;
  1227. ss << std::hex;
  1228. for (unsigned char ch : in) {
  1229. ss << std::setw(2) << std::setfill('0') << static_cast<unsigned long> (ch);
  1230. }
  1231. return ss.str();
  1232. }
  1233. // return -1 if not, else return the utf8 char length.
  1234. int IsUTF8Letter(const unsigned char *str)
  1235. {
  1236. // reference:
  1237. // unicode -> utf8 table: http://www.utf8-chartable.de/
  1238. // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
  1239. unsigned char ch = str[0];
  1240. if (!ch)
  1241. return -1;
  1242. if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
  1243. return 1;
  1244. if (!(ch & 0x80))
  1245. return -1;
  1246. unsigned char ch2 = str[1];
  1247. if (!ch2)
  1248. return -1;
  1249. // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
  1250. if (ch == 0xC3 && ch2 >= 0x80 && ch2 <= 0xBF && ch2 != 0x97 && ch2 != 0xB7)
  1251. return 2;
  1252. // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
  1253. if (ch >= 0xC4 && ch <= 0xC7 && ch2 >= 0x80 && ch2 <= 0xBF)
  1254. return 2;
  1255. // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
  1256. // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
  1257. if (((ch == 0xC8 || ch == 0xC9) && ch2 >= 0x80 && ch2 <= 0xBF)
  1258. || (ch == 0xCA && ch2 >= 0x80 && ch2 <= 0xAF))
  1259. return 2;
  1260. return -1;
  1261. }
  1262. size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
  1263. {
  1264. // NOTE: This assumes word is lowercase!
  1265. const unsigned char *s = (const unsigned char *)str;
  1266. do
  1267. {
  1268. // start with a compare
  1269. const unsigned char *c = s;
  1270. const unsigned char *w = (const unsigned char *)wordLowerCase;
  1271. bool same = true;
  1272. while (same && *c && *w)
  1273. {
  1274. unsigned char lc = *c++;
  1275. if (lc >= 'A' && lc <= 'Z')
  1276. lc += 'a'-'A';
  1277. if (lc != *w++) // different
  1278. same = false;
  1279. }
  1280. if (same && *w == 0) // only the same if word has been exhausted
  1281. return (const char *)s - str;
  1282. // otherwise, skip current word (composed by latin letters) or number
  1283. int l;
  1284. if (*s >= '0' && *s <= '9')
  1285. {
  1286. ++s;
  1287. while (*s >= '0' && *s <= '9') ++s;
  1288. }
  1289. else if ((l = IsUTF8Letter(s)) > 0)
  1290. {
  1291. s += l;
  1292. while ((l = IsUTF8Letter(s)) > 0) s += l;
  1293. }
  1294. else
  1295. ++s;
  1296. while (*s && *s == ' ') s++;
  1297. // and repeat until we're done
  1298. } while (*s);
  1299. return std::string::npos;
  1300. }
  1301. // assumes it is called from after the first open bracket is found
  1302. int StringUtils::FindEndBracket(const std::string &str, char opener, char closer, int startPos)
  1303. {
  1304. int blocks = 1;
  1305. for (unsigned int i = startPos; i < str.size(); i++)
  1306. {
  1307. if (str[i] == opener)
  1308. blocks++;
  1309. else if (str[i] == closer)
  1310. {
  1311. blocks--;
  1312. if (!blocks)
  1313. return i;
  1314. }
  1315. }
  1316. return (int)std::string::npos;
  1317. }
  1318. void StringUtils::WordToDigits(std::string &word)
  1319. {
  1320. static const char word_to_letter[] = "22233344455566677778889999";
  1321. StringUtils::ToLower(word);
  1322. for (unsigned int i = 0; i < word.size(); ++i)
  1323. { // NB: This assumes ascii, which probably needs extending at some point.
  1324. char letter = word[i];
  1325. if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
  1326. {
  1327. word[i] = word_to_letter[letter-'a'];
  1328. }
  1329. else if (letter < '0' || letter > '9') // We want to keep 0-9!
  1330. {
  1331. word[i] = ' '; // replace everything else with a space
  1332. }
  1333. }
  1334. }
// Creates a new GUID and returns it as a string.
// With HAVE_NEW_CROSSGUID defined the string comes straight from
// xg::newGuid().str(); otherwise the GUID is produced by a function-local
// static GuidGenerator and turned into text by streaming it into a
// std::stringstream.
std::string StringUtils::CreateUUID()
{
#ifdef HAVE_NEW_CROSSGUID
  return xg::newGuid().str();
#else
  static GuidGenerator guidGenerator;
  auto guid = guidGenerator.newGuid();
  std::stringstream strGuid; strGuid << guid;
  return strGuid.str();
#endif
}
  1346. bool StringUtils::ValidateUUID(const std::string &uuid)
  1347. {
  1348. CRegExp guidRE;
  1349. guidRE.RegComp(ADDON_GUID_RE);
  1350. return (guidRE.RegFind(uuid.c_str()) == 0);
  1351. }
  1352. double StringUtils::CompareFuzzy(const std::string &left, const std::string &right)
  1353. {
  1354. return (0.5 + fstrcmp(left.c_str(), right.c_str()) * (left.length() + right.length())) / 2.0;
  1355. }
  1356. int StringUtils::FindBestMatch(const std::string &str, const std::vector<std::string> &strings, double &matchscore)
  1357. {
  1358. int best = -1;
  1359. matchscore = 0;
  1360. int i = 0;
  1361. for (std::vector<std::string>::const_iterator it = strings.begin(); it != strings.end(); ++it, i++)
  1362. {
  1363. int maxlength = std::max(str.length(), it->length());
  1364. double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
  1365. if (score > matchscore)
  1366. {
  1367. matchscore = score;
  1368. best = i;
  1369. }
  1370. }
  1371. return best;
  1372. }
  1373. bool StringUtils::ContainsKeyword(const std::string &str, const std::vector<std::string> &keywords)
  1374. {
  1375. for (std::vector<std::string>::const_iterator it = keywords.begin(); it != keywords.end(); ++it)
  1376. {
  1377. if (str.find(*it) != str.npos)
  1378. return true;
  1379. }
  1380. return false;
  1381. }
  1382. size_t StringUtils::utf8_strlen(const char *s)
  1383. {
  1384. size_t length = 0;
  1385. while (*s)
  1386. {
  1387. if ((*s++ & 0xC0) != 0x80)
  1388. length++;
  1389. }
  1390. return length;
  1391. }
  1392. std::string StringUtils::Paramify(const std::string &param)
  1393. {
  1394. std::string result = param;
  1395. // escape backspaces
  1396. StringUtils::Replace(result, "\\", "\\\\");
  1397. // escape double quotes
  1398. StringUtils::Replace(result, "\"", "\\\"");
  1399. // add double quotes around the whole string
  1400. return "\"" + result + "\"";
  1401. }
  1402. std::vector<std::string> StringUtils::Tokenize(const std::string &input, const std::string &delimiters)
  1403. {
  1404. std::vector<std::string> tokens;
  1405. Tokenize(input, tokens, delimiters);
  1406. return tokens;
  1407. }
  1408. void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
  1409. {
  1410. tokens.clear();
  1411. // Skip delimiters at beginning.
  1412. std::string::size_type dataPos = input.find_first_not_of(delimiters);
  1413. while (dataPos != std::string::npos)
  1414. {
  1415. // Find next delimiter
  1416. const std::string::size_type nextDelimPos = input.find_first_of(delimiters, dataPos);
  1417. // Found a token, add it to the vector.
  1418. tokens.push_back(input.substr(dataPos, nextDelimPos - dataPos));
  1419. // Skip delimiters. Note the "not_of"
  1420. dataPos = input.find_first_not_of(delimiters, nextDelimPos);
  1421. }
  1422. }
  1423. std::vector<std::string> StringUtils::Tokenize(const std::string &input, const char delimiter)
  1424. {
  1425. std::vector<std::string> tokens;
  1426. Tokenize(input, tokens, delimiter);
  1427. return tokens;
  1428. }
  1429. void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const char delimiter)
  1430. {
  1431. tokens.clear();
  1432. // Skip delimiters at beginning.
  1433. std::string::size_type dataPos = input.find_first_not_of(delimiter);
  1434. while (dataPos != std::string::npos)
  1435. {
  1436. // Find next delimiter
  1437. const std::string::size_type nextDelimPos = input.find(delimiter, dataPos);
  1438. // Found a token, add it to the vector.
  1439. tokens.push_back(input.substr(dataPos, nextDelimPos - dataPos));
  1440. // Skip delimiters. Note the "not_of"
  1441. dataPos = input.find_first_not_of(delimiter, nextDelimPos);
  1442. }
  1443. }
  1444. uint64_t StringUtils::ToUint64(std::string str, uint64_t fallback) noexcept
  1445. {
  1446. std::istringstream iss(str);
  1447. uint64_t result(fallback);
  1448. iss >> result;
  1449. return result;
  1450. }
  1451. std::string StringUtils::FormatFileSize(uint64_t bytes)
  1452. {
  1453. const std::array<std::string, 6> units{{"B", "kB", "MB", "GB", "TB", "PB"}};
  1454. if (bytes < 1000)
  1455. return Format("%" PRIu64 "B", bytes);
  1456. size_t i = 0;
  1457. double value = static_cast<double>(bytes);
  1458. while (i + 1 < units.size() && value >= 999.5)
  1459. {
  1460. ++i;
  1461. value /= 1024.0;
  1462. }
  1463. unsigned int decimals = value < 9.995 ? 2 : (value < 99.95 ? 1 : 0);
  1464. auto frmt = "%." + Format("%u", decimals) + "f%s";
  1465. return Format(frmt.c_str(), value, units[i].c_str());
  1466. }
// Returns the locale reported by g_langInfo.GetOriginalLocale(); this is a
// plain forwarder to the language info object.
const std::locale& StringUtils::GetOriginalLocale() noexcept
{
  return g_langInfo.GetOriginalLocale();
}