/thirdparty/breakpad/common/string_conversion.cc

http://github.com/tomahawk-player/tomahawk · C++ · 154 lines · 95 code · 23 blank · 36 comment · 13 complexity · 8a27df77ef7e8dcc15272c881bc43993 MD5 · raw file

  1. // Copyright (c) 2006, Google Inc.
  2. // All rights reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. #include "common/convert_UTF.h"
  30. #include "processor/scoped_ptr.h"
  31. #include "common/string_conversion.h"
  32. #include <string.h>
  33. namespace google_breakpad {
  34. using std::string;
  35. using std::vector;
  36. void UTF8ToUTF16(const char *in, vector<u_int16_t> *out) {
  37. size_t source_length = strlen(in);
  38. const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in);
  39. const UTF8 *source_end_ptr = source_ptr + source_length;
  40. // Erase the contents and zero fill to the expected size
  41. out->clear();
  42. out->insert(out->begin(), source_length, 0);
  43. u_int16_t *target_ptr = &(*out)[0];
  44. u_int16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(u_int16_t);
  45. ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
  46. &target_ptr, target_end_ptr,
  47. strictConversion);
  48. // Resize to be the size of the # of converted characters + NULL
  49. out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
  50. }
  51. int UTF8ToUTF16Char(const char *in, int in_length, u_int16_t out[2]) {
  52. const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in);
  53. const UTF8 *source_end_ptr = source_ptr + sizeof(char);
  54. u_int16_t *target_ptr = out;
  55. u_int16_t *target_end_ptr = target_ptr + 2 * sizeof(u_int16_t);
  56. out[0] = out[1] = 0;
  57. // Process one character at a time
  58. while (1) {
  59. ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
  60. &target_ptr, target_end_ptr,
  61. strictConversion);
  62. if (result == conversionOK)
  63. return static_cast<int>(source_ptr - reinterpret_cast<const UTF8 *>(in));
  64. // Add another character to the input stream and try again
  65. source_ptr = reinterpret_cast<const UTF8 *>(in);
  66. ++source_end_ptr;
  67. if (source_end_ptr > reinterpret_cast<const UTF8 *>(in) + in_length)
  68. break;
  69. }
  70. return 0;
  71. }
  72. void UTF32ToUTF16(const wchar_t *in, vector<u_int16_t> *out) {
  73. size_t source_length = wcslen(in);
  74. const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(in);
  75. const UTF32 *source_end_ptr = source_ptr + source_length;
  76. // Erase the contents and zero fill to the expected size
  77. out->clear();
  78. out->insert(out->begin(), source_length, 0);
  79. u_int16_t *target_ptr = &(*out)[0];
  80. u_int16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(u_int16_t);
  81. ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
  82. &target_ptr, target_end_ptr,
  83. strictConversion);
  84. // Resize to be the size of the # of converted characters + NULL
  85. out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
  86. }
  87. void UTF32ToUTF16Char(wchar_t in, u_int16_t out[2]) {
  88. const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(&in);
  89. const UTF32 *source_end_ptr = source_ptr + 1;
  90. u_int16_t *target_ptr = out;
  91. u_int16_t *target_end_ptr = target_ptr + 2 * sizeof(u_int16_t);
  92. out[0] = out[1] = 0;
  93. ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
  94. &target_ptr, target_end_ptr,
  95. strictConversion);
  96. if (result != conversionOK) {
  97. out[0] = out[1] = 0;
  98. }
  99. }
  // Returns |value| with its two bytes exchanged (0xAABB becomes 0xBBAA).
  static inline u_int16_t Swap(u_int16_t value) {
    // The shifts are evaluated after integer promotion; the casts drop the
    // bits that moved beyond the low 16.
    const u_int16_t former_high_byte = static_cast<u_int16_t>(value >> 8);
    const u_int16_t former_low_byte = static_cast<u_int16_t>(value << 8);
    return static_cast<u_int16_t>(former_high_byte | former_low_byte);
  }
  103. string UTF16ToUTF8(const vector<u_int16_t> &in, bool swap) {
  104. const UTF16 *source_ptr = &in[0];
  105. scoped_ptr<u_int16_t> source_buffer;
  106. // If we're to swap, we need to make a local copy and swap each byte pair
  107. if (swap) {
  108. int idx = 0;
  109. source_buffer.reset(new u_int16_t[in.size()]);
  110. UTF16 *source_buffer_ptr = source_buffer.get();
  111. for (vector<u_int16_t>::const_iterator it = in.begin();
  112. it != in.end(); ++it, ++idx)
  113. source_buffer_ptr[idx] = Swap(*it);
  114. source_ptr = source_buffer.get();
  115. }
  116. // The maximum expansion would be 4x the size of the input string.
  117. const UTF16 *source_end_ptr = source_ptr + in.size();
  118. size_t target_capacity = in.size() * 4;
  119. scoped_array<UTF8> target_buffer(new UTF8[target_capacity]);
  120. UTF8 *target_ptr = target_buffer.get();
  121. UTF8 *target_end_ptr = target_ptr + target_capacity;
  122. ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr,
  123. &target_ptr, target_end_ptr,
  124. strictConversion);
  125. if (result == conversionOK) {
  126. const char *targetPtr = reinterpret_cast<const char *>(target_buffer.get());
  127. return targetPtr;
  128. }
  129. return "";
  130. }
  131. } // namespace google_breakpad