PageRenderTime 50ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/src/qt/qtbase/src/3rdparty/xkbcommon/src/utf8.c

https://code.google.com/
C | 142 lines | 100 code | 13 blank | 29 comment | 56 complexity | e1822a7120285b339537b24396844228 MD5 | raw file
Possible License(s): LGPL-3.0, CC-BY-SA-4.0, MIT, AGPL-3.0, BSD-3-Clause, LGPL-2.1, CC0-1.0, GPL-2.0, LGPL-2.0, GPL-3.0
  1. /*
  2. * Copyright © 2012 Intel Corporation
  3. * Copyright © 2014 Ran Benita <ran234@gmail.com>
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice (including the next
  13. * paragraph) shall be included in all copies or substantial portions of the
  14. * Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22. * DEALINGS IN THE SOFTWARE.
  23. *
  24. * Author: Rob Bradford <rob@linux.intel.com>
  25. */
  26. #include <stddef.h>
  27. #include <stdbool.h>
  28. #include <inttypes.h>
  29. #include "utf8.h"
  30. int
  31. utf32_to_utf8(uint32_t unichar, char *buffer)
  32. {
  33. int count, shift, length;
  34. uint8_t head;
  35. if (unichar <= 0x007f) {
  36. buffer[0] = unichar;
  37. buffer[1] = '\0';
  38. return 2;
  39. }
  40. else if (unichar <= 0x07FF) {
  41. length = 2;
  42. head = 0xc0;
  43. }
  44. else if (unichar <= 0xffff) {
  45. length = 3;
  46. head = 0xe0;
  47. }
  48. else if (unichar <= 0x1fffff) {
  49. length = 4;
  50. head = 0xf0;
  51. }
  52. else if (unichar <= 0x3ffffff) {
  53. length = 5;
  54. head = 0xf8;
  55. }
  56. else {
  57. length = 6;
  58. head = 0xfc;
  59. }
  60. for (count = length - 1, shift = 0; count > 0; count--, shift += 6)
  61. buffer[count] = 0x80 | ((unichar >> shift) & 0x3f);
  62. buffer[0] = head | ((unichar >> shift) & 0x3f);
  63. buffer[length] = '\0';
  64. return length + 1;
  65. }
  66. bool
  67. is_valid_utf8(const char *ss, size_t len)
  68. {
  69. size_t i = 0;
  70. size_t tail_bytes = 0;
  71. const uint8_t *s = (const uint8_t *) ss;
  72. /* This beauty is from:
  73. * The Unicode Standard Version 6.2 - Core Specification, Table 3.7
  74. * http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf#G7404
  75. * We can optimize if needed. */
  76. while (i < len)
  77. {
  78. if (s[i] <= 0x7F) {
  79. tail_bytes = 0;
  80. }
  81. else if (s[i] >= 0xC2 && s[i] <= 0xDF) {
  82. tail_bytes = 1;
  83. }
  84. else if (s[i] == 0xE0) {
  85. i++;
  86. if (i >= len || !(s[i] >= 0xA0 && s[i] <= 0xBF))
  87. return false;
  88. tail_bytes = 1;
  89. }
  90. else if (s[i] >= 0xE1 && s[i] <= 0xEC) {
  91. tail_bytes = 2;
  92. }
  93. else if (s[i] == 0xED) {
  94. i++;
  95. if (i >= len || !(s[i] >= 0x80 && s[i] <= 0x9F))
  96. return false;
  97. tail_bytes = 1;
  98. }
  99. else if (s[i] >= 0xEE && s[i] <= 0xEF) {
  100. tail_bytes = 2;
  101. }
  102. else if (s[i] == 0xF0) {
  103. i++;
  104. if (i >= len || !(s[i] >= 0x90 && s[i] <= 0xBF))
  105. return false;
  106. tail_bytes = 2;
  107. }
  108. else if (s[i] >= 0xF1 && s[i] <= 0xF3) {
  109. tail_bytes = 3;
  110. }
  111. else if (s[i] == 0xF4) {
  112. i++;
  113. if (i >= len || !(s[i] >= 0x80 && s[i] <= 0x8F))
  114. return false;
  115. tail_bytes = 2;
  116. }
  117. else {
  118. return false;
  119. }
  120. i++;
  121. while (i < len && tail_bytes > 0 && s[i] >= 0x80 && s[i] <= 0xBF) {
  122. i++;
  123. tail_bytes--;
  124. }
  125. if (tail_bytes != 0)
  126. return false;
  127. }
  128. return true;
  129. }