PageRenderTime 42ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/release/src/router/php/ext/mbstring/libmbfl/filters/mbfilter_utf16.c

https://gitlab.com/envieidoc/tomato
C | 316 lines | 242 code | 31 blank | 43 comment | 60 complexity | 863e1969ad747e0906dd9bf867c9bb06 MD5 | raw file
  1. /*
  2. * "streamable kanji code filter and converter"
  3. * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
  4. *
  5. * LICENSE NOTICES
  6. *
  7. * This file is part of "streamable kanji code filter and converter",
  8. * which is distributed under the terms of GNU Lesser General Public
  9. * License (version 2) as published by the Free Software Foundation.
  10. *
  11. * This software is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with "streamable kanji code filter and converter";
  18. * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19. * Suite 330, Boston, MA 02111-1307 USA
  20. *
  21. * The author of this file:
  22. *
  23. */
  24. /*
  25. * The source code included in this file was separated from mbfilter.c
  26. * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
  27. *
  28. */
  29. #ifdef HAVE_CONFIG_H
  30. #include "config.h"
  31. #endif
  32. #include "mbfilter.h"
  33. #include "mbfilter_utf16.h"
  /* Alias names for the UTF-16 encoding; the list is terminated by NULL. */
  static const char *mbfl_encoding_utf16_aliases[] = {
      "utf16",
      NULL
  };
  35. const mbfl_encoding mbfl_encoding_utf16 = {
  36. mbfl_no_encoding_utf16,
  37. "UTF-16",
  38. "UTF-16",
  39. (const char *(*)[])&mbfl_encoding_utf16_aliases,
  40. NULL,
  41. MBFL_ENCTYPE_MWC2BE
  42. };
  43. const mbfl_encoding mbfl_encoding_utf16be = {
  44. mbfl_no_encoding_utf16be,
  45. "UTF-16BE",
  46. "UTF-16BE",
  47. NULL,
  48. NULL,
  49. MBFL_ENCTYPE_MWC2BE
  50. };
  51. const mbfl_encoding mbfl_encoding_utf16le = {
  52. mbfl_no_encoding_utf16le,
  53. "UTF-16LE",
  54. "UTF-16LE",
  55. NULL,
  56. NULL,
  57. MBFL_ENCTYPE_MWC2LE
  58. };
  59. const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
  60. mbfl_no_encoding_utf16,
  61. mbfl_no_encoding_wchar,
  62. mbfl_filt_conv_common_ctor,
  63. mbfl_filt_conv_common_dtor,
  64. mbfl_filt_conv_utf16_wchar,
  65. mbfl_filt_conv_common_flush
  66. };
  67. const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
  68. mbfl_no_encoding_wchar,
  69. mbfl_no_encoding_utf16,
  70. mbfl_filt_conv_common_ctor,
  71. mbfl_filt_conv_common_dtor,
  72. mbfl_filt_conv_wchar_utf16be,
  73. mbfl_filt_conv_common_flush
  74. };
  75. const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
  76. mbfl_no_encoding_utf16be,
  77. mbfl_no_encoding_wchar,
  78. mbfl_filt_conv_common_ctor,
  79. mbfl_filt_conv_common_dtor,
  80. mbfl_filt_conv_utf16be_wchar,
  81. mbfl_filt_conv_common_flush
  82. };
  83. const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
  84. mbfl_no_encoding_wchar,
  85. mbfl_no_encoding_utf16be,
  86. mbfl_filt_conv_common_ctor,
  87. mbfl_filt_conv_common_dtor,
  88. mbfl_filt_conv_wchar_utf16be,
  89. mbfl_filt_conv_common_flush
  90. };
  91. const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
  92. mbfl_no_encoding_utf16le,
  93. mbfl_no_encoding_wchar,
  94. mbfl_filt_conv_common_ctor,
  95. mbfl_filt_conv_common_dtor,
  96. mbfl_filt_conv_utf16le_wchar,
  97. mbfl_filt_conv_common_flush
  98. };
  99. const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
  100. mbfl_no_encoding_wchar,
  101. mbfl_no_encoding_utf16le,
  102. mbfl_filt_conv_common_ctor,
  103. mbfl_filt_conv_common_dtor,
  104. mbfl_filt_conv_wchar_utf16le,
  105. mbfl_filt_conv_common_flush
  106. };
  /*
   * Evaluate `statement` exactly once; if its value is negative, make the
   * enclosing function return -1.  Wrapped in do/while(0) so that it behaves
   * as a single statement.
   */
  #define CK(statement)            \
      do {                         \
          if ((statement) < 0) {   \
              return (-1);         \
          }                        \
      } while (0)
  108. /*
  109. * UTF-16 => wchar
  110. */
  111. int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
  112. {
  113. int n, endian;
  114. endian = filter->status & 0xff00;
  115. switch (filter->status & 0x0f) {
  116. case 0:
  117. if (endian) {
  118. n = c & 0xff;
  119. } else {
  120. n = (c & 0xff) << 8;
  121. }
  122. filter->cache |= n;
  123. filter->status++;
  124. break;
  125. default:
  126. if (endian) {
  127. n = (c & 0xff) << 8;
  128. } else {
  129. n = c & 0xff;
  130. }
  131. n |= filter->cache & 0xffff;
  132. filter->status &= ~0x0f;
  133. if (n >= 0xd800 && n < 0xdc00) {
  134. filter->cache = ((n & 0x3ff) << 16) + 0x400000;
  135. } else if (n >= 0xdc00 && n < 0xe000) {
  136. n &= 0x3ff;
  137. n |= (filter->cache & 0xfff0000) >> 6;
  138. filter->cache = 0;
  139. if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
  140. CK((*filter->output_function)(n, filter->data));
  141. } else { /* illegal character */
  142. n &= MBFL_WCSGROUP_MASK;
  143. n |= MBFL_WCSGROUP_THROUGH;
  144. CK((*filter->output_function)(n, filter->data));
  145. }
  146. } else {
  147. int is_first = filter->status & 0x10;
  148. filter->cache = 0;
  149. filter->status |= 0x10;
  150. if (!is_first) {
  151. if (n == 0xfffe) {
  152. if (endian) {
  153. filter->status &= ~0x100; /* big-endian */
  154. } else {
  155. filter->status |= 0x100; /* little-endian */
  156. }
  157. break;
  158. } else if (n == 0xfeff) {
  159. break;
  160. }
  161. }
  162. CK((*filter->output_function)(n, filter->data));
  163. }
  164. break;
  165. }
  166. return c;
  167. }
  168. /*
  169. * UTF-16BE => wchar
  170. */
  171. int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
  172. {
  173. int n;
  174. switch (filter->status) {
  175. case 0:
  176. filter->status = 1;
  177. n = (c & 0xff) << 8;
  178. filter->cache |= n;
  179. break;
  180. default:
  181. filter->status = 0;
  182. n = (filter->cache & 0xff00) | (c & 0xff);
  183. if (n >= 0xd800 && n < 0xdc00) {
  184. filter->cache = ((n & 0x3ff) << 16) + 0x400000;
  185. } else if (n >= 0xdc00 && n < 0xe000) {
  186. n &= 0x3ff;
  187. n |= (filter->cache & 0xfff0000) >> 6;
  188. filter->cache = 0;
  189. if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
  190. CK((*filter->output_function)(n, filter->data));
  191. } else { /* illegal character */
  192. n &= MBFL_WCSGROUP_MASK;
  193. n |= MBFL_WCSGROUP_THROUGH;
  194. CK((*filter->output_function)(n, filter->data));
  195. }
  196. } else {
  197. filter->cache = 0;
  198. CK((*filter->output_function)(n, filter->data));
  199. }
  200. break;
  201. }
  202. return c;
  203. }
  204. /*
  205. * wchar => UTF-16BE
  206. */
  207. int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
  208. {
  209. int n;
  210. if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
  211. CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
  212. CK((*filter->output_function)(c & 0xff, filter->data));
  213. } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
  214. n = ((c >> 10) - 0x40) | 0xd800;
  215. CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
  216. CK((*filter->output_function)(n & 0xff, filter->data));
  217. n = (c & 0x3ff) | 0xdc00;
  218. CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
  219. CK((*filter->output_function)(n & 0xff, filter->data));
  220. } else {
  221. if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  222. CK(mbfl_filt_conv_illegal_output(c, filter));
  223. }
  224. }
  225. return c;
  226. }
  227. /*
  228. * UTF-16LE => wchar
  229. */
  230. int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
  231. {
  232. int n;
  233. switch (filter->status) {
  234. case 0:
  235. filter->status = 1;
  236. n = c & 0xff;
  237. filter->cache |= n;
  238. break;
  239. default:
  240. filter->status = 0;
  241. n = (filter->cache & 0xff) | ((c & 0xff) << 8);
  242. if (n >= 0xd800 && n < 0xdc00) {
  243. filter->cache = ((n & 0x3ff) << 16) + 0x400000;
  244. } else if (n >= 0xdc00 && n < 0xe000) {
  245. n &= 0x3ff;
  246. n |= (filter->cache & 0xfff0000) >> 6;
  247. filter->cache = 0;
  248. if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
  249. CK((*filter->output_function)(n, filter->data));
  250. } else { /* illegal character */
  251. n &= MBFL_WCSGROUP_MASK;
  252. n |= MBFL_WCSGROUP_THROUGH;
  253. CK((*filter->output_function)(n, filter->data));
  254. }
  255. } else {
  256. filter->cache = 0;
  257. CK((*filter->output_function)(n, filter->data));
  258. }
  259. break;
  260. }
  261. return c;
  262. }
  263. /*
  264. * wchar => UTF-16LE
  265. */
  266. int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
  267. {
  268. int n;
  269. if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
  270. CK((*filter->output_function)(c & 0xff, filter->data));
  271. CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
  272. } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
  273. n = ((c >> 10) - 0x40) | 0xd800;
  274. CK((*filter->output_function)(n & 0xff, filter->data));
  275. CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
  276. n = (c & 0x3ff) | 0xdc00;
  277. CK((*filter->output_function)(n & 0xff, filter->data));
  278. CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
  279. } else {
  280. if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  281. CK(mbfl_filt_conv_illegal_output(c, filter));
  282. }
  283. }
  284. return c;
  285. }