/extlibs/SFML/include/SFML/System/Utf.inl

https://bitbucket.org/hugoruscitti/pilascpp · C++ Header · 671 lines · 445 code · 124 blank · 102 comment · 67 complexity · 02d108254fa111f7422014451bae706f MD5 · raw file

  1. ////////////////////////////////////////////////////////////
  2. //
  3. // SFML - Simple and Fast Multimedia Library
  4. // Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
  5. //
  6. // This software is provided 'as-is', without any express or implied warranty.
  7. // In no event will the authors be held liable for any damages arising from the use of this software.
  8. //
  9. // Permission is granted to anyone to use this software for any purpose,
  10. // including commercial applications, and to alter it and redistribute it freely,
  11. // subject to the following restrictions:
  12. //
  13. // 1. The origin of this software must not be misrepresented;
  14. // you must not claim that you wrote the original software.
  15. // If you use this software in a product, an acknowledgment
  16. // in the product documentation would be appreciated but is not required.
  17. //
  18. // 2. Altered source versions must be plainly marked as such,
  19. // and must not be misrepresented as being the original software.
  20. //
  21. // 3. This notice may not be removed or altered from any source distribution.
  22. //
  23. ////////////////////////////////////////////////////////////
  24. ////////////////////////////////////////////////////////////
  25. template <typename In>
  26. In Utf<8>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
  27. {
  28. // Some useful precomputed data
  29. static const int trailing[256] =
  30. {
  31. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  32. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  33. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  34. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  35. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  36. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  37. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  38. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
  39. };
  40. static const Uint32 offsets[6] =
  41. {
  42. 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
  43. };
  44. // Decode the character
  45. int trailingBytes = trailing[static_cast<Uint8>(*begin)];
  46. if (begin + trailingBytes < end)
  47. {
  48. output = 0;
  49. switch (trailingBytes)
  50. {
  51. case 5 : output += static_cast<Uint8>(*begin++); output <<= 6;
  52. case 4 : output += static_cast<Uint8>(*begin++); output <<= 6;
  53. case 3 : output += static_cast<Uint8>(*begin++); output <<= 6;
  54. case 2 : output += static_cast<Uint8>(*begin++); output <<= 6;
  55. case 1 : output += static_cast<Uint8>(*begin++); output <<= 6;
  56. case 0 : output += static_cast<Uint8>(*begin++);
  57. }
  58. output -= offsets[trailingBytes];
  59. }
  60. else
  61. {
  62. // Incomplete character
  63. begin = end;
  64. output = replacement;
  65. }
  66. return begin;
  67. }
  68. ////////////////////////////////////////////////////////////
  69. template <typename Out>
  70. Out Utf<8>::Encode(Uint32 input, Out output, Uint8 replacement)
  71. {
  72. // Some useful precomputed data
  73. static const Uint8 firstBytes[7] =
  74. {
  75. 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
  76. };
  77. // Encode the character
  78. if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
  79. {
  80. // Invalid character
  81. if (replacement)
  82. *output++ = replacement;
  83. }
  84. else
  85. {
  86. // Valid character
  87. // Get the number of bytes to write
  88. int bytesToWrite = 1;
  89. if (input < 0x80) bytesToWrite = 1;
  90. else if (input < 0x800) bytesToWrite = 2;
  91. else if (input < 0x10000) bytesToWrite = 3;
  92. else if (input <= 0x0010FFFF) bytesToWrite = 4;
  93. // Extract the bytes to write
  94. Uint8 bytes[4];
  95. switch (bytesToWrite)
  96. {
  97. case 4 : bytes[3] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
  98. case 3 : bytes[2] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
  99. case 2 : bytes[1] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
  100. case 1 : bytes[0] = static_cast<Uint8> (input | firstBytes[bytesToWrite]);
  101. }
  102. // Add them to the output
  103. const Uint8* currentByte = bytes;
  104. switch (bytesToWrite)
  105. {
  106. case 4 : *output++ = *currentByte++;
  107. case 3 : *output++ = *currentByte++;
  108. case 2 : *output++ = *currentByte++;
  109. case 1 : *output++ = *currentByte++;
  110. }
  111. }
  112. return output;
  113. }
  114. ////////////////////////////////////////////////////////////
  115. template <typename In>
  116. In Utf<8>::Next(In begin, In end)
  117. {
  118. Uint32 codepoint;
  119. return Decode(begin, end, codepoint);
  120. }
  121. ////////////////////////////////////////////////////////////
  122. template <typename In>
  123. std::size_t Utf<8>::Count(In begin, In end)
  124. {
  125. std::size_t length = 0;
  126. while (begin < end)
  127. {
  128. begin = Next(begin, end);
  129. ++length;
  130. }
  131. return length;
  132. }
  133. ////////////////////////////////////////////////////////////
  134. template <typename In, typename Out>
  135. Out Utf<8>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
  136. {
  137. while (begin < end)
  138. {
  139. Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
  140. output = Encode(codepoint, output);
  141. }
  142. return output;
  143. }
  144. ////////////////////////////////////////////////////////////
  145. template <typename In, typename Out>
  146. Out Utf<8>::FromWide(In begin, In end, Out output)
  147. {
  148. while (begin < end)
  149. {
  150. Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
  151. output = Encode(codepoint, output);
  152. }
  153. return output;
  154. }
  155. ////////////////////////////////////////////////////////////
  156. template <typename In, typename Out>
  157. Out Utf<8>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
  158. {
  159. while (begin < end)
  160. {
  161. Uint32 codepoint;
  162. begin = Decode(begin, end, codepoint);
  163. output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
  164. }
  165. return output;
  166. }
  167. ////////////////////////////////////////////////////////////
  168. template <typename In, typename Out>
  169. Out Utf<8>::ToWide(In begin, In end, Out output, wchar_t replacement)
  170. {
  171. while (begin < end)
  172. {
  173. Uint32 codepoint;
  174. begin = Decode(begin, end, codepoint);
  175. output = Utf<32>::EncodeWide(codepoint, output, replacement);
  176. }
  177. return output;
  178. }
  179. ////////////////////////////////////////////////////////////
  180. template <typename In, typename Out>
  181. Out Utf<8>::ToUtf8(In begin, In end, Out output)
  182. {
  183. while (begin < end)
  184. *output++ = *begin++;
  185. return output;
  186. }
  187. ////////////////////////////////////////////////////////////
  188. template <typename In, typename Out>
  189. Out Utf<8>::ToUtf16(In begin, In end, Out output)
  190. {
  191. while (begin < end)
  192. {
  193. Uint32 codepoint;
  194. begin = Decode(begin, end, codepoint);
  195. output = Utf<16>::Encode(codepoint, output);
  196. }
  197. return output;
  198. }
  199. ////////////////////////////////////////////////////////////
  200. template <typename In, typename Out>
  201. Out Utf<8>::ToUtf32(In begin, In end, Out output)
  202. {
  203. while (begin < end)
  204. {
  205. Uint32 codepoint;
  206. begin = Decode(begin, end, codepoint);
  207. *output++ = codepoint;
  208. }
  209. return output;
  210. }
  211. ////////////////////////////////////////////////////////////
  212. template <typename In>
  213. In Utf<16>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
  214. {
  215. Uint16 first = *begin++;
  216. // If it's a surrogate pair, first convert to a single UTF-32 character
  217. if ((first >= 0xD800) && (first <= 0xDBFF))
  218. {
  219. if (begin < end)
  220. {
  221. Uint32 second = *begin++;
  222. if ((second >= 0xDC00) && (second <= 0xDFFF))
  223. {
  224. // The second element is valid: convert the two elements to a UTF-32 character
  225. output = static_cast<Uint32>(((first - 0xD800) << 10) + (second - 0xDC00) + 0x0010000);
  226. }
  227. else
  228. {
  229. // Invalid character
  230. output = replacement;
  231. }
  232. }
  233. else
  234. {
  235. // Invalid character
  236. begin = end;
  237. output = replacement;
  238. }
  239. }
  240. else
  241. {
  242. // We can make a direct copy
  243. output = first;
  244. }
  245. return begin;
  246. }
  247. ////////////////////////////////////////////////////////////
  248. template <typename Out>
  249. Out Utf<16>::Encode(Uint32 input, Out output, Uint16 replacement)
  250. {
  251. if (input < 0xFFFF)
  252. {
  253. // The character can be copied directly, we just need to check if it's in the valid range
  254. if ((input >= 0xD800) && (input <= 0xDFFF))
  255. {
  256. // Invalid character (this range is reserved)
  257. if (replacement)
  258. *output++ = replacement;
  259. }
  260. else
  261. {
  262. // Valid character directly convertible to a single UTF-16 character
  263. *output++ = static_cast<Uint16>(input);
  264. }
  265. }
  266. else if (input > 0x0010FFFF)
  267. {
  268. // Invalid character (greater than the maximum unicode value)
  269. if (replacement)
  270. *output++ = replacement;
  271. }
  272. else
  273. {
  274. // The input character will be converted to two UTF-16 elements
  275. input -= 0x0010000;
  276. *output++ = static_cast<Uint16>((input >> 10) + 0xD800);
  277. *output++ = static_cast<Uint16>((input & 0x3FFUL) + 0xDC00);
  278. }
  279. return output;
  280. }
  281. ////////////////////////////////////////////////////////////
  282. template <typename In>
  283. In Utf<16>::Next(In begin, In end)
  284. {
  285. Uint32 codepoint;
  286. return Decode(begin, end, codepoint);
  287. }
  288. ////////////////////////////////////////////////////////////
  289. template <typename In>
  290. std::size_t Utf<16>::Count(In begin, In end)
  291. {
  292. std::size_t length = 0;
  293. while (begin < end)
  294. {
  295. begin = Next(begin, end);
  296. ++length;
  297. }
  298. return length;
  299. }
  300. ////////////////////////////////////////////////////////////
  301. template <typename In, typename Out>
  302. Out Utf<16>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
  303. {
  304. while (begin < end)
  305. {
  306. Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
  307. output = Encode(codepoint, output);
  308. }
  309. return output;
  310. }
  311. ////////////////////////////////////////////////////////////
  312. template <typename In, typename Out>
  313. Out Utf<16>::FromWide(In begin, In end, Out output)
  314. {
  315. while (begin < end)
  316. {
  317. Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
  318. output = Encode(codepoint, output);
  319. }
  320. return output;
  321. }
  322. ////////////////////////////////////////////////////////////
  323. template <typename In, typename Out>
  324. Out Utf<16>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
  325. {
  326. while (begin < end)
  327. {
  328. Uint32 codepoint;
  329. begin = Decode(begin, end, codepoint);
  330. output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
  331. }
  332. return output;
  333. }
  334. ////////////////////////////////////////////////////////////
  335. template <typename In, typename Out>
  336. Out Utf<16>::ToWide(In begin, In end, Out output, wchar_t replacement)
  337. {
  338. while (begin < end)
  339. {
  340. Uint32 codepoint;
  341. begin = Decode(begin, end, codepoint);
  342. output = Utf<32>::EncodeWide(codepoint, output, replacement);
  343. }
  344. return output;
  345. }
  346. ////////////////////////////////////////////////////////////
  347. template <typename In, typename Out>
  348. Out Utf<16>::ToUtf8(In begin, In end, Out output)
  349. {
  350. while (begin < end)
  351. {
  352. Uint32 codepoint;
  353. begin = Decode(begin, end, codepoint);
  354. output = Utf<8>::Encode(codepoint, output);
  355. }
  356. return output;
  357. }
  358. ////////////////////////////////////////////////////////////
  359. template <typename In, typename Out>
  360. Out Utf<16>::ToUtf16(In begin, In end, Out output)
  361. {
  362. while (begin < end)
  363. *output++ = *begin++;
  364. return output;
  365. }
  366. ////////////////////////////////////////////////////////////
  367. template <typename In, typename Out>
  368. Out Utf<16>::ToUtf32(In begin, In end, Out output)
  369. {
  370. while (begin < end)
  371. {
  372. Uint32 codepoint;
  373. begin = Decode(begin, end, codepoint);
  374. *output++ = codepoint;
  375. }
  376. return output;
  377. }
  378. ////////////////////////////////////////////////////////////
  379. template <typename In>
  380. In Utf<32>::Decode(In begin, In end, Uint32& output, Uint32)
  381. {
  382. output = *begin++;
  383. return begin;
  384. }
  385. ////////////////////////////////////////////////////////////
  386. template <typename Out>
  387. Out Utf<32>::Encode(Uint32 input, Out output, Uint32 replacement)
  388. {
  389. *output++ = input;
  390. return output;
  391. }
  392. ////////////////////////////////////////////////////////////
  393. template <typename In>
  394. In Utf<32>::Next(In begin, In end)
  395. {
  396. return ++begin;
  397. }
  398. ////////////////////////////////////////////////////////////
  399. template <typename In>
  400. std::size_t Utf<32>::Count(In begin, In end)
  401. {
  402. return begin - end;
  403. }
  404. ////////////////////////////////////////////////////////////
  405. template <typename In, typename Out>
  406. Out Utf<32>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
  407. {
  408. while (begin < end)
  409. *output++ = DecodeAnsi(*begin++, locale);
  410. return output;
  411. }
  412. ////////////////////////////////////////////////////////////
  413. template <typename In, typename Out>
  414. Out Utf<32>::FromWide(In begin, In end, Out output)
  415. {
  416. while (begin < end)
  417. *output++ = DecodeWide(*begin++);
  418. return output;
  419. }
  420. ////////////////////////////////////////////////////////////
  421. template <typename In, typename Out>
  422. Out Utf<32>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
  423. {
  424. while (begin < end)
  425. output = EncodeAnsi(*begin++, output, replacement, locale);
  426. return output;
  427. }
  428. ////////////////////////////////////////////////////////////
  429. template <typename In, typename Out>
  430. Out Utf<32>::ToWide(In begin, In end, Out output, wchar_t replacement)
  431. {
  432. while (begin < end)
  433. output = EncodeWide(*begin++, output, replacement);
  434. return output;
  435. }
  436. ////////////////////////////////////////////////////////////
  437. template <typename In, typename Out>
  438. Out Utf<32>::ToUtf8(In begin, In end, Out output)
  439. {
  440. while (begin < end)
  441. output = Utf<8>::Encode(*begin++, output);
  442. return output;
  443. }
  444. ////////////////////////////////////////////////////////////
  445. template <typename In, typename Out>
  446. Out Utf<32>::ToUtf16(In begin, In end, Out output)
  447. {
  448. while (begin < end)
  449. output = Utf<16>::Encode(*begin++, output);
  450. return output;
  451. }
  452. ////////////////////////////////////////////////////////////
  453. template <typename In, typename Out>
  454. Out Utf<32>::ToUtf32(In begin, In end, Out output)
  455. {
  456. while (begin < end)
  457. *output++ = *begin++;
  458. return output;
  459. }
  460. ////////////////////////////////////////////////////////////
  461. template <typename In>
  462. Uint32 Utf<32>::DecodeAnsi(In input, const std::locale& locale)
  463. {
  464. // On Windows, gcc's standard library (glibc++) has almost
  465. // no support for Unicode stuff. As a consequence, in this
  466. // context we can only use the default locale and ignore
  467. // the one passed as parameter.
  468. #if defined(SFML_SYSTEM_WINDOWS) && /* if Windows ... */ \
  469. (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && /* ... and standard library is glibc++ ... */ \
  470. !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
  471. wchar_t character = 0;
  472. mbtowc(&character, &input, 1);
  473. return static_cast<Uint32>(character);
  474. #else
  475. // Get the facet of the locale which deals with character conversion
  476. const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
  477. // Use the facet to convert each character of the input string
  478. return static_cast<Uint32>(facet.widen(input));
  479. #endif
  480. }
  481. ////////////////////////////////////////////////////////////
  482. template <typename In>
  483. Uint32 Utf<32>::DecodeWide(In input)
  484. {
  485. // The encoding of wide characters is not well defined and is left to the system;
  486. // however we can safely assume that it is UCS-2 on Windows and
  487. // UCS-4 on Unix systems.
  488. // In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4,
  489. // and UCS-4 *is* UTF-32).
  490. return input;
  491. }
  492. ////////////////////////////////////////////////////////////
  493. template <typename Out>
  494. Out Utf<32>::EncodeAnsi(Uint32 codepoint, Out output, char replacement, const std::locale& locale)
  495. {
  496. // On Windows, gcc's standard library (glibc++) has almost
  497. // no support for Unicode stuff. As a consequence, in this
  498. // context we can only use the default locale and ignore
  499. // the one passed as parameter.
  500. #if defined(SFML_SYSTEM_WINDOWS) && /* if Windows ... */ \
  501. (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && /* ... and standard library is glibc++ ... */ \
  502. !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
  503. char character = 0;
  504. if (wctomb(&character, static_cast<wchar_t>(codepoint)) >= 0)
  505. *output++ = character;
  506. else if (replacement)
  507. *output++ = replacement;
  508. return output;
  509. #else
  510. // Get the facet of the locale which deals with character conversion
  511. const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
  512. // Use the facet to convert each character of the input string
  513. *output++ = facet.narrow(static_cast<wchar_t>(codepoint), replacement);
  514. return output;
  515. #endif
  516. }
  517. ////////////////////////////////////////////////////////////
  518. template <typename Out>
  519. Out Utf<32>::EncodeWide(Uint32 codepoint, Out output, wchar_t replacement)
  520. {
  521. // The encoding of wide characters is not well defined and is left to the system;
  522. // however we can safely assume that it is UCS-2 on Windows and
  523. // UCS-4 on Unix systems.
  524. // For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4).
  525. // For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32).
  526. switch (sizeof(wchar_t))
  527. {
  528. case 4:
  529. {
  530. *output++ = static_cast<wchar_t>(codepoint);
  531. break;
  532. }
  533. default:
  534. {
  535. if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
  536. {
  537. *output++ = static_cast<wchar_t>(codepoint);
  538. }
  539. else if (replacement)
  540. {
  541. *output++ = replacement;
  542. }
  543. break;
  544. }
  545. }
  546. return output;
  547. }