PageRenderTime 53ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/afpfs-ng-0.8.1/lib/unicode.h

#
C Header | 198 lines | 61 code | 16 blank | 121 comment | 0 complexity | cb15be534190c868a4a710e450fa37b9 MD5 | raw file
Possible License(s): GPL-2.0
  1. /**********************************************************************
  2. *
  3. * unicode.h: Functions to handle UTF8/UCS2 coded strings.
  4. *
  5. * Most of these functions have been adopted from Roland Krause's
  6. * UTF8.c, which is part of the XawPlus package. See
  7. * http://freenet-homepage.de/kra/ for details.
  8. *
  9. * int str16len() A strlen() on a char16 string
  10. * char16 *str16chr() A strchr() on a char16 string
  11. * void str16cpy() A strcpy() on a char16 string
  12. * void str16ncpy() A strncpy() on a char16 string
  13. * void str16cat() A strcat() on a char16 string
  14. *
  15. * int mbCharLen() Calculate the number of bytes in a UTF8 character
  16. * int mbStrLen() Calculate the number of characters in a UTF8 string
  17. * char16 *UTF8toUCS2() Convert UTF8 string to UCS2/UNICODE
  18. * char *UCS2toUTF8() Convert UCS2/UNICODE string to UTF8
  19. *
  20. * int UCS2precompose() Canonically combine two UCS2 characters
  21. *
  22. * Copyright (c) Roland Krause 2002, roland_krause@freenet.de
  23. * Copyright (c) Michael Ulbrich 2007, mul@rentapacs.de
  24. *
  25. * This module is free software; you can redistribute it and/or modify
  26. * it under the terms of the GNU General Public License as published by
  27. * the Free Software Foundation; either version 2 of the License, or
  28. * (at your option) any later version.
  29. *
  30. * This program is distributed in the hope that it will be useful,
  31. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  32. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  33. * GNU General Public License for more details.
  34. *
  35. * You should have received a copy of the GNU General Public License
  36. * along with this program; if not, write to the Free Software
  37. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  38. *
  39. **********************************************************************/
  40. #ifndef unicode_h
  41. #define unicode_h
  42. #include <ctype.h>
  43. /* char16: the element type used for 16 bit (UCS2) character strings.
  44. * Its format is kept compatible with *XChar2b* as used by Xlib. NOTE(review): C only guarantees that unsigned short is at least 16 bits wide.
  45. */
  46. typedef unsigned short char16;
  47. /* Function Name: str16len
  48. * Description: Return the length of a char16 string (a strlen() for
  49. * char16 strings), independent of the locale settings.
  50. * Arguments: str16 - A terminated string of char16's (presumably terminated by a 0 element - confirm in the implementation)
  51. * Returns: Length in char16's
  52. */
  53. extern int str16len(
  54. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  55. char16 * /* str16 */
  56. #endif
  57. );
  58. /* Function Name: str16chr
  59. * Description: Search for an 8 bit character in a char16 string.
  60. * *ch* is treated as a 16 bit value whose upper byte is 0!
  61. * Arguments: str16 - A terminated string of char16's
  62. * ch - An 8 bit character
  63. * Returns: Pointer to the leftmost occurrence of *ch*
  64. * in str16, or NULL.
  65. */
  66. extern char16 *str16chr(
  67. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  68. char16 *, /* str16 */
  69. char /* ch; NOTE(review): without a prototype a char argument is promoted to int - confirm the definition matches */
  70. #endif
  71. );
  72. /* Function Name: str16cpy
  73. * Description: Copy the char16 string *src* to *dest* (a strcpy() for char16 strings).
  74. * Arguments: dest - Destination string (presumably must be large enough for *src* - confirm in the implementation)
  75. * src - Source string (presumably terminated - confirm in the implementation)
  76. * Returns: None
  77. */
  78. extern void str16cpy(
  79. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  80. char16 *, /* dest */
  81. char16 * /* src */
  82. #endif
  83. );
  84. /* Function Name: str16ncpy
  85. * Description: Copy *n* char16's from *src* to *dest* and
  86. * terminate *dest*. NOTE(review): whether the terminator is written in addition to the *n* elements is not visible here - confirm in the implementation.
  87. * Arguments: dest - Destination string
  88. * src - Source string
  89. * n - Number of characters to copy. NOTE(review): size_t is used here but this header includes only <ctype.h>, which is not guaranteed to declare it - confirm <stddef.h> is included first.
  90. * Returns: None
  91. */
  92. extern void str16ncpy(
  93. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  94. char16 *, /* dest */
  95. char16 *, /* src */
  96. size_t /* n */
  97. #endif
  98. );
  99. /* Function Name: str16cat
  100. * Description: Concatenate the char16 string *src* with *dest* (a strcat() for char16 strings).
  101. * Arguments: dest - Destination string (presumably must have room for the combined result - confirm in the implementation)
  102. * src - Source string
  103. * Returns: None
  104. */
  105. extern void str16cat(
  106. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  107. char16 *, /* dest */
  108. char16 * /* src */
  109. #endif
  110. );
  111. /* Function Name: mbCharLen
  112. * Description: Determine the length in bytes of a UTF8 coded
  113. * character.
  114. * Arguments: str - Pointer into a UTF8 coded string
  115. * Returns: Number of bytes of the next character in the string,
  116. * or 0 in case of an error.
  117. */
  118. extern int mbCharLen(
  119. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  120. char * /* str */
  121. #endif
  122. );
  123. /* Function Name: mbStrLen
  124. * Description: Determine the length of a UTF8 coded string
  125. * in characters (not in bytes!).
  126. * Arguments: str - The UTF8 coded string
  127. * Returns: The length in characters; illegally coded bytes
  128. * are counted as one character per byte.
  129. * See UTF8toUCS2() for the reason!
  130. */
  131. extern int mbStrLen(
  132. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  133. char * /* str */
  134. #endif
  135. );
  136. /* Function Name: UTF8toUCS2
  137. * Description: Conversion of a UTF8 coded string into UCS2/UNICODE.
  138. * If the encoding of a character is not representable
  139. * in two bytes, the tilde sign ~ is written into the
  140. * result string at this position.
  141. * For an illegal UTF8 code an asterisk * is stored in
  142. * the result string.
  143. * Arguments: str - The UTF8 coded string
  144. * Returns: The UCS2 coded result string. The allocated memory
  145. * for this string has to be freed by the caller (presumably with free() - confirm in the implementation)!
  146. * The result string is stored, independent of the
  147. * architecture, in high byte/low byte order and is
  148. * compatible with the XChar2b format! Type casting is valid.
  149. * char16 is used to increase the performance.
  150. */
  151. extern char16 *UTF8toUCS2(
  152. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  153. char * /* str */
  154. #endif
  155. );
  156. /* Function Name: UCS2toUTF8
  157. * Description: Conversion of a UCS2 coded string into UTF8.
  158. * Arguments: str16 - The UCS2 coded string
  159. * Returns: The UTF8 coded result string. The allocated memory
  160. * for this string has to be freed by the caller (presumably with free() - confirm in the implementation)!
  161. */
  162. extern char *UCS2toUTF8(
  163. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  164. char16 * /* str16 */
  165. #endif
  166. );
  167. /* Function Name: UCS2precompose
  168. * Description: Canonically combine two UCS2 characters, if a matching
  169. * pattern is found in the table. Uniform binary search
  170. * algorithm from D. Knuth TAOCP Vol.3 p.414.
  171. * Arguments: first - the first UCS2 character
  172. * second - the second UCS2 character
  173. * Returns: Canonical composition of first and second, or
  174. * -1 if no such composition exists in the table.
  175. */
  176. extern int UCS2precompose(
  177. #if NeedFunctionPrototypes /* parameter types are declared only when this macro is non-zero */
  178. char16, /* first */
  179. char16 /* second */
  180. #endif
  181. );
  182. #endif