PageRenderTime 62ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/src/wrappers/glib/library/utilities/glib_unicode_manipulation.e

http://github.com/tybor/Liberty
Specman e | 1278 lines | 35 code | 338 blank | 905 comment | 2 complexity | b03c41098c1a40f12d152ad7cc22961f MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.1, GPL-2.0
  1. indexing
  2. description: "Unicode Manipulation -- functions operating on Unicode characters and UTF-8 strings."
  3. copyright: "[
  4. Copyright (C) 2007 Paolo Redaelli, Anthony Lenton,
  5. Soluciones Informaticas Libres S.A., GLib team
  6. This library is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU Lesser General Public License
  8. as published by the Free Software Foundation; either version 2.1 of
  9. the License, or (at your option) any later version.
  10. This library is distributed in the hope that it will be useful, but
  11. WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. Lesser General Public License for more details.
  14. You should have received a copy of the GNU Lesser General Public
  15. License along with this library; if not, write to the Free Software
  16. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. 02110-1301 USA
  18. ]"
  19. deferred class GLIB_UNICODE_MANIPULATION
  20. insert
  21. ANY undefine is_equal, copy end
  22. feature {} -- Utility functions, inherit them if you need them
  23. -- Unicode Manipulation -- functions operating on Unicode characters and UTF-8
  24. -- strings.
  25. -- Synopsis
  26. -- #include <glib.h>
  27. -- typedef gunichar;
  28. -- typedef gunichar2;
  29. -- gboolean g_unichar_validate (gunichar ch);
  30. -- gboolean g_unichar_isalnum (gunichar c);
  31. -- gboolean g_unichar_isalpha (gunichar c);
  32. -- gboolean g_unichar_iscntrl (gunichar c);
  33. -- gboolean g_unichar_isdigit (gunichar c);
  34. -- gboolean g_unichar_isgraph (gunichar c);
  35. -- gboolean g_unichar_islower (gunichar c);
  36. -- gboolean g_unichar_isprint (gunichar c);
  37. -- gboolean g_unichar_ispunct (gunichar c);
  38. -- gboolean g_unichar_isspace (gunichar c);
  39. -- gboolean g_unichar_isupper (gunichar c);
  40. -- gboolean g_unichar_isxdigit (gunichar c);
  41. -- gboolean g_unichar_istitle (gunichar c);
  42. -- gboolean g_unichar_isdefined (gunichar c);
  43. -- gboolean g_unichar_iswide (gunichar c);
  44. -- gboolean g_unichar_iswide_cjk (gunichar c);
  45. -- gunichar g_unichar_toupper (gunichar c);
  46. -- gunichar g_unichar_tolower (gunichar c);
  47. -- gunichar g_unichar_totitle (gunichar c);
  48. -- gint g_unichar_digit_value (gunichar c);
  49. -- gint g_unichar_xdigit_value (gunichar c);
  50. -- enum GUnicodeType;
  51. -- GUnicodeType g_unichar_type (gunichar c);
  52. -- enum GUnicodeBreakType;
  53. -- GUnicodeBreakType g_unichar_break_type (gunichar c);
  54. -- void g_unicode_canonical_ordering (gunichar *string,
  55. -- gsize len);
  56. -- gunichar* g_unicode_canonical_decomposition
  57. -- (gunichar ch,
  58. -- gsize *result_len);
  59. -- gboolean g_unichar_get_mirror_char (gunichar ch,
  60. -- gunichar *mirrored_ch);
  61. -- #define g_utf8_next_char (p)
  62. -- gunichar g_utf8_get_char (const gchar *p);
  63. -- gunichar g_utf8_get_char_validated (const gchar *p,
  64. -- gssize max_len);
  65. -- gchar* g_utf8_offset_to_pointer (const gchar *str,
  66. -- glong offset);
  67. -- glong g_utf8_pointer_to_offset (const gchar *str,
  68. -- const gchar *pos);
  69. -- gchar* g_utf8_prev_char (const gchar *p);
  70. -- gchar* g_utf8_find_next_char (const gchar *p,
  71. -- const gchar *end);
  72. -- gchar* g_utf8_find_prev_char (const gchar *str,
  73. -- const gchar *p);
  74. -- glong g_utf8_strlen (const gchar *p,
  75. -- gssize max);
  76. -- gchar* g_utf8_strncpy (gchar *dest,
  77. -- const gchar *src,
  78. -- gsize n);
  79. -- gchar* g_utf8_strchr (const gchar *p,
  80. -- gssize len,
  81. -- gunichar c);
  82. -- gchar* g_utf8_strrchr (const gchar *p,
  83. -- gssize len,
  84. -- gunichar c);
  85. -- gchar* g_utf8_strreverse (const gchar *str,
  86. -- gssize len);
  87. -- gboolean g_utf8_validate (const gchar *str,
  88. -- gssize max_len,
  89. -- const gchar **end);
  90. -- gchar* g_utf8_strup (const gchar *str,
  91. -- gssize len);
  92. -- gchar* g_utf8_strdown (const gchar *str,
  93. -- gssize len);
  94. -- gchar* g_utf8_casefold (const gchar *str,
  95. -- gssize len);
  96. -- gchar* g_utf8_normalize (const gchar *str,
  97. -- gssize len,
  98. -- GNormalizeMode mode);
  99. -- enum GNormalizeMode;
  100. -- gint g_utf8_collate (const gchar *str1,
  101. -- const gchar *str2);
  102. -- gchar* g_utf8_collate_key (const gchar *str,
  103. -- gssize len);
  104. -- gchar* g_utf8_collate_key_for_filename (const gchar *str,
  105. -- gssize len);
  106. -- gunichar2* g_utf8_to_utf16 (const gchar *str,
  107. -- glong len,
  108. -- glong *items_read,
  109. -- glong *items_written,
  110. -- GError **error);
  111. -- gunichar* g_utf8_to_ucs4 (const gchar *str,
  112. -- glong len,
  113. -- glong *items_read,
  114. -- glong *items_written,
  115. -- GError **error);
  116. -- gunichar* g_utf8_to_ucs4_fast (const gchar *str,
  117. -- glong len,
  118. -- glong *items_written);
  119. -- gunichar* g_utf16_to_ucs4 (const gunichar2 *str,
  120. -- glong len,
  121. -- glong *items_read,
  122. -- glong *items_written,
  123. -- GError **error);
  124. -- gchar* g_utf16_to_utf8 (const gunichar2 *str,
  125. -- glong len,
  126. -- glong *items_read,
  127. -- glong *items_written,
  128. -- GError **error);
  129. -- gunichar2* g_ucs4_to_utf16 (const gunichar *str,
  130. -- glong len,
  131. -- glong *items_read,
  132. -- glong *items_written,
  133. -- GError **error);
  134. -- gchar* g_ucs4_to_utf8 (const gunichar *str,
  135. -- glong len,
  136. -- glong *items_read,
  137. -- glong *items_written,
  138. -- GError **error);
  139. -- gint g_unichar_to_utf8 (gunichar c,
  140. -- gchar *outbuf);
  141. -- Description
  142. -- This section describes a number of functions for dealing with Unicode characters
  143. -- and strings. There are analogues of the traditional ctype.h character
  144. -- classification and case conversion functions, UTF-8 analogues of some string
  145. -- utility functions, functions to perform normalization, case conversion and
  146. -- collation on UTF-8 strings and finally functions to convert between the UTF-8,
  147. -- UTF-16 and UCS-4 encodings of Unicode.
  148. -- The implementations of the Unicode functions in GLib are based on the Unicode
  149. -- Character Data tables, which are available from www.unicode.org. GLib 2.8
  150. -- supports Unicode 4.0, GLib 2.10 supports Unicode 4.1, GLib 2.12 supports Unicode
  151. -- 5.0.
  152. -- Details
  153. -- gunichar
  154. -- typedef guint32 gunichar;
  155. -- A type which can hold any UCS-4 character code.
  156. -- ---------------------------------------------------------------------------------
  157. -- gunichar2
  158. -- typedef guint16 gunichar2;
  159. -- A type which can hold any UTF-16 code unit (a surrogate pair needs two).
  160. -- ---------------------------------------------------------------------------------
  161. -- g_unichar_validate ()
  162. -- gboolean g_unichar_validate (gunichar ch);
  163. -- Checks whether ch is a valid Unicode character. Some possible integer values of
  164. -- ch will not be valid. 0 is considered a valid character, though it's normally a
  165. -- string terminator.
  166. -- ch : a Unicode character
  167. -- Returns : TRUE if ch is a valid Unicode character
  168. -- ---------------------------------------------------------------------------------
  169. -- g_unichar_isalnum ()
  170. -- gboolean g_unichar_isalnum (gunichar c);
  171. -- Determines whether a character is alphanumeric. Given some UTF-8 text, obtain a
  172. -- character value with g_utf8_get_char().
  173. -- c : a Unicode character
  174. -- Returns : TRUE if c is an alphanumeric character
  175. -- ---------------------------------------------------------------------------------
  176. -- g_unichar_isalpha ()
  177. -- gboolean g_unichar_isalpha (gunichar c);
  178. -- Determines whether a character is alphabetic (i.e. a letter). Given some UTF-8
  179. -- text, obtain a character value with g_utf8_get_char().
  180. -- c : a Unicode character
  181. -- Returns : TRUE if c is an alphabetic character
  182. -- ---------------------------------------------------------------------------------
  183. -- g_unichar_iscntrl ()
  184. -- gboolean g_unichar_iscntrl (gunichar c);
  185. -- Determines whether a character is a control character. Given some UTF-8 text,
  186. -- obtain a character value with g_utf8_get_char().
  187. -- c : a Unicode character
  188. -- Returns : TRUE if c is a control character
  189. -- ---------------------------------------------------------------------------------
  190. -- g_unichar_isdigit ()
  191. -- gboolean g_unichar_isdigit (gunichar c);
  192. -- Determines whether a character is numeric (i.e. a digit). This covers ASCII 0-9
  193. -- and also digits in other languages/scripts. Given some UTF-8 text, obtain a
  194. -- character value with g_utf8_get_char().
  195. -- c : a Unicode character
  196. -- Returns : TRUE if c is a digit
  197. -- ---------------------------------------------------------------------------------
  198. -- g_unichar_isgraph ()
  199. -- gboolean g_unichar_isgraph (gunichar c);
  200. -- Determines whether a character is printable and not a space (returns FALSE for
  201. -- control characters, format characters, and spaces). g_unichar_isprint() is
  202. -- similar, but returns TRUE for spaces. Given some UTF-8 text, obtain a character
  203. -- value with g_utf8_get_char().
  204. -- c : a Unicode character
  205. -- Returns : TRUE if c is printable unless it's a space
  206. -- ---------------------------------------------------------------------------------
  207. -- g_unichar_islower ()
  208. -- gboolean g_unichar_islower (gunichar c);
  209. -- Determines whether a character is a lowercase letter. Given some UTF-8 text,
  210. -- obtain a character value with g_utf8_get_char().
  211. -- c : a Unicode character
  212. -- Returns : TRUE if c is a lowercase letter
  213. -- ---------------------------------------------------------------------------------
  214. -- g_unichar_isprint ()
  215. -- gboolean g_unichar_isprint (gunichar c);
  216. -- Determines whether a character is printable. Unlike g_unichar_isgraph(), returns
  217. -- TRUE for spaces. Given some UTF-8 text, obtain a character value with
  218. -- g_utf8_get_char().
  219. -- c : a Unicode character
  220. -- Returns : TRUE if c is printable
  221. -- ---------------------------------------------------------------------------------
  222. -- g_unichar_ispunct ()
  223. -- gboolean g_unichar_ispunct (gunichar c);
  224. -- Determines whether a character is punctuation or a symbol. Given some UTF-8 text,
  225. -- obtain a character value with g_utf8_get_char().
  226. -- c : a Unicode character
  227. -- Returns : TRUE if c is a punctuation or symbol character
  228. -- ---------------------------------------------------------------------------------
  229. -- g_unichar_isspace ()
  230. -- gboolean g_unichar_isspace (gunichar c);
  231. -- Determines whether a character is a space, tab, or line separator (newline,
  232. -- carriage return, etc.). Given some UTF-8 text, obtain a character value with
  233. -- g_utf8_get_char().
  234. -- (Note: don't use this to do word breaking; you have to use Pango or equivalent to
  235. -- get word breaking right, the algorithm is fairly complex.)
  236. -- c : a Unicode character
  237. -- Returns : TRUE if c is a space character
  238. -- ---------------------------------------------------------------------------------
  239. -- g_unichar_isupper ()
  240. -- gboolean g_unichar_isupper (gunichar c);
  241. -- Determines if a character is uppercase.
  242. -- c : a Unicode character
  243. -- Returns : TRUE if c is an uppercase character
  244. -- ---------------------------------------------------------------------------------
  245. -- g_unichar_isxdigit ()
  246. -- gboolean g_unichar_isxdigit (gunichar c);
  247. -- Determines if a character is a hexadecimal digit.
  248. -- c : a Unicode character.
  249. -- Returns : TRUE if the character is a hexadecimal digit
  250. -- ---------------------------------------------------------------------------------
  251. -- g_unichar_istitle ()
  252. -- gboolean g_unichar_istitle (gunichar c);
  253. -- Determines if a character is titlecase. Some characters in Unicode which are
  254. -- composites, such as the DZ digraph have three case variants instead of just two.
  255. -- The titlecase form is used at the beginning of a word where only the first letter
  256. -- is capitalized. The titlecase form of the DZ digraph is U+01F2 LATIN CAPITAL
  257. -- LETTER D WITH SMALL LETTER Z.
  258. -- c : a Unicode character
  259. -- Returns : TRUE if the character is titlecase
  260. -- ---------------------------------------------------------------------------------
  261. -- g_unichar_isdefined ()
  262. -- gboolean g_unichar_isdefined (gunichar c);
  263. -- Determines if a given character is assigned in the Unicode standard.
  264. -- c : a Unicode character
  265. -- Returns : TRUE if the character has an assigned value
  266. -- ---------------------------------------------------------------------------------
  267. -- g_unichar_iswide ()
  268. -- gboolean g_unichar_iswide (gunichar c);
  269. -- Determines if a character is typically rendered in a double-width cell.
  270. -- c : a Unicode character
  271. -- Returns : TRUE if the character is wide
  272. -- ---------------------------------------------------------------------------------
  273. -- g_unichar_iswide_cjk ()
  274. -- gboolean g_unichar_iswide_cjk (gunichar c);
  275. -- Determines if a character is typically rendered in a double-width cell under
  276. -- legacy East Asian locales. If a character is wide according to
  277. -- g_unichar_iswide(), then it is also reported wide with this function, but the
  278. -- converse is not necessarily true. See the Unicode Standard Annex 11 for details.
  279. -- c : a Unicode character
  280. -- Returns : TRUE if the character is wide in legacy East Asian locales
  281. -- Since 2.12
  282. -- ---------------------------------------------------------------------------------
  283. -- g_unichar_toupper ()
  284. -- gunichar g_unichar_toupper (gunichar c);
  285. -- Converts a character to uppercase.
  286. -- c : a Unicode character
  287. -- Returns : the result of converting c to uppercase. If c is not a lowercase or
  288. -- titlecase character, or has no upper case equivalent c is returned
  289. -- unchanged.
  290. -- ---------------------------------------------------------------------------------
  291. -- g_unichar_tolower ()
  292. -- gunichar g_unichar_tolower (gunichar c);
  293. -- Converts a character to lower case.
  294. -- c : a Unicode character.
  295. -- Returns : the result of converting c to lower case. If c is not an uppercase or
  296. -- titlecase character, or has no lowercase equivalent c is returned
  297. -- unchanged.
  298. -- ---------------------------------------------------------------------------------
  299. -- g_unichar_totitle ()
  300. -- gunichar g_unichar_totitle (gunichar c);
  301. -- Converts a character to the titlecase.
  302. -- c : a Unicode character
  303. -- Returns : the result of converting c to titlecase. If c is not an uppercase or
  304. -- lowercase character, c is returned unchanged.
  305. -- ---------------------------------------------------------------------------------
  306. -- g_unichar_digit_value ()
  307. -- gint g_unichar_digit_value (gunichar c);
  308. -- Determines the numeric value of a character as a decimal digit.
  309. -- c : a Unicode character
  310. -- Returns : If c is a decimal digit (according to g_unichar_isdigit()), its numeric
  311. -- value. Otherwise, -1.
  312. -- ---------------------------------------------------------------------------------
  313. -- g_unichar_xdigit_value ()
  314. -- gint g_unichar_xdigit_value (gunichar c);
  315. -- Determines the numeric value of a character as a hexadecimal digit.
  316. -- c : a Unicode character
  317. -- Returns : If c is a hex digit (according to g_unichar_isxdigit()), its numeric
  318. -- value. Otherwise, -1.
  319. -- ---------------------------------------------------------------------------------
  320. -- enum GUnicodeType
  321. -- typedef enum
  322. -- {
  323. -- G_UNICODE_CONTROL,
  324. -- G_UNICODE_FORMAT,
  325. -- G_UNICODE_UNASSIGNED,
  326. -- G_UNICODE_PRIVATE_USE,
  327. -- G_UNICODE_SURROGATE,
  328. -- G_UNICODE_LOWERCASE_LETTER,
  329. -- G_UNICODE_MODIFIER_LETTER,
  330. -- G_UNICODE_OTHER_LETTER,
  331. -- G_UNICODE_TITLECASE_LETTER,
  332. -- G_UNICODE_UPPERCASE_LETTER,
  333. -- G_UNICODE_COMBINING_MARK,
  334. -- G_UNICODE_ENCLOSING_MARK,
  335. -- G_UNICODE_NON_SPACING_MARK,
  336. -- G_UNICODE_DECIMAL_NUMBER,
  337. -- G_UNICODE_LETTER_NUMBER,
  338. -- G_UNICODE_OTHER_NUMBER,
  339. -- G_UNICODE_CONNECT_PUNCTUATION,
  340. -- G_UNICODE_DASH_PUNCTUATION,
  341. -- G_UNICODE_CLOSE_PUNCTUATION,
  342. -- G_UNICODE_FINAL_PUNCTUATION,
  343. -- G_UNICODE_INITIAL_PUNCTUATION,
  344. -- G_UNICODE_OTHER_PUNCTUATION,
  345. -- G_UNICODE_OPEN_PUNCTUATION,
  346. -- G_UNICODE_CURRENCY_SYMBOL,
  347. -- G_UNICODE_MODIFIER_SYMBOL,
  348. -- G_UNICODE_MATH_SYMBOL,
  349. -- G_UNICODE_OTHER_SYMBOL,
  350. -- G_UNICODE_LINE_SEPARATOR,
  351. -- G_UNICODE_PARAGRAPH_SEPARATOR,
  352. -- G_UNICODE_SPACE_SEPARATOR
  353. -- } GUnicodeType;
  354. -- These are the possible character classifications. See
  355. -- http://www.unicode.org/Public/UNIDATA/UnicodeData.html.
  356. -- ---------------------------------------------------------------------------------
  357. -- g_unichar_type ()
  358. -- GUnicodeType g_unichar_type (gunichar c);
  359. -- Classifies a Unicode character by type.
  360. -- c : a Unicode character
  361. -- Returns : the type of the character.
  362. -- ---------------------------------------------------------------------------------
  363. -- enum GUnicodeBreakType
  364. -- typedef enum
  365. -- {
  366. -- G_UNICODE_BREAK_MANDATORY,
  367. -- G_UNICODE_BREAK_CARRIAGE_RETURN,
  368. -- G_UNICODE_BREAK_LINE_FEED,
  369. -- G_UNICODE_BREAK_COMBINING_MARK,
  370. -- G_UNICODE_BREAK_SURROGATE,
  371. -- G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
  372. -- G_UNICODE_BREAK_INSEPARABLE,
  373. -- G_UNICODE_BREAK_NON_BREAKING_GLUE,
  374. -- G_UNICODE_BREAK_CONTINGENT,
  375. -- G_UNICODE_BREAK_SPACE,
  376. -- G_UNICODE_BREAK_AFTER,
  377. -- G_UNICODE_BREAK_BEFORE,
  378. -- G_UNICODE_BREAK_BEFORE_AND_AFTER,
  379. -- G_UNICODE_BREAK_HYPHEN,
  380. -- G_UNICODE_BREAK_NON_STARTER,
  381. -- G_UNICODE_BREAK_OPEN_PUNCTUATION,
  382. -- G_UNICODE_BREAK_CLOSE_PUNCTUATION,
  383. -- G_UNICODE_BREAK_QUOTATION,
  384. -- G_UNICODE_BREAK_EXCLAMATION,
  385. -- G_UNICODE_BREAK_IDEOGRAPHIC,
  386. -- G_UNICODE_BREAK_NUMERIC,
  387. -- G_UNICODE_BREAK_INFIX_SEPARATOR,
  388. -- G_UNICODE_BREAK_SYMBOL,
  389. -- G_UNICODE_BREAK_ALPHABETIC,
  390. -- G_UNICODE_BREAK_PREFIX,
  391. -- G_UNICODE_BREAK_POSTFIX,
  392. -- G_UNICODE_BREAK_COMPLEX_CONTEXT,
  393. -- G_UNICODE_BREAK_AMBIGUOUS,
  394. -- G_UNICODE_BREAK_UNKNOWN,
  395. -- G_UNICODE_BREAK_NEXT_LINE,
  396. -- G_UNICODE_BREAK_WORD_JOINER,
  397. -- G_UNICODE_BREAK_HANGUL_L_JAMO,
  398. -- G_UNICODE_BREAK_HANGUL_V_JAMO,
  399. -- G_UNICODE_BREAK_HANGUL_T_JAMO,
  400. -- G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,
  401. -- G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE
  402. -- } GUnicodeBreakType;
  403. -- These are the possible line break classifications. The five Hangul types were
  404. -- added in Unicode 4.1, so they were introduced in GLib 2.10. Note that new types
  405. -- may be added in the future. Applications should be ready to handle unknown
  406. -- values. They may be regarded as G_UNICODE_BREAK_UNKNOWN. See
  407. -- http://www.unicode.org/unicode/reports/tr14/.
  408. -- ---------------------------------------------------------------------------------
  409. -- g_unichar_break_type ()
  410. -- GUnicodeBreakType g_unichar_break_type (gunichar c);
  411. -- Determines the break type of c. c should be a Unicode character (to derive a
  412. -- character from UTF-8 encoded text, use g_utf8_get_char()). The break type is used
  413. -- to find word and line breaks ("text boundaries"), Pango implements the Unicode
  414. -- boundary resolution algorithms and normally you would use a function such as
  415. -- pango_break() instead of caring about break types yourself.
  416. -- c : a Unicode character
  417. -- Returns : the break type of c
  418. -- ---------------------------------------------------------------------------------
  419. -- g_unicode_canonical_ordering ()
  420. -- void g_unicode_canonical_ordering (gunichar *string,
  421. -- gsize len);
  422. -- Computes the canonical ordering of a string in-place. This rearranges decomposed
  423. -- characters in the string according to their combining classes. See the Unicode
  424. -- manual for more information.
  425. -- string : a UCS-4 encoded string.
  426. -- len : the maximum length of string to use.
  427. -- ---------------------------------------------------------------------------------
  428. -- g_unicode_canonical_decomposition ()
  429. -- gunichar* g_unicode_canonical_decomposition
  430. -- (gunichar ch,
  431. -- gsize *result_len);
  432. -- Computes the canonical decomposition of a Unicode character.
  433. -- ch : a Unicode character.
  434. -- result_len : location to store the length of the return value.
  435. -- Returns : a newly allocated string of Unicode characters. result_len is set to
  436. -- the resulting length of the string.
  437. -- ---------------------------------------------------------------------------------
  438. -- g_unichar_get_mirror_char ()
  439. -- gboolean g_unichar_get_mirror_char (gunichar ch,
  440. -- gunichar *mirrored_ch);
  441. -- In Unicode, some characters are mirrored. This means that their images are
  442. -- mirrored horizontally in text that is laid out from right to left. For instance,
  443. -- "(" would become its mirror image, ")", in right-to-left text.
  444. -- If ch has the Unicode mirrored property and there is another unicode character
  445. -- that typically has a glyph that is the mirror image of ch's glyph and mirrored_ch
  446. -- is set, it puts that character in the address pointed to by mirrored_ch.
  447. -- Otherwise the original character is put.
  448. -- ch : a Unicode character
  449. -- mirrored_ch : location to store the mirrored character
  450. -- Returns : TRUE if ch has a mirrored character, FALSE otherwise
  451. -- Since 2.4
  452. -- ---------------------------------------------------------------------------------
  453. -- g_utf8_next_char()
  454. -- #define g_utf8_next_char(p)
  455. -- Skips to the next character in a UTF-8 string. The string must be valid; this
  456. -- macro is as fast as possible, and has no error-checking. You would use this macro
  457. -- to iterate over a string character by character. The macro returns the start of
  458. -- the next UTF-8 character. Before using this macro, use g_utf8_validate() to
  459. -- validate strings that may contain invalid UTF-8.
  460. -- p : Pointer to the start of a valid UTF-8 character.
  461. -- ---------------------------------------------------------------------------------
  462. -- g_utf8_get_char ()
  463. -- gunichar g_utf8_get_char (const gchar *p);
  464. -- Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If p does
  465. -- not point to a valid UTF-8 encoded character, results are undefined. If you are
  466. -- not sure that the bytes are complete valid Unicode characters, you should use
  467. -- g_utf8_get_char_validated() instead.
  468. -- p : a pointer to Unicode character encoded as UTF-8
  469. -- Returns : the resulting character
  470. -- ---------------------------------------------------------------------------------
  471. -- g_utf8_get_char_validated ()
  472. -- gunichar g_utf8_get_char_validated (const gchar *p,
  473. -- gssize max_len);
  474. -- Convert a sequence of bytes encoded as UTF-8 to a Unicode character. This
  475. -- function checks for incomplete characters, for invalid characters such as
  476. -- characters that are out of the range of Unicode, and for overlong encodings of
  477. -- valid characters.
  478. -- p : a pointer to Unicode character encoded as UTF-8
  479. -- max_len : the maximum number of bytes to read, or -1, for no maximum.
  480. -- Returns : the resulting character. If p points to a partial sequence at the end
  481. -- of a string that could begin a valid character, returns (gunichar)-2;
  482. -- otherwise, if p does not point to a valid UTF-8 encoded Unicode
  483. -- character, returns (gunichar)-1.
  484. -- ---------------------------------------------------------------------------------
  485. -- g_utf8_offset_to_pointer ()
  486. -- gchar* g_utf8_offset_to_pointer (const gchar *str,
  487. -- glong offset);
  488. -- Converts from an integer character offset to a pointer to a position within the
  489. -- string.
  490. -- Since 2.10, this function allows passing a negative offset to step backwards. It
  491. -- is usually worth stepping backwards from the end instead of forwards if offset is
  492. -- in the last fourth of the string, since moving forward is about 3 times faster
  493. -- than moving backward.
  494. -- str : a UTF-8 encoded string
  495. -- offset : a character offset within str
  496. -- Returns : the resulting pointer
  497. -- ---------------------------------------------------------------------------------
  498. -- g_utf8_pointer_to_offset ()
  499. -- glong g_utf8_pointer_to_offset (const gchar *str,
  500. -- const gchar *pos);
  501. -- Converts from a pointer to position within a string to an integer character
  502. -- offset.
  503. -- Since 2.10, this function allows pos to be before str, and returns a negative
  504. -- offset in this case.
  505. -- str : a UTF-8 encoded string
  506. -- pos : a pointer to a position within str
  507. -- Returns : the resulting character offset
  508. -- ---------------------------------------------------------------------------------
  509. -- g_utf8_prev_char ()
  510. -- gchar* g_utf8_prev_char (const gchar *p);
  511. -- Finds the previous UTF-8 character in the string before p.
  512. -- p does not have to be at the beginning of a UTF-8 character. No check is made to
  513. -- see if the character found is actually valid other than it starts with an
  514. -- appropriate byte. If p might be the first character of the string, you must use
  515. -- g_utf8_find_prev_char() instead.
  516. -- p : a pointer to a position within a UTF-8 encoded string
  517. -- Returns : a pointer to the found character.
  518. -- ---------------------------------------------------------------------------------
  519. -- g_utf8_find_next_char ()
  520. -- gchar* g_utf8_find_next_char (const gchar *p,
  521. -- const gchar *end);
  522. -- Finds the start of the next UTF-8 character in the string after p.
  523. -- p does not have to be at the beginning of a UTF-8 character. No check is made to
  524. -- see if the character found is actually valid other than it starts with an
  525. -- appropriate byte.
  526. -- p : a pointer to a position within a UTF-8 encoded string
  527. -- end : a pointer to the end of the string, or NULL to indicate that the string
  528. -- is nul-terminated.
  529. -- Returns : a pointer to the found character or NULL
  530. -- ---------------------------------------------------------------------------------
  531. -- g_utf8_find_prev_char ()
  532. -- gchar* g_utf8_find_prev_char (const gchar *str,
  533. -- const gchar *p);
  534. -- Given a position p within a UTF-8 encoded string str, find the start of the
  535. -- previous UTF-8 character starting before p. Returns NULL if no UTF-8 characters
  536. -- are present in str before p.
  537. -- p does not have to be at the beginning of a UTF-8 character. No check is made to
  538. -- see if the character found is actually valid other than it starts with an
  539. -- appropriate byte.
  540. -- str : pointer to the beginning of a UTF-8 encoded string
  541. -- p : pointer to some position within str
  542. -- Returns : a pointer to the found character or NULL.
  543. -- ---------------------------------------------------------------------------------
  544. -- g_utf8_strlen ()
  545. -- glong g_utf8_strlen (const gchar *p,
  546. -- gssize max);
  547. -- Returns the length of the string in characters.
  548. -- p : pointer to the start of a UTF-8 encoded string.
  549. -- max : the maximum number of bytes to examine. If max is less than 0, then the
  550. -- string is assumed to be nul-terminated. If max is 0, p will not be
  551. -- examined and may be NULL.
  552. -- Returns : the length of the string in characters
  553. -- ---------------------------------------------------------------------------------
  554. -- g_utf8_strncpy ()
  555. -- gchar* g_utf8_strncpy (gchar *dest,
  556. -- const gchar *src,
  557. -- gsize n);
  558. -- Like the standard C strncpy() function, but copies a given number of characters
  559. -- instead of a given number of bytes. The src string must be valid UTF-8 encoded
  560. -- text. (Use g_utf8_validate() on all text before trying to use UTF-8 utility
  561. -- functions with it.)
  562. -- dest : buffer to fill with characters from src
  563. -- src : UTF-8 encoded string
  564. -- n : character count
  565. -- Returns : dest
  566. -- ---------------------------------------------------------------------------------
  567. -- g_utf8_strchr ()
  568. -- gchar* g_utf8_strchr (const gchar *p,
  569. -- gssize len,
  570. -- gunichar c);
  571. -- Finds the leftmost occurrence of the given Unicode character in a UTF-8 encoded
  572. -- string, while limiting the search to len bytes. If len is -1, allow unbounded
  573. -- search.
  574. -- p : a nul-terminated UTF-8 encoded string
  575. -- len : the maximum length of p
  576. -- c : a Unicode character
  577. -- Returns : NULL if the string does not contain the character, otherwise, a pointer
  578. -- to the start of the leftmost occurrence of the character in the string.
  579. -- ---------------------------------------------------------------------------------
  580. -- g_utf8_strrchr ()
  581. -- gchar* g_utf8_strrchr (const gchar *p,
  582. -- gssize len,
  583. -- gunichar c);
  584. -- Find the rightmost occurrence of the given Unicode character in a UTF-8 encoded
  585. -- string, while limiting the search to len bytes. If len is -1, allow unbounded
  586. -- search.
  587. -- p : a nul-terminated UTF-8 encoded string
  588. -- len : the maximum length of p
  589. -- c : a Unicode character
  590. -- Returns : NULL if the string does not contain the character, otherwise, a pointer
  591. -- to the start of the rightmost occurrence of the character in the
  592. -- string.
  593. -- ---------------------------------------------------------------------------------
  594. -- g_utf8_strreverse ()
  595. -- gchar* g_utf8_strreverse (const gchar *str,
  596. -- gssize len);
  597. -- Reverses a UTF-8 string. str must be valid UTF-8 encoded text. (Use
  598. -- g_utf8_validate() on all text before trying to use UTF-8 utility functions with
  599. -- it.)
  600. -- Note that unlike g_strreverse(), this function returns newly-allocated memory,
  601. -- which should be freed with g_free() when no longer needed.
  602. -- str : a UTF-8 encoded string
  603. -- len : the maximum length of str to use. If len < 0, then the string is
  604. -- nul-terminated.
  605. -- Returns : a newly-allocated string which is the reverse of str.
  606. -- Since 2.2
  607. -- ---------------------------------------------------------------------------------
  608. -- g_utf8_validate ()
  609. -- gboolean g_utf8_validate (const gchar *str,
  610. -- gssize max_len,
  611. -- const gchar **end);
  612. -- Validates UTF-8 encoded text. str is the text to validate; if str is
  613. -- nul-terminated, then max_len can be -1, otherwise max_len should be the number of
  614. -- bytes to validate. If end is non-NULL, then the end of the valid range will be
  615. -- stored there (i.e. the start of the first invalid character if some bytes were
  616. -- invalid, or the end of the text being validated otherwise).
  617. -- Note that g_utf8_validate() returns FALSE if max_len is positive and NUL is met
  618. -- before max_len bytes have been read.
  619. -- Returns TRUE if all of str was valid. Many GLib and GTK+ routines require valid
  620. -- UTF-8 as input; so data read from a file or the network should be checked with
  621. -- g_utf8_validate() before doing anything else with it.
  622. -- str : a pointer to character data
  623. -- max_len : max bytes to validate, or -1 to go until NUL
  624. -- end : return location for end of valid data
  625. -- Returns : TRUE if the text was valid UTF-8
  626. -- ---------------------------------------------------------------------------------
  627. -- g_utf8_strup ()
  628. -- gchar* g_utf8_strup (const gchar *str,
  629. -- gssize len);
  630. -- Converts all Unicode characters in the string that have a case to uppercase. The
  631. -- exact manner that this is done depends on the current locale, and may result in
  632. -- the number of characters in the string increasing. (For instance, the German
  633. -- ess-zet will be changed to SS.)
  634. -- str : a UTF-8 encoded string
  635. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  636. -- Returns : a newly allocated string, with all characters converted to uppercase.
  637. -- ---------------------------------------------------------------------------------
  638. -- g_utf8_strdown ()
  639. -- gchar* g_utf8_strdown (const gchar *str,
  640. -- gssize len);
  641. -- Converts all Unicode characters in the string that have a case to lowercase. The
  642. -- exact manner that this is done depends on the current locale, and may result in
  643. -- the number of characters in the string changing.
  644. -- str : a UTF-8 encoded string
  645. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  646. -- Returns : a newly allocated string, with all characters converted to lowercase.
  647. -- ---------------------------------------------------------------------------------
  648. -- g_utf8_casefold ()
  649. -- gchar* g_utf8_casefold (const gchar *str,
  650. -- gssize len);
  651. -- Converts a string into a form that is independent of case. The result will not
  652. -- correspond to any particular case, but can be compared for equality or ordered
  653. -- with the results of calling g_utf8_casefold() on other strings.
  654. -- Note that calling g_utf8_casefold() followed by g_utf8_collate() is only an
  655. -- approximation to the correct linguistic case insensitive ordering, though it is a
  656. -- fairly good one. Getting this exactly right would require a more sophisticated
  657. -- collation function that takes case sensitivity into account. GLib does not
  658. -- currently provide such a function.
  659. -- str : a UTF-8 encoded string
  660. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  661. -- Returns : a newly allocated string, that is a case independent form of str.
  662. -- ---------------------------------------------------------------------------------
  663. -- g_utf8_normalize ()
  664. -- gchar* g_utf8_normalize (const gchar *str,
  665. -- gssize len,
  666. -- GNormalizeMode mode);
  667. -- Converts a string into canonical form, standardizing such issues as whether a
  668. -- character with an accent is represented as a base character and combining accent
  669. -- or as a single precomposed character. You should generally call
  670. -- g_utf8_normalize() before comparing two Unicode strings.
  671. -- The normalization mode G_NORMALIZE_DEFAULT only standardizes differences that do
  672. -- not affect the text content, such as the above-mentioned accent representation.
  673. -- G_NORMALIZE_ALL also standardizes the "compatibility" characters in Unicode, such
  674. -- as SUPERSCRIPT THREE to the standard forms (in this case DIGIT THREE). Formatting
  675. -- information may be lost but for most text operations such characters should be
  676. -- considered the same. For example, g_utf8_collate() normalizes with
  677. -- G_NORMALIZE_ALL as its first step.
  678. -- G_NORMALIZE_DEFAULT_COMPOSE and G_NORMALIZE_ALL_COMPOSE are like
  679. -- G_NORMALIZE_DEFAULT and G_NORMALIZE_ALL, but return a result with composed
  680. -- forms rather than a maximally decomposed form. This is often useful if you intend
  681. -- to convert the string to a legacy encoding or pass it to a system with less
  682. -- capable Unicode handling.
  683. -- str : a UTF-8 encoded string.
  684. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  685. -- mode : the type of normalization to perform.
  686. -- Returns : a newly allocated string, that is the normalized form of str.
  687. -- ---------------------------------------------------------------------------------
  688. -- enum GNormalizeMode
  689. -- typedef enum {
  690. -- G_NORMALIZE_DEFAULT,
  691. -- G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT,
  692. -- G_NORMALIZE_DEFAULT_COMPOSE,
  693. -- G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
  694. -- G_NORMALIZE_ALL,
  695. -- G_NORMALIZE_NFKD = G_NORMALIZE_ALL,
  696. -- G_NORMALIZE_ALL_COMPOSE,
  697. -- G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
  698. -- } GNormalizeMode;
  699. -- Defines how a Unicode string is transformed in a canonical form, standardizing
  700. -- such issues as whether a character with an accent is represented as a base
  701. -- character and combining accent or as a single precomposed character. Unicode
  702. -- strings should generally be normalized before comparing them.
  703. -- G_NORMALIZE_DEFAULT standardize differences that do not affect the text
  704. -- content, such as the above-mentioned accent
  705. -- representation.
  706. -- G_NORMALIZE_NFD another name for G_NORMALIZE_DEFAULT.
  707. -- G_NORMALIZE_DEFAULT_COMPOSE like G_NORMALIZE_DEFAULT, but with composed forms
  708. -- rather than a maximally decomposed form.
  709. -- G_NORMALIZE_NFC another name for G_NORMALIZE_DEFAULT_COMPOSE.
  710. -- G_NORMALIZE_ALL beyond G_NORMALIZE_DEFAULT also standardize the
  711. -- "compatibility" characters in Unicode, such as
  712. -- SUPERSCRIPT THREE to the standard forms (in this case
  713. -- DIGIT THREE). Formatting information may be lost but
  714. -- for most text operations such characters should be
  715. -- considered the same.
  716. -- G_NORMALIZE_NFKD another name for G_NORMALIZE_ALL.
  717. -- G_NORMALIZE_ALL_COMPOSE like G_NORMALIZE_ALL, but with composed forms rather
  718. -- than a maximally decomposed form.
  719. -- G_NORMALIZE_NFKC another name for G_NORMALIZE_ALL_COMPOSE.
  720. -- ---------------------------------------------------------------------------------
  721. -- g_utf8_collate ()
  722. -- gint g_utf8_collate (const gchar *str1,
  723. -- const gchar *str2);
  724. -- Compares two strings for ordering using the linguistically correct rules for the
  725. -- current locale. When sorting a large number of strings, it will be significantly
  726. -- faster to obtain collation keys with g_utf8_collate_key() and compare the keys
  727. -- with strcmp() when sorting instead of sorting the original strings.
  728. -- str1 : a UTF-8 encoded string
  729. -- str2 : a UTF-8 encoded string
  730. -- Returns : < 0 if str1 compares before str2, 0 if they compare equal, > 0 if str1
  731. -- compares after str2.
  732. -- ---------------------------------------------------------------------------------
  733. -- g_utf8_collate_key ()
  734. -- gchar* g_utf8_collate_key (const gchar *str,
  735. -- gssize len);
  736. -- Converts a string into a collation key that can be compared with other collation
  737. -- keys produced by the same function using strcmp(). The results of comparing the
  738. -- collation keys of two strings with strcmp() will always be the same as comparing
  739. -- the two original strings with g_utf8_collate().
  740. -- str : a UTF-8 encoded string.
  741. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  742. -- Returns : a newly allocated string. This string should be freed with g_free()
  743. -- when you are done with it.
  744. -- ---------------------------------------------------------------------------------
  745. -- g_utf8_collate_key_for_filename ()
  746. -- gchar* g_utf8_collate_key_for_filename (const gchar *str,
  747. -- gssize len);
  748. -- Converts a string into a collation key that can be compared with other collation
  749. -- keys produced by the same function using strcmp().
  750. -- In order to sort filenames correctly, this function treats the dot '.' as a
  751. -- special case. Most dictionary orderings seem to consider it insignificant, thus
  752. -- producing the ordering "event.c" "eventgenerator.c" "event.h" instead of
  753. -- "event.c" "event.h" "eventgenerator.c". Also, we would like to treat numbers
  754. -- intelligently so that "file1" "file10" "file5" is sorted as "file1" "file5"
  755. -- "file10".
  756. -- str : a UTF-8 encoded string.
  757. -- len : length of str, in bytes, or -1 if str is nul-terminated.
  758. -- Returns : a newly allocated string. This string should be freed with g_free()
  759. -- when you are done with it.
  760. -- Since 2.8
  761. -- ---------------------------------------------------------------------------------
  762. -- g_utf8_to_utf16 ()
  763. -- gunichar2* g_utf8_to_utf16 (const gchar *str,
  764. -- glong len,
  765. -- glong *items_read,
  766. -- glong *items_written,
  767. -- GError **error);
  768. -- Convert a string from UTF-8 to UTF-16. A 0 character will be added to the result
  769. -- after the converted text.
  770. -- str : a UTF-8 encoded string
  771. -- len : the maximum length (number of bytes) of str to use. If len <
  772. -- 0, then the string is nul-terminated.
  773. -- items_read : location to store number of bytes read, or NULL. If NULL, then
  774. -- G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str
  775. -- contains a trailing partial character. If an error occurs then
  776. -- the index of the invalid input is stored here.
  777. -- items_written : location to store number of gunichar2 written, or NULL. The value
  778. -- stored here does not include the trailing 0.
  779. -- error : location to store the error occurring, or NULL to ignore errors.
  780. -- Any of the errors in GConvertError other than
  781. -- G_CONVERT_ERROR_NO_CONVERSION may occur.
  782. -- Returns : a pointer to a newly allocated UTF-16 string. This value must be
  783. -- freed with g_free(). If an error occurs, NULL will be returned
  784. -- and error set.
  785. -- ---------------------------------------------------------------------------------
  786. -- g_utf8_to_ucs4 ()
  787. -- gunichar* g_utf8_to_ucs4 (const gchar *str,
  788. -- glong len,
  789. -- glong *items_read,
  790. -- glong *items_written,
  791. -- GError **error);
  792. -- Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4. A
  793. -- trailing 0 will be added to the string after the converted text.
  794. -- str : a UTF-8 encoded string
  795. -- len : the maximum length of str to use. If len < 0, then the string is
  796. -- nul-terminated.
  797. -- items_read : location to store number of bytes read, or NULL. If NULL, then
  798. -- G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str
  799. -- contains a trailing partial character. If an error occurs then
  800. -- the index of the invalid input is stored here.
  801. -- items_written : location to store number of characters written or NULL. The value
  802. -- stored here does not include the trailing 0 character.
  803. -- error : location to store the error occurring, or NULL to ignore errors.
  804. -- Any of the errors in GConvertError other than
  805. -- G_CONVERT_ERROR_NO_CONVERSION may occur.
  806. -- Returns : a pointer to a newly allocated UCS-4 string. This value must be
  807. -- freed with g_free(). If an error occurs, NULL will be returned
  808. -- and error set.
  809. -- ---------------------------------------------------------------------------------
  810. -- g_utf8_to_ucs4_fast ()
  811. -- gunichar* g_utf8_to_ucs4_fast (const gchar *str,
  812. -- glong len,
  813. -- glong *items_written);
  814. -- Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4,
  815. -- assuming valid UTF-8 input. This function is roughly twice as fast as
  816. -- g_utf8_to_ucs4() but does no error checking on the input.
  817. -- str : a UTF-8 encoded string
  818. -- len : the maximum length of str to use. If len < 0, then the string is
  819. -- nul-terminated.
  820. -- items_written : location to store the number of characters in the result, or
  821. -- NULL.
  822. -- Returns : a pointer to a newly allocated UCS-4 string. This value must be
  823. -- freed with g_free().
  824. -- ---------------------------------------------------------------------------------
  825. -- g_utf16_to_ucs4 ()
  826. -- gunichar* g_utf16_to_ucs4 (const gunichar2 *str,
  827. -- glong len,
  828. -- glong *items_read,
  829. -- glong *items_written,
  830. -- GError **error);
  831. -- Convert a string from UTF-16 to UCS-4. The result will be terminated with a 0
  832. -- character.
  833. -- str : a UTF-16 encoded string
  834. -- len : the maximum length (number of gunichar2) of str to use. If len <
  835. -- 0, then the string is terminated with a 0 character.
  836. -- items_read : location to store number of words read, or NULL. If NULL, then
  837. -- G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str
  838. -- contains a trailing partial character. If an error occurs then
  839. -- the index of the invalid input is stored here.
  840. -- items_written : location to store number of characters written, or NULL. The
  841. -- value stored here does not include the trailing 0 character.
  842. -- error : location to store the error occurring, or NULL to ignore errors.
  843. -- Any of the errors in GConvertError other than
  844. -- G_CONVERT_ERROR_NO_CONVERSION may occur.
  845. -- Returns : a pointer to a newly allocated UCS-4 string. This value must be
  846. -- freed with g_free(). If an error occurs, NULL will be returned
  847. -- and error set.
  848. -- ---------------------------------------------------------------------------------
  849. -- g_utf16_to_utf8 ()
  850. -- gchar* g_utf16_to_utf8 (const gunichar2 *str,
  851. -- glong len,
  852. -- glong *items_read,
  853. -- glong *items_written,
  854. -- GError **error);
  855. -- Convert a string from UTF-16 to UTF-8. The result will be terminated with a 0
  856. -- byte.
  857. -- Note that the input is expected to be already in native endianness; an initial
  858. -- byte-order-mark character is not handled specially. g_convert() can be used to
  859. -- convert a byte buffer of UTF-16 data of ambiguous endianness.
  860. -- str : a UTF-16 encoded string
  861. -- len : the maximum length (number of gunichar2) of str to use. If len <
  862. -- 0, then the string is terminated with a 0 character.
  863. -- items_read : location to store number of words read, or NULL. If NULL, then
  864. -- G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str
  865. -- contains a trailing partial character. If an error occurs then
  866. -- the index of the invalid input is stored here.
  867. -- items_written : location to store number of bytes written, or NULL. The value
  868. -- stored here does not include the trailing 0 byte.
  869. -- error : location to store the error occurring, or NULL to ignore errors.
  870. -- Any of the errors in GConvertError other than
  871. -- G_CONVERT_ERROR_NO_CONVERSION may occur.
  872. -- Returns : a pointer to a newly allocated UTF-8 string. This value must be
  873. -- freed with g_free(). If an error occurs, NULL will be returned
  874. -- and error set.
  875. -- ---------------------------------------------------------------------------------
  876. -- g_ucs4_to_utf16 ()
  877. -- gunichar2* g_ucs4_to_utf16 (const gunichar *str,
  878. -- glong len,
  879. -- glong *items_read,
  880. -- glong *items_written,
  881. -- GError **error);
  882. -- Convert a string from UCS-4 to UTF-16. A 0 character will be added to the result
  883. -- after the converted text.
  884. -- str : a UCS-4 encoded string
  885. -- len : the maximum length (number of characters) of str to use. If len <
  886. -- 0, then the string is terminated with a 0 character.
  887. -- items_read : location to store number of bytes read, or NULL. If an error
  888. -- occurs then the index of the invalid input is stored here.
  889. -- items_written : location to store number of gunichar2 written, or NULL. The value
  890. -- stored here does not include the trailing 0.
  891. -- error : location to store the error occurring, or NULL to ignore errors.
  892. -- Any of the errors in GConvertError other than
  893. -- G_CONVERT_ERROR_NO_CONVERSION may occur.
  894. -- Returns : a pointer to a newly allocated UTF-16 string. This value must be
  895. -- freed with g_free(). If an error occurs, NULL will be returned
  896. -- and error set.
  897. -- ---------------------------------------------------------------------------------
  898. -- g_ucs4_to_utf8 ()
  899. -- gchar* g_ucs4_to_utf8 (const gunichar *str,
  900. -- glong len,
  901. -- glong *items_read,
  902. -- glong *items_written,
  903. -- GError **error);
  904. -- Convert a string from a 32-bit fixed width representation as UCS-4 to UTF-8. The
  905. -- result will be terminated with a 0 byte.
  906. -- str : a UCS-4 encoded string
  907. -- len : the maximum length (number of characters) of str to use. If len <
  908. -- 0, then the string is terminated with a 0 character.
  909. -- items_read : location to store number of characters read, or NULL.
  910. -- items_written : location to store number of bytes written or NULL. The value
  911. -- stored here does not include the trailing 0 byte.
  912. -- error : location to store the error occurring, or NULL to ignore errors.
  913. -- Any of the errors in GConvertError other than
  914. -- G_CONVERT_ERROR_NO_CONVERSION may occur.
  915. -- Returns : a pointer to a newly allocated UTF-8 string. This value must be
  916. -- freed with g_free(). If an error occurs, NULL will be returned
  917. -- and error set. In that case, items_read will be set to the
  918. -- position of the first invalid input character.
  919. -- ---------------------------------------------------------------------------------
  920. unichar_to_utf8 (a_gunichar: INTEGER): STRING is
  921. -- UTF-8 encoding of the single Unicode character `a_gunichar'.
  922. -- a_gunichar : a Unicode character code
  923. -- Returns : a new string with the UTF-8 representation of a_gunichar
  924. do
  925. -- Scratch string of 6 characters for g_unichar_to_utf8 to write into.
  926. create Result.make_filled ('x', 6)
  927. -- The external call's result is the count of leading bytes to keep;
  928. -- whatever filler remains after them is dropped.
  929. Result.keep_head (g_unichar_to_utf8 (a_gunichar, Result.to_external))
  930. end
  931. feature {} -- External calls
  932. g_unichar_to_utf8 (a_gunichar: INTEGER; an_outbuf: POINTER): INTEGER is
       -- Direct binding to the C function g_unichar_to_utf8 from <glib.h>.
       -- a_gunichar : a Unicode character code
       -- an_outbuf : buffer receiving the UTF-8 bytes; unichar_to_utf8 in this
       -- class hands it the storage of a 6-character STRING
       -- Returns : used by unichar_to_utf8 as the number of leading bytes to
       -- keep; GLib documents it as the number of bytes written
  933. external "C use <glib.h>"
  934. end
  935. -- See Also
  936. -- g_locale_to_utf8(), g_locale_from_utf8() Convenience functions for converting
  937. -- between UTF-8 and the locale encoding.
  938. -- --------------
  939. -- ^[3] surrogate pairs
  940. end -- deferred class GLIB_UNICODE_MANIPULATION