PageRenderTime 45ms CodeModel.GetById 35ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/afpfs-ng-0.8.1/lib/unicode.h

#
C++ Header | 198 lines | 61 code | 16 blank | 121 comment | 0 complexity | cb15be534190c868a4a710e450fa37b9 MD5 | raw file
  1/**********************************************************************
  2 *
  3 * unicode.h: Functions to handle UTF8/UCS2 coded strings.
  4 *
  5 * Most of these functions have been adopted from Roland Krause's
  6 * UTF8.c, which is part of the XawPlus package. See
  7 * http://freenet-homepage.de/kra/ for details.
  8 *
  9 * int str16len()	A strlen() on a char16 string
 10 * char16 *str16chr()	A strchr() on a char16 string
 11 * void str16cpy()	A strcpy() on a char16 string
 12 * void str16ncpy()	A strncpy() on a char16 string
 13 * void str16cat()	A strcat() on a char16 string
 14 *
 15 * int mbCharLen()	Calc number of bytes of a UTF8 character
 16 * int mbStrLen()	Calc # of characters in a UTF8 string
 17 * char16 *UTF8toUCS2() Convert UTF8 string to UCS2/UNICODE
 18 * char *UCS2toUTF8()   Convert UCS2/UNICODE string to UTF8
 19 *
 20 * int UCS2precompose() Canonically combine two UCS2 characters
 21 *
 22 * Copyright (c) Roland Krause 2002, roland_krause@freenet.de
 23 * Copyright (c) Michael Ulbrich 2007, mul@rentapacs.de
 24 *
 25 * This module is free software; you can redistribute it and/or modify
 26 * it under the terms of the GNU General Public License as published by
 27 * the Free Software Foundation; either version 2 of the License, or
 28 * (at your option) any later version.
 29 *
 30 * This program is distributed in the hope that it will be useful,
 31 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 32 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 33 * GNU General Public License for more details.
 34 *
 35 * You should have received a copy of the GNU General Public License
 36 * along with this program; if not, write to the Free Software
 37 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 38 *
 39 **********************************************************************/
 40
 41#ifndef unicode_h
 42#define unicode_h
 43
#include <ctype.h>
#include <stddef.h>
 45
  46/* The data type used for 16 bit character strings (UCS2 code units).
  47 * The format is handled compatible to *XChar2b* used by Xlib.
  48 * NOTE(review): assumes unsigned short is exactly 16 bits wide - confirm. */
  49typedef unsigned short char16;
 50
 51
 52/*	Function Name:	str16len
 53 *	Description: 	Determine the string length of a char16 string
 54 *			independent of the locale settings.
 55 *	Arguments:	str16	- A terminated string of char16's
 56 *	Returns:	Length in char16's
 57 */
 58extern int str16len(
 59#if NeedFunctionPrototypes
 60	char16 *	/* str16 */
 61#endif
 62);
 63
 64/*	Function Name:	str16chr
 65 *	Description: 	Search an 8 bit character in a char16 string.
 66 *			The upper byte of *ch* is assumed as '0'!
 67 *	Arguments:	str16	- A terminated string of char16's
 68 *			ch	- An 8 bit character
 69 *	Returns:	Position of the leftmost occurance of *ch*
 70 *			in str16 or NULL.
 71 */
 72extern char16 *str16chr(
 73#if NeedFunctionPrototypes
 74	char16 *,	/* str16 */
 75	char		/* ch */
 76#endif
 77);
 78
 79/*	Function Name:	str16cpy
 80 *	Description: 	Copy a string of char16's from *src* to *dest*
 81 *	Arguments:	dest	- Destination string
 82 *			src	- Source string
 83 *	Returns:	None
 84 */
 85extern void str16cpy(
 86#if NeedFunctionPrototypes
 87	char16 *,	/* dest */
 88	char16 *	/* src */
 89#endif
 90);
 91
 92/*	Function Name:	str16ncpy
 93 *	Description: 	Copy *n* char16's from *src* to *dest* and
 94 *			terminate *dest*.
 95 *	Arguments:	dest	- Destination string
 96 *			src	- Source string
 97 *			n	- # of characters to copy
 98 *	Returns:	None	
 99 */
100extern void str16ncpy(
101#if NeedFunctionPrototypes
102	char16 *,	/* dest */
103	char16 *,	/* src */
104	size_t		/* n */
105#endif
106);
107
108/*	Function Name:	str16cat
109 *	Description: 	Concatenate the string of char16's in *src* with *dest*.
110 *	Arguments:	dest	- Destination string
111 *			src	- Source string
112 *	Returns:	None
113 */
114extern void str16cat(
115#if NeedFunctionPrototypes
116	char16 *,	/* dest */
117	char16 *	/* src */
118#endif
119);
120
/*	Function Name:	mbCharLen
 *	Description: 	Determine the length in bytes of a UTF8 coded
 *			character.
 *	Arguments:	str	- Pointer into a UTF8 coded string
 *	Returns:	Number of bytes of the next character in the string
 *			or 0 in case of an error.
 *
 *	The parameter list is declared unconditionally (it no longer
 *	depends on NeedFunctionPrototypes), so every call is type-checked.
 */
extern int mbCharLen(char *str);
133
/*	Function Name:	mbStrLen
 *	Description: 	Determine the string length of a UTF8 coded string
 *			in characters (not in bytes!).
 *	Arguments:	str	- The UTF8 coded string
 *	Returns:	The length in characters, illegal coded bytes
 *			are counted as one character per byte.
 *			See UTF8toUCS2() for the reason!
 *
 *	The parameter list is declared unconditionally (it no longer
 *	depends on NeedFunctionPrototypes), so every call is type-checked.
 */
extern int mbStrLen(char *str);
147
148/*	Function Name:	UTF8toUCS2
149 *	Description: 	Conversion of an UTF8 coded string into UCS2/UNICODE.
150 *			If the encoding of the character is not representable
151 *			in two bytes, the tilde sign ~ is written into the
152 *			result string at this position.
153 *			For an illegal UTF8 code an asterix * is stored in
154 *			the result string.
155 *	Arguments:	str	- The UTF8 coded string
156 *	Returns:	The UCS2 coded result string. The allocated memory
157 *			for this string has to be freed by the caller!
158 *			The result string is stored independent of the
159 *			architecture in the high byte/low byte order and is
160 *			compatible to the XChar2b format! Type casting is valid.
161 *			char16 is used to increase the performance.
162 */
163extern char16 *UTF8toUCS2(
164#if NeedFunctionPrototypes
165	char *		/* str */
166#endif
167);
168
169/*      Function Name:  UCS2toUTF8
170 *      Description:    Conversion of an UCS2 coded string into UTF8.
171 *      Arguments:      str16     - The UCS2 coded string
172 *      Returns:        The UTF8 coded result string. The allocated memory
173 *                      for this string has to be freed by the caller!
174 */
175extern char *UCS2toUTF8(
176#if NeedFunctionPrototypes
177        char16 *          /* str */
178#endif
179);
180
181/*      Function Name:  UCS2precompose
182 *      Description:    Canonically combine two UCS2 characters, if matching
183 *                      pattern is found in table. Uniform binary search
184 *                      algorithm from D. Knuth TAOCP Vol.3 p.414.
185 *      Arguments:      first   - the first UCS2 character
186 *                      second  - the second UCS2 character
187 *      Returns:        Canonical composition of first and second or
188 *                      -1 if no such composition exists in table.
189 */
190extern int UCS2precompose(
191#if NeedFunctionPrototypes
192	char16,           /* first */
193	char16            /* second */
194#endif
195);
196
197#endif
198