PageRenderTime 54ms CodeModel.GetById 18ms app.highlight 31ms RepoModel.GetById 1ms app.codeStats 0ms

/extensions/spellcheck/hunspell/src/csutil.hxx

http://github.com/zpao/v8monkey
C++ Header | 277 lines | 121 code | 57 blank | 99 comment | 10 complexity | 2a715548ff3980d31043fa89a76425da MD5 | raw file
  1/******* BEGIN LICENSE BLOCK *******
  2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  3 * 
  4 * The contents of this file are subject to the Mozilla Public License Version
  5 * 1.1 (the "License"); you may not use this file except in compliance with
  6 * the License. You may obtain a copy of the License at
  7 * http://www.mozilla.org/MPL/
  8 * 
  9 * Software distributed under the License is distributed on an "AS IS" basis,
 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 11 * for the specific language governing rights and limitations under the
 12 * License.
 13 * 
 14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
 15 * and László Németh (Hunspell). Portions created by the Initial Developers
 16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
 17 * 
 18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
 19 *                 David Einstein (deinst@world.std.com)
 20 *                 László Németh (nemethl@gyorsposta.hu)
 21 *                 Caolan McNamara (caolanm@redhat.com)
 22 *                 Davide Prina
 23 *                 Giuseppe Modugno
 24 *                 Gianluca Turconi
 25 *                 Simon Brouwer
 26 *                 Noll Janos
 27 *                 Biro Arpad
 28 *                 Goldman Eleonora
 29 *                 Sarlos Tamas
 30 *                 Bencsath Boldizsar
 31 *                 Halacsy Peter
 32 *                 Dvornik Laszlo
 33 *                 Gefferth Andras
 34 *                 Nagy Viktor
 35 *                 Varga Daniel
 36 *                 Chris Halls
 37 *                 Rene Engelhard
 38 *                 Bram Moolenaar
 39 *                 Dafydd Jones
 40 *                 Harri Pitkanen
 41 *                 Andras Timar
 42 *                 Tor Lillqvist
 43 * 
 44 * Alternatively, the contents of this file may be used under the terms of
 45 * either the GNU General Public License Version 2 or later (the "GPL"), or
 46 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 47 * in which case the provisions of the GPL or the LGPL are applicable instead
 48 * of those above. If you wish to allow use of your version of this file only
 49 * under the terms of either the GPL or the LGPL, and not to allow others to
 50 * use your version of this file under the terms of the MPL, indicate your
 51 * decision by deleting the provisions above and replace them with the notice
 52 * and other provisions required by the GPL or the LGPL. If you do not delete
 53 * the provisions above, a recipient may use your version of this file under
 54 * the terms of any one of the MPL, the GPL or the LGPL.
 55 *
 56 ******* END LICENSE BLOCK *******/
 57
 58#ifndef __CSUTILHXX__
 59#define __CSUTILHXX__
 60
 61#include "hunvisapi.h"
 62
 63// First some base level utility routines
 64
 65#include <string.h>
 66#include "w_char.hxx"
 67#include "htypes.hxx"
 68
 69#ifdef MOZILLA_CLIENT
 70#include "nscore.h" // for mozalloc headers
 71#endif
 72
 // Capitalization types of a word.
 // NOTE(review): judging by the names these mean no capitals, initial capital,
 // all capitals, and two mixed-case ("HUH") variants — confirm with get_captype().
 #define NOCAP       0
 #define INITCAP     1
 #define ALLCAP      2
 #define HUHCAP      3
 #define HUHINITCAP  4
 79
 // Defaults: the character encoding name and the key string
 // (three '|'-separated groups of letters).
 #define SPELL_ENCODING  "ISO8859-1"
 #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
 83
 // Default morphological field tags. Every tag is two characters followed
 // by ':'.
 #define MORPH_STEM        "st:"
 #define MORPH_ALLOMORPH   "al:"
 #define MORPH_POS         "po:"
 #define MORPH_DERI_PFX    "dp:"
 #define MORPH_INFL_PFX    "ip:"
 #define MORPH_TERM_PFX    "tp:"
 #define MORPH_DERI_SFX    "ds:"
 #define MORPH_INFL_SFX    "is:"
 #define MORPH_TERM_SFX    "ts:"
 #define MORPH_SURF_PFX    "sp:"
 #define MORPH_FREQ        "fr:"
 #define MORPH_PHON        "ph:"
 #define MORPH_HYPH        "hy:"
 #define MORPH_PART        "pa:"
 #define MORPH_FLAG        "fl:"
 #define MORPH_HENTRY      "_H:"
 // Length of a tag without its terminating NUL. All tags above have the same
 // length, so MORPH_STEM stands in for any of them. sizeof on the literal makes
 // this a compile-time constant of type size_t (the type strlen() returns), and
 // the outer parentheses keep the subtraction intact inside larger expressions.
 #define MORPH_TAG_LEN     (sizeof(MORPH_STEM) - 1)
102
// Separator characters.
// NOTE(review): the suffixes presumably stand for field, record and
// alternative — confirm against the morphological output code.
#define MSEP_FLD  ' '
#define MSEP_REC  '\n'
#define MSEP_ALT  '\v'
106
// Default flag values. DEFAULTFLAGS and FORBIDDENWORD share the same value.
#define DEFAULTFLAGS    65510
#define FORBIDDENWORD   65510
#define ONLYUPCASEFLAG  65511
111
112// convert UTF-16 characters to UTF-8
113LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
114
115// convert UTF-8 characters to UTF-16
116LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
117
118// sort 2-byte vector
119LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
120
121// binary search in 2-byte vector
122LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
123
124// remove end of line char(s)
125LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
126
127// duplicate string
128LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
129
130// strcat for limited length destination string
131LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
132
133// duplicate reverse of string
134LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
135
136// parse into tokens with char delimiter
137LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
138// parse into tokens with char delimiter
139LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
140
141// parse into tokens with char delimiter
142LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
143
144// append s to ends of every lines in text
145LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
146
147// tokenize into lines with new line
148LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
149
150// tokenize into lines with new line and uniq in place
151LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
152LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
153
154// change oldchar to newchar in place
155LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
156
157// reverse word
158LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
159
160// reverse word
161LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
162
163// remove duplicates
164LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
165
166// free character array list
167LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
168
// Character encoding information: one record of case data for a character.
// NOTE(review): field meanings below are inferred from the names — confirm
// against the encoding tables that fill this struct.
struct cs_info {
  unsigned char ccase;   // presumably a case indicator for the character
  unsigned char clower;  // presumably the lowercase form
  unsigned char cupper;  // presumably the uppercase form
};
175
176LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
177LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
178LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
179LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
180LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
181
182LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
183
184// get language identifiers of language codes
185LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
186
187// get characters of the given 8bit encoding with lower- and uppercase forms
188LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
189
190// convert null terminated string to all caps using encoding
191LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
192
193// convert null terminated string to all little using encoding
194LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
195
196// convert null terminated string to have initial capital using encoding
197LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
198
199// convert null terminated string to all caps
200LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
201
202// convert null terminated string to all little
203LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
204
205// convert null terminated string to have initial capital
206LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
207
208// convert first nc characters of UTF-8 string to little
209LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
210
211// convert first nc characters of UTF-8 string to capital
212LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
213
214// get type of capitalization
215LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
216
217// get type of capitalization (UTF-8)
218LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
219
220// strip all ignored characters in the string
221LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
222
223// strip all ignored characters in the string
224LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
225
226LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
227
228LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
229    int * out_utf16_len, int utf8, int ln);
230
231LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
232LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
233
234LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
235
236LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
237
238// conversion function for protected memory
239LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
240
241// conversion function for protected memory
242LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
243
244// hash entry macros
245LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
246{
247    char *ret;
248    if (!h->var)
249        ret = NULL;
250    else if (h->var & H_OPT_ALIASM)
251        ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
252    else 
253        ret = HENTRY_WORD(h) + h->blen + 1;
254    return ret;
255}
256
257// NULL-free version for warning-free OOo build
258LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
259{
260    const char *ret;
261    if (!h->var)
262        ret = "";
263    else if (h->var & H_OPT_ALIASM)
264        ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
265    else
266        ret = HENTRY_WORD(h) + h->blen + 1;
267    return ret;
268}
269
270LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
271{
272    return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
273}
274
// Nonzero when a and b have equal l and h members. Both arguments are
// evaluated twice, so pass expressions without side effects.
#define w_char_eq(a, b) ((a).l == (b).l && (a).h == (b).h)
276
277#endif