/extensions/spellcheck/hunspell/src/csutil.hxx

http://github.com/zpao/v8monkey · C++ Header · 277 lines · 121 code · 57 blank · 99 comment · 10 complexity · 2a715548ff3980d31043fa89a76425da MD5 · raw file

  1. /******* BEGIN LICENSE BLOCK *******
  2. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  3. *
  4. * The contents of this file are subject to the Mozilla Public License Version
  5. * 1.1 (the "License"); you may not use this file except in compliance with
  6. * the License. You may obtain a copy of the License at
  7. * http://www.mozilla.org/MPL/
  8. *
  9. * Software distributed under the License is distributed on an "AS IS" basis,
  10. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  11. * for the specific language governing rights and limitations under the
  12. * License.
  13. *
  14. * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
  15. * and László Németh (Hunspell). Portions created by the Initial Developers
  16. * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
  17. *
  18. * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
  19. * David Einstein (deinst@world.std.com)
  20. * László Németh (nemethl@gyorsposta.hu)
  21. * Caolan McNamara (caolanm@redhat.com)
  22. * Davide Prina
  23. * Giuseppe Modugno
  24. * Gianluca Turconi
  25. * Simon Brouwer
  26. * Noll Janos
  27. * Biro Arpad
  28. * Goldman Eleonora
  29. * Sarlos Tamas
  30. * Bencsath Boldizsar
  31. * Halacsy Peter
  32. * Dvornik Laszlo
  33. * Gefferth Andras
  34. * Nagy Viktor
  35. * Varga Daniel
  36. * Chris Halls
  37. * Rene Engelhard
  38. * Bram Moolenaar
  39. * Dafydd Jones
  40. * Harri Pitkanen
  41. * Andras Timar
  42. * Tor Lillqvist
  43. *
  44. * Alternatively, the contents of this file may be used under the terms of
  45. * either the GNU General Public License Version 2 or later (the "GPL"), or
  46. * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  47. * in which case the provisions of the GPL or the LGPL are applicable instead
  48. * of those above. If you wish to allow use of your version of this file only
  49. * under the terms of either the GPL or the LGPL, and not to allow others to
  50. * use your version of this file under the terms of the MPL, indicate your
  51. * decision by deleting the provisions above and replace them with the notice
  52. * and other provisions required by the GPL or the LGPL. If you do not delete
  53. * the provisions above, a recipient may use your version of this file under
  54. * the terms of any one of the MPL, the GPL or the LGPL.
  55. *
  56. ******* END LICENSE BLOCK *******/
  57. #ifndef __CSUTILHXX__
  58. #define __CSUTILHXX__
  59. #include "hunvisapi.h"
  60. // First some base level utility routines
  61. #include <string.h>
  62. #include "w_char.hxx"
  63. #include "htypes.hxx"
  64. #ifdef MOZILLA_CLIENT
  65. #include "nscore.h" // for mozalloc headers
  66. #endif
  67. // casing
  68. #define NOCAP 0
  69. #define INITCAP 1
  70. #define ALLCAP 2
  71. #define HUHCAP 3
  72. #define HUHINITCAP 4
  73. // default encoding and keystring
  74. #define SPELL_ENCODING "ISO8859-1"
  75. #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
  76. // default morphological fields
  77. #define MORPH_STEM "st:"
  78. #define MORPH_ALLOMORPH "al:"
  79. #define MORPH_POS "po:"
  80. #define MORPH_DERI_PFX "dp:"
  81. #define MORPH_INFL_PFX "ip:"
  82. #define MORPH_TERM_PFX "tp:"
  83. #define MORPH_DERI_SFX "ds:"
  84. #define MORPH_INFL_SFX "is:"
  85. #define MORPH_TERM_SFX "ts:"
  86. #define MORPH_SURF_PFX "sp:"
  87. #define MORPH_FREQ "fr:"
  88. #define MORPH_PHON "ph:"
  89. #define MORPH_HYPH "hy:"
  90. #define MORPH_PART "pa:"
  91. #define MORPH_FLAG "fl:"
  92. #define MORPH_HENTRY "_H:"
  93. #define MORPH_TAG_LEN strlen(MORPH_STEM)
  94. #define MSEP_FLD ' '
  95. #define MSEP_REC '\n'
  96. #define MSEP_ALT '\v'
  97. // default flags
  98. #define DEFAULTFLAGS 65510
  99. #define FORBIDDENWORD 65510
  100. #define ONLYUPCASEFLAG 65511
  101. // convert UTF-16 characters to UTF-8
  102. LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
  103. // convert UTF-8 characters to UTF-16
  104. LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
  105. // sort 2-byte vector
  106. LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
  107. // binary search in 2-byte vector
  108. LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
  109. // remove end of line char(s)
  110. LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
  111. // duplicate string
  112. LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
  113. // strcat for limited length destination string
  114. LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
  115. // duplicate reverse of string
  116. LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
  117. // parse into tokens with char delimiter
  118. LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
  119. // parse into tokens with char delimiter
  120. LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
  121. // string replace; presumably substitutes the 3rd argument for occurrences of the 2nd within the 1st (verify in csutil.cxx)
  122. LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
  123. // append s to the end of every line in lines
  124. LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
  125. // tokenize into lines with new line
  126. LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
  127. // tokenize into lines with new line and uniq in place
  128. LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
  129. LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
  130. // change oldchar to newchar in place
  131. LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
  132. // reverse word
  133. LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
  134. // reverse word
  135. LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
  136. // remove duplicates
  137. LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
  138. // free character array list
  139. LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
  140. // character encoding information
  141. struct cs_info { // one record of character case information; get_current_cs() hands out pointers to these
  142. unsigned char ccase; // NOTE(review): presumably a flag marking the character as uppercase - confirm in csutil.cxx
  143. unsigned char clower; // presumably the lowercase form of the character - confirm
  144. unsigned char cupper; // presumably the uppercase form of the character - confirm
  145. };
  146. LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
  147. LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
  148. LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
  149. LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
  150. LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
  151. LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
  152. // get language identifiers of language codes
  153. LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
  154. // get characters of the given 8bit encoding with lower- and uppercase forms
  155. LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
  156. // convert null terminated string to all caps using encoding
  157. LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
  158. // convert null terminated string to all lowercase using encoding
  159. LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
  160. // convert null terminated string to have initial capital using encoding
  161. LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
  162. // convert null terminated string to all caps
  163. LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
  164. // convert null terminated string to all lowercase
  165. LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
  166. // convert null terminated string to have initial capital
  167. LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
  168. // convert first nc characters of UTF-16 (w_char) string to lowercase
  169. LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
  170. // convert first nc characters of UTF-16 (w_char) string to uppercase
  171. LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
  172. // get type of capitalization
  173. LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
  174. // get type of capitalization (UTF-8)
  175. LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
  176. // strip all ignored characters in the string
  177. LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
  178. // strip all ignored characters in the string
  179. LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
  180. LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
  181. LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
  182. int * out_utf16_len, int utf8, int ln);
  183. LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
  184. LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
  185. LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
  186. LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
  187. // conversion function for protected memory
  188. LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
  189. // conversion function for protected memory
  190. LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
  191. // hash entry accessors (inline functions)
  192. LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
  193. {
  194. char *ret;
  195. if (!h->var)
  196. ret = NULL;
  197. else if (h->var & H_OPT_ALIASM)
  198. ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
  199. else
  200. ret = HENTRY_WORD(h) + h->blen + 1;
  201. return ret;
  202. }
  203. // NULL-free version for warning-free OOo build
  204. LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
  205. {
  206. const char *ret;
  207. if (!h->var)
  208. ret = "";
  209. else if (h->var & H_OPT_ALIASM)
  210. ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
  211. else
  212. ret = HENTRY_WORD(h) + h->blen + 1;
  213. return ret;
  214. }
  215. LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
  216. {
  217. return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
  218. }
  219. #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h)) // true when both the .l and .h members match; a and b can each be evaluated twice, so pass side-effect-free arguments
  220. #endif