PageRenderTime 85ms CodeModel.GetById 2ms app.highlight 74ms RepoModel.GetById 2ms app.codeStats 0ms

/extensions/spellcheck/hunspell/src/hashmgr.cpp

http://github.com/zpao/v8monkey
C++ | 982 lines | 814 code | 61 blank | 107 comment | 284 complexity | 368381e8b15b43ed8ac649b3dd0ff7c9 MD5 | raw file
  1/******* BEGIN LICENSE BLOCK *******
  2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  3 * 
  4 * The contents of this file are subject to the Mozilla Public License Version
  5 * 1.1 (the "License"); you may not use this file except in compliance with
  6 * the License. You may obtain a copy of the License at
  7 * http://www.mozilla.org/MPL/
  8 * 
  9 * Software distributed under the License is distributed on an "AS IS" basis,
 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 11 * for the specific language governing rights and limitations under the
 12 * License.
 13 * 
 14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
 15 * and László Németh (Hunspell). Portions created by the Initial Developers
 16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
 17 * 
 18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
 19 *                 David Einstein (deinst@world.std.com)
 20 *                 László Németh (nemethl@gyorsposta.hu)
 21 *                 Caolan McNamara (caolanm@redhat.com)
 22 *                 Davide Prina
 23 *                 Giuseppe Modugno
 24 *                 Gianluca Turconi
 25 *                 Simon Brouwer
 26 *                 Noll Janos
 27 *                 Biro Arpad
 28 *                 Goldman Eleonora
 29 *                 Sarlos Tamas
 30 *                 Bencsath Boldizsar
 31 *                 Halacsy Peter
 32 *                 Dvornik Laszlo
 33 *                 Gefferth Andras
 34 *                 Nagy Viktor
 35 *                 Varga Daniel
 36 *                 Chris Halls
 37 *                 Rene Engelhard
 38 *                 Bram Moolenaar
 39 *                 Dafydd Jones
 40 *                 Harri Pitkanen
 41 *                 Andras Timar
 42 *                 Tor Lillqvist
 43 * 
 44 * Alternatively, the contents of this file may be used under the terms of
 45 * either the GNU General Public License Version 2 or later (the "GPL"), or
 46 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 47 * in which case the provisions of the GPL or the LGPL are applicable instead
 48 * of those above. If you wish to allow use of your version of this file only
 49 * under the terms of either the GPL or the LGPL, and not to allow others to
 50 * use your version of this file under the terms of the MPL, indicate your
 51 * decision by deleting the provisions above and replace them with the notice
 52 * and other provisions required by the GPL or the LGPL. If you do not delete
 53 * the provisions above, a recipient may use your version of this file under
 54 * the terms of any one of the MPL, the GPL or the LGPL.
 55 *
 56 ******* END LICENSE BLOCK *******/
 57
 58#include <stdlib.h> 
 59#include <string.h>
 60#include <stdio.h> 
 61#include <ctype.h>
 62
 63#include "hashmgr.hxx"
 64#include "csutil.hxx"
 65#include "atypes.hxx"
 66
 67// build a hash table from a munched word list
 68
 69HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
 70{
 71  tablesize = 0;
 72  tableptr = NULL;
 73  flag_mode = FLAG_CHAR;
 74  complexprefixes = 0;
 75  utf8 = 0;
 76  langnum = 0;
 77  lang = NULL;
 78  enc = NULL;
 79  csconv = 0;
 80  ignorechars = NULL;
 81  ignorechars_utf16 = NULL;
 82  ignorechars_utf16_len = 0;
 83  numaliasf = 0;
 84  aliasf = NULL;
 85  numaliasm = 0;
 86  aliasm = NULL;
 87  forbiddenword = FORBIDDENWORD; // forbidden word signing flag
 88  load_config(apath, key);
 89  int ec = load_tables(tpath, key);
 90  if (ec) {
 91    /* error condition - what should we do here */
 92    HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
 93    if (tableptr) {
 94      free(tableptr);
 95      tableptr = NULL;
 96    }
 97    tablesize = 0;
 98  }
 99}
100
101
102HashMgr::~HashMgr()
103{
104  if (tableptr) {
105    // now pass through hash table freeing up everything
106    // go through column by column of the table
107    for (int i=0; i < tablesize; i++) {
108      struct hentry * pt = tableptr[i];
109      struct hentry * nt = NULL;
110      while(pt) {
111        nt = pt->next;
112        if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr);
113        free(pt);
114        pt = nt;
115      }
116    }
117    free(tableptr);
118  }
119  tablesize = 0;
120
121  if (aliasf) {
122    for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);
123    free(aliasf);
124    aliasf = NULL;
125    if (aliasflen) {
126      free(aliasflen);
127      aliasflen = NULL;
128    }
129  }
130  if (aliasm) {
131    for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);
132    free(aliasm);
133    aliasm = NULL;
134  }  
135
136#ifndef OPENOFFICEORG
137#ifndef MOZILLA_CLIENT
138  if (utf8) free_utf_tbl();
139#endif
140#endif
141
142  if (enc) free(enc);
143  if (lang) free(lang);
144  
145  if (ignorechars) free(ignorechars);
146  if (ignorechars_utf16) free(ignorechars_utf16);
147
148#ifdef MOZILLA_CLIENT
149    delete [] csconv;
150#endif
151}
152
153// lookup a root word in the hashtable
154
155struct hentry * HashMgr::lookup(const char *word) const
156{
157    struct hentry * dp;
158    if (tableptr) {
159       dp = tableptr[hash(word)];
160       if (!dp) return NULL;
161       for (  ;  dp != NULL;  dp = dp->next) {
162          if (strcmp(word, dp->word) == 0) return dp;
163       }
164    }
165    return NULL;
166}
167
168// add a word to the hash table (private)
169int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
170    int al, const char * desc, bool onlyupcase)
171{
172    bool upcasehomonym = false;
173    int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
174    // variable-length hash record with word and optional fields
175    struct hentry* hp = 
176	(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
177    if (!hp) return 1;
178    char * hpw = hp->word;
179    strcpy(hpw, word);
180    if (ignorechars != NULL) {
181      if (utf8) {
182        remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);
183      } else {
184        remove_ignored_chars(hpw, ignorechars);
185      }
186    }
187    if (complexprefixes) {
188        if (utf8) reverseword_utf(hpw); else reverseword(hpw);
189    }
190
191    int i = hash(hpw);
192
193    hp->blen = (unsigned char) wbl;
194    hp->clen = (unsigned char) wcl;
195    hp->alen = (short) al;
196    hp->astr = aff;
197    hp->next = NULL;      
198    hp->next_homonym = NULL;
199
200    // store the description string or its pointer
201    if (desc) {
202        hp->var = H_OPT;
203        if (aliasm) {
204            hp->var += H_OPT_ALIASM;
205            store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
206        } else {
207	    strcpy(hpw + wbl + 1, desc);
208            if (complexprefixes) {
209                if (utf8) reverseword_utf(HENTRY_DATA(hp));
210                else reverseword(HENTRY_DATA(hp));
211            }
212        }
213	if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;
214    } else hp->var = 0;
215
216       struct hentry * dp = tableptr[i];
217       if (!dp) {
218         tableptr[i] = hp;
219         return 0;
220       }
221       while (dp->next != NULL) {
222         if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
223    	    // remove hidden onlyupcase homonym
224            if (!onlyupcase) {
225		if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
226		    free(dp->astr);
227		    dp->astr = hp->astr;
228		    dp->alen = hp->alen;
229		    free(hp);
230		    return 0;
231		} else {
232    		    dp->next_homonym = hp;
233    		}
234            } else {
235        	upcasehomonym = true;
236            }
237         }
238         dp=dp->next;
239       }
240       if (strcmp(hp->word, dp->word) == 0) {
241    	    // remove hidden onlyupcase homonym
242            if (!onlyupcase) {
243		if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
244		    free(dp->astr);
245		    dp->astr = hp->astr;
246		    dp->alen = hp->alen;
247		    free(hp);
248		    return 0;
249		} else {
250    		    dp->next_homonym = hp;
251    		}
252            } else {
253        	upcasehomonym = true;
254            }
255       }
256       if (!upcasehomonym) {
257    	    dp->next = hp;
258       } else {
259    	    // remove hidden onlyupcase homonym
260    	    if (hp->astr) free(hp->astr);
261    	    free(hp);
262       }
263    return 0;
264}     
265
266int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
267    unsigned short * flags, int al, char * dp, int captype)
268{
269    // add inner capitalized forms to handle the following allcap forms:
270    // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
271    // Allcaps with suffixes: CIA's -> CIA'S    
272    if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
273      ((captype == ALLCAP) && (flags != NULL))) &&
274      !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {
275          unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));
276	  if (!flags2) return 1;
277          if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
278          flags2[al] = ONLYUPCASEFLAG;
279          if (utf8) {
280              char st[BUFSIZE];
281              w_char w[BUFSIZE];
282              int wlen = u8_u16(w, BUFSIZE, word);
283              mkallsmall_utf(w, wlen, langnum);
284              mkallcap_utf(w, 1, langnum);
285              u16_u8(st, BUFSIZE, w, wlen);
286              return add_word(st,wbl,wcl,flags2,al+1,dp, true);
287           } else {
288               mkallsmall(word, csconv);
289               mkinitcap(word, csconv);
290               return add_word(word,wbl,wcl,flags2,al+1,dp, true);
291           }
292    }
293    return 0;
294}
295
296// detect captype and modify word length for UTF-8 encoding
297int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
298    int len;
299    if (utf8) {
300      w_char dest_utf[BUFSIZE];
301      len = u8_u16(dest_utf, BUFSIZE, word);
302      *captype = get_captype_utf8(dest_utf, len, langnum);
303    } else {
304      len = wbl;
305      *captype = get_captype((char *) word, len, csconv);
306    }
307    return len;
308}
309
310// remove word (personal dictionary function for standalone applications)
311int HashMgr::remove(const char * word)
312{
313    struct hentry * dp = lookup(word);
314    while (dp) {
315        if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
316            unsigned short * flags =
317                (unsigned short *) malloc(sizeof(short) * (dp->alen + 1));
318            if (!flags) return 1;
319            for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
320            flags[dp->alen] = forbiddenword;
321            dp->astr = flags;
322            dp->alen++;
323            flag_qsort(flags, 0, dp->alen);
324        }
325        dp = dp->next_homonym;
326    }
327    return 0;
328}
329
330/* remove forbidden flag to add a personal word to the hash */
331int HashMgr::remove_forbidden_flag(const char * word) {
332    struct hentry * dp = lookup(word);
333    if (!dp) return 1;
334    while (dp) {
335         if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {
336            if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.
337            else {
338                unsigned short * flags2 =
339                    (unsigned short *) malloc(sizeof(short) * (dp->alen - 1));
340                if (!flags2) return 1;
341                int i, j = 0;
342                for (i = 0; i < dp->alen; i++) {
343                    if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];
344                }
345                dp->alen--;
346                dp->astr = flags2; // XXX allowed forbidden words
347            }
348         }
349         dp = dp->next_homonym;
350       }
351   return 0;
352}
353
354// add a custom dic. word to the hash table (public)
355int HashMgr::add(const char * word)
356{
357    unsigned short * flags = NULL;
358    int al = 0;
359    if (remove_forbidden_flag(word)) {
360        int captype;
361        int wbl = strlen(word);
362        int wcl = get_clen_and_captype(word, wbl, &captype);
363        add_word(word, wbl, wcl, flags, al, NULL, false);
364        return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);
365    }
366    return 0;
367}
368
369int HashMgr::add_with_affix(const char * word, const char * example)
370{
371    // detect captype and modify word length for UTF-8 encoding
372    struct hentry * dp = lookup(example);
373    remove_forbidden_flag(word);
374    if (dp && dp->astr) {
375        int captype;
376        int wbl = strlen(word);
377        int wcl = get_clen_and_captype(word, wbl, &captype);
378	if (aliasf) {
379	    add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);	
380	} else {
381    	    unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));
382	    if (flags) {
383		memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
384		add_word(word, wbl, wcl, flags, dp->alen, NULL, false);
385	    } else return 1;
386	}
387    	return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);
388    }
389    return 1;
390}
391
392// walk the hash table entry by entry - null at end
393// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
394struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
395{  
396  if (hp && hp->next != NULL) return hp->next;
397  for (col++; col < tablesize; col++) {
398    if (tableptr[col]) return tableptr[col];
399  }
400  // null at end and reset to start
401  col = -1;
402  return NULL;
403}
404
405// load a munched word list and build a hash table on the fly
406int HashMgr::load_tables(const char * tpath, const char * key)
407{
408  int al;
409  char * ap;
410  char * dp;
411  char * dp2;
412  unsigned short * flags;
413  char * ts;
414
415  // open dictionary file
416  FileMgr * dict = new FileMgr(tpath, key);
417  if (dict == NULL) return 1;
418
419  // first read the first line of file to get hash table size */
420  if (!(ts = dict->getline())) {
421    HUNSPELL_WARNING(stderr, "error: empty dic file\n");
422    delete dict;
423    return 2;
424  }
425  mychomp(ts);
426
427  /* remove byte order mark */
428  if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {
429    memmove(ts, ts+3, strlen(ts+3)+1);
430    // warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions
431  }
432
433  tablesize = atoi(ts);
434  if (tablesize == 0) {
435    HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
436    delete dict;
437    return 4;
438  }
439  tablesize = tablesize + 5 + USERWORD;
440  if ((tablesize %2) == 0) tablesize++;
441
442  // allocate the hash table
443  tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));
444  if (! tableptr) {
445    delete dict;
446    return 3;
447  }
448  for (int i=0; i<tablesize; i++) tableptr[i] = NULL;
449
450  // loop through all words on much list and add to hash
451  // table and create word and affix strings
452
453  while ((ts = dict->getline())) {
454    mychomp(ts);
455    // split each line into word and morphological description
456    dp = ts;
457    while ((dp = strchr(dp, ':'))) {
458	if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
459	    for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);
460	    if (dp < ts) { // missing word
461		dp = NULL;
462	    } else {
463		*(dp + 1) = '\0';
464		dp = dp + 2;
465	    }
466	    break;
467	}
468	dp++;
469    }
470
471    // tabulator is the old morphological field separator
472    dp2 = strchr(ts, '\t');
473    if (dp2 && (!dp || dp2 < dp)) {
474	*dp2 = '\0';
475	dp = dp2 + 1;
476    }
477
478    // split each line into word and affix char strings
479    // "\/" signs slash in words (not affix separator)
480    // "/" at beginning of the line is word character (not affix separator)
481    ap = strchr(ts,'/');
482    while (ap) {
483        if (ap == ts) {
484            ap++;
485            continue;
486        } else if (*(ap - 1) != '\\') break;
487        // replace "\/" with "/"
488        for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
489        ap = strchr(ap,'/');
490    }
491
492    if (ap) {
493      *ap = '\0';
494      if (aliasf) {
495        int index = atoi(ap + 1);
496        al = get_aliasf(index, &flags, dict);
497        if (!al) {
498            HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());
499            *ap = '\0';
500        }
501      } else {
502        al = decode_flags(&flags, ap + 1, dict);
503        if (al == -1) {
504            HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
505            delete dict;
506            return 6;
507        }
508        flag_qsort(flags, 0, al);
509      }
510    } else {
511      al = 0;
512      ap = NULL;
513      flags = NULL;
514    }
515
516    int captype;
517    int wbl = strlen(ts);
518    int wcl = get_clen_and_captype(ts, wbl, &captype);
519    // add the word and its index plus its capitalized form optionally
520    if (add_word(ts,wbl,wcl,flags,al,dp, false) ||
521	add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {
522	delete dict;
523	return 5;
524    }
525  }
526
527  delete dict;
528  return 0;
529}
530
531// the hash function is a simple load and rotate
532// algorithm borrowed
533
534int HashMgr::hash(const char * word) const
535{
536    long  hv = 0;
537    for (int i=0; i < 4  &&  *word != 0; i++)
538        hv = (hv << 8) | (*word++);
539    while (*word != 0) {
540      ROTATE(hv,ROTATE_LEN);
541      hv ^= (*word++);
542    }
543    return (unsigned long) hv % tablesize;
544}
545
546int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
547    int len;
548    if (*flags == '\0') {
549        *result = NULL;
550        return 0;
551    }
552    switch (flag_mode) {
553      case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
554        len = strlen(flags);
555        if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
556        len /= 2;
557        *result = (unsigned short *) malloc(len * sizeof(short));
558        if (!*result) return -1;
559        for (int i = 0; i < len; i++) {
560            (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1]; 
561        }
562        break;
563      }
564      case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
565        int i;
566        len = 1;
567        char * src = flags; 
568        unsigned short * dest;
569        char * p;
570        for (p = flags; *p; p++) {
571          if (*p == ',') len++;
572        }
573        *result = (unsigned short *) malloc(len * sizeof(short));
574        if (!*result) return -1;
575        dest = *result;
576        for (p = flags; *p; p++) {
577          if (*p == ',') {
578            i = atoi(src);
579            if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
580              af->getlinenum(), i, DEFAULTFLAGS - 1);
581            *dest = (unsigned short) i;
582            if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
583            src = p + 1;
584            dest++;
585          }
586        }
587        i = atoi(src);
588        if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
589          af->getlinenum(), i, DEFAULTFLAGS - 1);
590        *dest = (unsigned short) i;
591        if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
592        break;
593      }    
594      case FLAG_UNI: { // UTF-8 characters
595        w_char w[BUFSIZE/2];
596        len = u8_u16(w, BUFSIZE/2, flags);
597        *result = (unsigned short *) malloc(len * sizeof(short));
598        if (!*result) return -1;
599        memcpy(*result, w, len * sizeof(short));
600        break;
601      }
602      default: { // Ispell's one-character flags (erfg -> e r f g)
603        unsigned short * dest;
604        len = strlen(flags);
605        *result = (unsigned short *) malloc(len * sizeof(short));
606        if (!*result) return -1;
607        dest = *result;
608        for (unsigned char * p = (unsigned char *) flags; *p; p++) {
609          *dest = (unsigned short) *p;
610          dest++;
611        }
612      }
613    }
614    return len;
615}
616
617unsigned short HashMgr::decode_flag(const char * f) {
618    unsigned short s = 0;
619    int i;
620    switch (flag_mode) {
621      case FLAG_LONG:
622        s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];
623        break;
624      case FLAG_NUM:
625        i = atoi(f);
626        if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);
627        s = (unsigned short) i;
628        break;
629      case FLAG_UNI:
630        u8_u16((w_char *) &s, 1, f);
631        break;
632      default:
633        s = (unsigned short) *((unsigned char *)f);
634    }
635    if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
636    return s;
637}
638
639char * HashMgr::encode_flag(unsigned short f) {
640    unsigned char ch[10];
641    if (f==0) return mystrdup("(NULL)");
642    if (flag_mode == FLAG_LONG) {
643        ch[0] = (unsigned char) (f >> 8);
644        ch[1] = (unsigned char) (f - ((f >> 8) << 8));
645        ch[2] = '\0';
646    } else if (flag_mode == FLAG_NUM) {
647        sprintf((char *) ch, "%d", f);
648    } else if (flag_mode == FLAG_UNI) {
649        u16_u8((char *) &ch, 10, (w_char *) &f, 1);
650    } else {
651        ch[0] = (unsigned char) (f);
652        ch[1] = '\0';
653    }
654    return mystrdup((char *) ch);
655}
656
657// read in aff file and set flag mode
658int  HashMgr::load_config(const char * affpath, const char * key)
659{
660  char * line; // io buffers
661  int firstline = 1;
662 
663  // open the affix file
664  FileMgr * afflst = new FileMgr(affpath, key);
665  if (!afflst) {
666    HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
667    return 1;
668  }
669
670    // read in each line ignoring any that do not
671    // start with a known line type indicator
672
673    while ((line = afflst->getline())) {
674        mychomp(line);
675
676       /* remove byte order mark */
677       if (firstline) {
678         firstline = 0;
679         if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1);
680       }
681
682        /* parse in the try string */
683        if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {
684            if (flag_mode != FLAG_CHAR) {
685                HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum());
686            }
687            if (strstr(line, "long")) flag_mode = FLAG_LONG;
688            if (strstr(line, "num")) flag_mode = FLAG_NUM;
689            if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
690            if (flag_mode == FLAG_CHAR) {
691                HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum());
692            }
693        }
694        if (strncmp(line,"FORBIDDENWORD",13) == 0) {
695          char * st = NULL;
696          if (parse_string(line, &st, afflst->getlinenum())) {
697             delete afflst;
698             return 1;
699          }
700          forbiddenword = decode_flag(st);
701          free(st);
702        }
703        if (strncmp(line, "SET", 3) == 0) {
704    	  if (parse_string(line, &enc, afflst->getlinenum())) {
705             delete afflst;
706             return 1;
707          }    	    
708    	  if (strcmp(enc, "UTF-8") == 0) {
709    	    utf8 = 1;
710#ifndef OPENOFFICEORG
711#ifndef MOZILLA_CLIENT
712    	    initialize_utf_tbl();
713#endif
714#endif
715    	  } else csconv = get_current_cs(enc);
716    	}
717        if (strncmp(line, "LANG", 4) == 0) {
718    	  if (parse_string(line, &lang, afflst->getlinenum())) {
719             delete afflst;
720             return 1;
721          }    	    
722    	  langnum = get_lang_num(lang);
723    	}
724
725       /* parse in the ignored characters (for example, Arabic optional diacritics characters */
726       if (strncmp(line,"IGNORE",6) == 0) {
727          if (parse_array(line, &ignorechars, &ignorechars_utf16,
728                 &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
729             delete afflst;
730             return 1;
731          }
732       }
733
734       if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {
735          if (parse_aliasf(line, afflst)) {
736             delete afflst;
737             return 1;
738          }
739       }
740
741       if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {
742          if (parse_aliasm(line, afflst)) {
743             delete afflst;
744             return 1;
745          }
746       }
747
748       if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
749       if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;
750    }
751    if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING);
752    delete afflst;
753    return 0;
754}
755
756/* parse in the ALIAS table */
757int  HashMgr::parse_aliasf(char * line, FileMgr * af)
758{
759   if (numaliasf != 0) {
760      HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
761      return 1;
762   }
763   char * tp = line;
764   char * piece;
765   int i = 0;
766   int np = 0;
767   piece = mystrsep(&tp, 0);
768   while (piece) {
769       if (*piece != '\0') {
770          switch(i) {
771             case 0: { np++; break; }
772             case 1: { 
773                       numaliasf = atoi(piece);
774                       if (numaliasf < 1) {
775                          numaliasf = 0;
776                          aliasf = NULL;
777                          aliasflen = NULL;
778                          HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
779                          return 1;
780                       }
781                       aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));
782                       aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short));
783                       if (!aliasf || !aliasflen) {
784                          numaliasf = 0;
785                          if (aliasf) free(aliasf);
786                          if (aliasflen) free(aliasflen);
787                          aliasf = NULL;
788                          aliasflen = NULL;
789                          return 1;
790                       }
791                       np++;
792                       break;
793                     }
794             default: break;
795          }
796          i++;
797       }
798       piece = mystrsep(&tp, 0);
799   }
800   if (np != 2) {
801      numaliasf = 0;
802      free(aliasf);
803      free(aliasflen);
804      aliasf = NULL;
805      aliasflen = NULL;
806      HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
807      return 1;
808   } 
809 
810   /* now parse the numaliasf lines to read in the remainder of the table */
811   char * nl;
812   for (int j=0; j < numaliasf; j++) {
813        if (!(nl = af->getline())) return 1;
814        mychomp(nl);
815        tp = nl;
816        i = 0;
817        aliasf[j] = NULL;
818        aliasflen[j] = 0;
819        piece = mystrsep(&tp, 0);
820        while (piece) {
821           if (*piece != '\0') {
822               switch(i) {
823                  case 0: {
824                             if (strncmp(piece,"AF",2) != 0) {
825                                 numaliasf = 0;
826                                 free(aliasf);
827                                 free(aliasflen);
828                                 aliasf = NULL;
829                                 aliasflen = NULL;
830                                 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
831                                 return 1;
832                             }
833                             break;
834                          }
835                  case 1: {
836                            aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece, af);
837                            flag_qsort(aliasf[j], 0, aliasflen[j]);
838                            break; 
839                          }
840                  default: break;
841               }
842               i++;
843           }
844           piece = mystrsep(&tp, 0);
845        }
846        if (!aliasf[j]) {
847             free(aliasf);
848             free(aliasflen);
849             aliasf = NULL;
850             aliasflen = NULL;
851             numaliasf = 0;
852             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
853             return 1;
854        }
855   }
856   return 0;
857}
858
859int HashMgr::is_aliasf() {
860    return (aliasf != NULL);
861}
862
863int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) {
864    if ((index > 0) && (index <= numaliasf)) {
865        *fvec = aliasf[index - 1];
866        return aliasflen[index - 1];
867    }
868    HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->getlinenum(), index);
869    *fvec = NULL;
870    return 0;
871}
872
873/* parse morph alias definitions */
874int  HashMgr::parse_aliasm(char * line, FileMgr * af)
875{
876   if (numaliasm != 0) {
877      HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
878      return 1;
879   }
880   char * tp = line;
881   char * piece;
882   int i = 0;
883   int np = 0;
884   piece = mystrsep(&tp, 0);
885   while (piece) {
886       if (*piece != '\0') {
887          switch(i) {
888             case 0: { np++; break; }
889             case 1: { 
890                       numaliasm = atoi(piece);
891                       if (numaliasm < 1) {
892                          HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
893                          return 1;
894                       }
895                       aliasm = (char **) malloc(numaliasm * sizeof(char *));
896                       if (!aliasm) {
897                          numaliasm = 0;
898                          return 1;
899                       }
900                       np++;
901                       break;
902                     }
903             default: break;
904          }
905          i++;
906       }
907       piece = mystrsep(&tp, 0);
908   }
909   if (np != 2) {
910      numaliasm = 0;
911      free(aliasm);
912      aliasm = NULL;
913      HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
914      return 1;
915   } 
916
917   /* now parse the numaliasm lines to read in the remainder of the table */
918   char * nl = line;
919   for (int j=0; j < numaliasm; j++) {
920        if (!(nl = af->getline())) return 1;
921        mychomp(nl);
922        tp = nl;
923        i = 0;
924        aliasm[j] = NULL;
925        piece = mystrsep(&tp, ' ');
926        while (piece) {
927           if (*piece != '\0') {
928               switch(i) {
929                  case 0: {
930                             if (strncmp(piece,"AM",2) != 0) {
931                                 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
932                                 numaliasm = 0;
933                                 free(aliasm);
934                                 aliasm = NULL;
935                                 return 1;
936                             }
937                             break;
938                          }
939                  case 1: {
940                            // add the remaining of the line
941                            if (*tp) {
942                                *(tp - 1) = ' ';
943                                tp = tp + strlen(tp);
944                            }
945                            if (complexprefixes) {
946                                if (utf8) reverseword_utf(piece);
947                                    else reverseword(piece);
948                            }
949                            aliasm[j] = mystrdup(piece);
950                            if (!aliasm[j]) {
951                                 numaliasm = 0;
952                                 free(aliasm);
953                                 aliasm = NULL;
954                                 return 1;
955                            }
956                            break; }
957                  default: break;
958               }
959               i++;
960           }
961           piece = mystrsep(&tp, ' ');
962        }
963        if (!aliasm[j]) {
964             numaliasm = 0;
965             free(aliasm);
966             aliasm = NULL;
967             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
968             return 1;
969        }
970   }
971   return 0;
972}
973
974int HashMgr::is_aliasm() {
975    return (aliasm != NULL);
976}
977
978char * HashMgr::get_aliasm(int index) {
979    if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1];
980    HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
981    return NULL;
982}