PageRenderTime 81ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/Main/Libraries/Hunspell/hunspell/hashmgr.cxx

#
C++ | 928 lines | 816 code | 61 blank | 51 comment | 284 complexity | 2f2bc91317cb7bf675500fc1d463440b MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception
  1. #include "license.hunspell"
  2. #include "license.myspell"
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <stdio.h>
  6. #include <ctype.h>
  7. #include "hashmgr.hxx"
  8. #include "csutil.hxx"
  9. #include "atypes.hxx"
  10. // build a hash table from a munched word list
  11. HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
  12. {
  13. tablesize = 0;
  14. tableptr = NULL;
  15. flag_mode = FLAG_CHAR;
  16. complexprefixes = 0;
  17. utf8 = 0;
  18. langnum = 0;
  19. lang = NULL;
  20. enc = NULL;
  21. csconv = 0;
  22. ignorechars = NULL;
  23. ignorechars_utf16 = NULL;
  24. ignorechars_utf16_len = 0;
  25. numaliasf = 0;
  26. aliasf = NULL;
  27. numaliasm = 0;
  28. aliasm = NULL;
  29. forbiddenword = FORBIDDENWORD; // forbidden word signing flag
  30. load_config(apath, key);
  31. int ec = load_tables(tpath, key);
  32. if (ec) {
  33. /* error condition - what should we do here */
  34. HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec);
  35. if (tableptr) {
  36. free(tableptr);
  37. tableptr = NULL;
  38. }
  39. tablesize = 0;
  40. }
  41. }
  42. HashMgr::~HashMgr()
  43. {
  44. if (tableptr) {
  45. // now pass through hash table freeing up everything
  46. // go through column by column of the table
  47. for (int i=0; i < tablesize; i++) {
  48. struct hentry * pt = tableptr[i];
  49. struct hentry * nt = NULL;
  50. while(pt) {
  51. nt = pt->next;
  52. if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr);
  53. free(pt);
  54. pt = nt;
  55. }
  56. }
  57. free(tableptr);
  58. }
  59. tablesize = 0;
  60. if (aliasf) {
  61. for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);
  62. free(aliasf);
  63. aliasf = NULL;
  64. if (aliasflen) {
  65. free(aliasflen);
  66. aliasflen = NULL;
  67. }
  68. }
  69. if (aliasm) {
  70. for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);
  71. free(aliasm);
  72. aliasm = NULL;
  73. }
  74. #ifndef OPENOFFICEORG
  75. #ifndef MOZILLA_CLIENT
  76. if (utf8) free_utf_tbl();
  77. #endif
  78. #endif
  79. if (enc) free(enc);
  80. if (lang) free(lang);
  81. if (ignorechars) free(ignorechars);
  82. if (ignorechars_utf16) free(ignorechars_utf16);
  83. #ifdef MOZILLA_CLIENT
  84. delete [] csconv;
  85. #endif
  86. }
  87. // lookup a root word in the hashtable
  88. struct hentry * HashMgr::lookup(const char *word) const
  89. {
  90. struct hentry * dp;
  91. if (tableptr) {
  92. dp = tableptr[hash(word)];
  93. if (!dp) return NULL;
  94. for ( ; dp != NULL; dp = dp->next) {
  95. if (strcmp(word, dp->word) == 0) return dp;
  96. }
  97. }
  98. return NULL;
  99. }
  100. // add a word to the hash table (private)
  101. int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
  102. int al, const char * desc, bool onlyupcase)
  103. {
  104. bool upcasehomonym = false;
  105. int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
  106. // variable-length hash record with word and optional fields
  107. struct hentry* hp =
  108. (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
  109. if (!hp) return 1;
  110. char * hpw = hp->word;
  111. strcpy(hpw, word);
  112. if (ignorechars != NULL) {
  113. if (utf8) {
  114. remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);
  115. } else {
  116. remove_ignored_chars(hpw, ignorechars);
  117. }
  118. }
  119. if (complexprefixes) {
  120. if (utf8) reverseword_utf(hpw); else reverseword(hpw);
  121. }
  122. int i = hash(hpw);
  123. hp->blen = (unsigned char) wbl;
  124. hp->clen = (unsigned char) wcl;
  125. hp->alen = (short) al;
  126. hp->astr = aff;
  127. hp->next = NULL;
  128. hp->next_homonym = NULL;
  129. // store the description string or its pointer
  130. if (desc) {
  131. hp->var = H_OPT;
  132. if (aliasm) {
  133. hp->var += H_OPT_ALIASM;
  134. store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
  135. } else {
  136. strcpy(hpw + wbl + 1, desc);
  137. if (complexprefixes) {
  138. if (utf8) reverseword_utf(HENTRY_DATA(hp));
  139. else reverseword(HENTRY_DATA(hp));
  140. }
  141. }
  142. if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;
  143. } else hp->var = 0;
  144. struct hentry * dp = tableptr[i];
  145. if (!dp) {
  146. tableptr[i] = hp;
  147. return 0;
  148. }
  149. while (dp->next != NULL) {
  150. if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
  151. // remove hidden onlyupcase homonym
  152. if (!onlyupcase) {
  153. if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
  154. free(dp->astr);
  155. dp->astr = hp->astr;
  156. dp->alen = hp->alen;
  157. free(hp);
  158. return 0;
  159. } else {
  160. dp->next_homonym = hp;
  161. }
  162. } else {
  163. upcasehomonym = true;
  164. }
  165. }
  166. dp=dp->next;
  167. }
  168. if (strcmp(hp->word, dp->word) == 0) {
  169. // remove hidden onlyupcase homonym
  170. if (!onlyupcase) {
  171. if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
  172. free(dp->astr);
  173. dp->astr = hp->astr;
  174. dp->alen = hp->alen;
  175. free(hp);
  176. return 0;
  177. } else {
  178. dp->next_homonym = hp;
  179. }
  180. } else {
  181. upcasehomonym = true;
  182. }
  183. }
  184. if (!upcasehomonym) {
  185. dp->next = hp;
  186. } else {
  187. // remove hidden onlyupcase homonym
  188. if (hp->astr) free(hp->astr);
  189. free(hp);
  190. }
  191. return 0;
  192. }
  193. int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl,
  194. unsigned short * flags, int al, char * dp, int captype)
  195. {
  196. // add inner capitalized forms to handle the following allcap forms:
  197. // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
  198. // Allcaps with suffixes: CIA's -> CIA'S
  199. if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
  200. ((captype == ALLCAP) && (flags != NULL))) &&
  201. !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) {
  202. unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1));
  203. if (!flags2) return 1;
  204. if (al) memcpy(flags2, flags, al * sizeof(unsigned short));
  205. flags2[al] = ONLYUPCASEFLAG;
  206. if (utf8) {
  207. char st[BUFSIZE];
  208. w_char w[BUFSIZE];
  209. int wlen = u8_u16(w, BUFSIZE, word);
  210. mkallsmall_utf(w, wlen, langnum);
  211. mkallcap_utf(w, 1, langnum);
  212. u16_u8(st, BUFSIZE, w, wlen);
  213. return add_word(st,wbl,wcl,flags2,al+1,dp, true);
  214. } else {
  215. mkallsmall(word, csconv);
  216. mkinitcap(word, csconv);
  217. return add_word(word,wbl,wcl,flags2,al+1,dp, true);
  218. }
  219. }
  220. return 0;
  221. }
  222. // detect captype and modify word length for UTF-8 encoding
  223. int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
  224. int len;
  225. if (utf8) {
  226. w_char dest_utf[BUFSIZE];
  227. len = u8_u16(dest_utf, BUFSIZE, word);
  228. *captype = get_captype_utf8(dest_utf, len, langnum);
  229. } else {
  230. len = wbl;
  231. *captype = get_captype((char *) word, len, csconv);
  232. }
  233. return len;
  234. }
  235. // remove word (personal dictionary function for standalone applications)
  236. int HashMgr::remove(const char * word)
  237. {
  238. struct hentry * dp = lookup(word);
  239. while (dp) {
  240. if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
  241. unsigned short * flags =
  242. (unsigned short *) malloc(sizeof(short) * (dp->alen + 1));
  243. if (!flags) return 1;
  244. for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
  245. flags[dp->alen] = forbiddenword;
  246. dp->astr = flags;
  247. dp->alen++;
  248. flag_qsort(flags, 0, dp->alen);
  249. }
  250. dp = dp->next_homonym;
  251. }
  252. return 0;
  253. }
  254. /* remove forbidden flag to add a personal word to the hash */
  255. int HashMgr::remove_forbidden_flag(const char * word) {
  256. struct hentry * dp = lookup(word);
  257. if (!dp) return 1;
  258. while (dp) {
  259. if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {
  260. if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.
  261. else {
  262. unsigned short * flags2 =
  263. (unsigned short *) malloc(sizeof(short) * (dp->alen - 1));
  264. if (!flags2) return 1;
  265. int i, j = 0;
  266. for (i = 0; i < dp->alen; i++) {
  267. if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];
  268. }
  269. dp->alen--;
  270. dp->astr = flags2; // XXX allowed forbidden words
  271. }
  272. }
  273. dp = dp->next_homonym;
  274. }
  275. return 0;
  276. }
  277. // add a custom dic. word to the hash table (public)
  278. int HashMgr::add(const char * word)
  279. {
  280. unsigned short * flags = NULL;
  281. int al = 0;
  282. if (remove_forbidden_flag(word)) {
  283. int captype;
  284. int wbl = strlen(word);
  285. int wcl = get_clen_and_captype(word, wbl, &captype);
  286. add_word(word, wbl, wcl, flags, al, NULL, false);
  287. return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);
  288. }
  289. return 0;
  290. }
  291. int HashMgr::add_with_affix(const char * word, const char * example)
  292. {
  293. // detect captype and modify word length for UTF-8 encoding
  294. struct hentry * dp = lookup(example);
  295. remove_forbidden_flag(word);
  296. if (dp && dp->astr) {
  297. int captype;
  298. int wbl = strlen(word);
  299. int wcl = get_clen_and_captype(word, wbl, &captype);
  300. if (aliasf) {
  301. add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);
  302. } else {
  303. unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));
  304. if (flags) {
  305. memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
  306. add_word(word, wbl, wcl, flags, dp->alen, NULL, false);
  307. } else return 1;
  308. }
  309. return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);
  310. }
  311. return 1;
  312. }
  313. // walk the hash table entry by entry - null at end
  314. // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
  315. struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
  316. {
  317. if (hp && hp->next != NULL) return hp->next;
  318. for (col++; col < tablesize; col++) {
  319. if (tableptr[col]) return tableptr[col];
  320. }
  321. // null at end and reset to start
  322. col = -1;
  323. return NULL;
  324. }
  325. // load a munched word list and build a hash table on the fly
  326. int HashMgr::load_tables(const char * tpath, const char * key)
  327. {
  328. int al;
  329. char * ap;
  330. char * dp;
  331. char * dp2;
  332. unsigned short * flags;
  333. char * ts;
  334. // open dictionary file
  335. FileMgr * dict = new FileMgr(tpath, key);
  336. if (dict == NULL) return 1;
  337. // first read the first line of file to get hash table size */
  338. if (!(ts = dict->getline())) {
  339. HUNSPELL_WARNING(stderr, "error: empty dic file\n");
  340. delete dict;
  341. return 2;
  342. }
  343. mychomp(ts);
  344. /* remove byte order mark */
  345. if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {
  346. memmove(ts, ts+3, strlen(ts+3)+1);
  347. // warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions
  348. }
  349. tablesize = atoi(ts);
  350. if (tablesize == 0) {
  351. HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
  352. delete dict;
  353. return 4;
  354. }
  355. tablesize = tablesize + 5 + USERWORD;
  356. if ((tablesize %2) == 0) tablesize++;
  357. // allocate the hash table
  358. tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *));
  359. if (! tableptr) {
  360. delete dict;
  361. return 3;
  362. }
  363. for (int i=0; i<tablesize; i++) tableptr[i] = NULL;
  364. // loop through all words on much list and add to hash
  365. // table and create word and affix strings
  366. while ((ts = dict->getline())) {
  367. mychomp(ts);
  368. // split each line into word and morphological description
  369. dp = ts;
  370. while ((dp = strchr(dp, ':'))) {
  371. if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) {
  372. for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--);
  373. if (dp < ts) { // missing word
  374. dp = NULL;
  375. } else {
  376. *(dp + 1) = '\0';
  377. dp = dp + 2;
  378. }
  379. break;
  380. }
  381. dp++;
  382. }
  383. // tabulator is the old morphological field separator
  384. dp2 = strchr(ts, '\t');
  385. if (dp2 && (!dp || dp2 < dp)) {
  386. *dp2 = '\0';
  387. dp = dp2 + 1;
  388. }
  389. // split each line into word and affix char strings
  390. // "\/" signs slash in words (not affix separator)
  391. // "/" at beginning of the line is word character (not affix separator)
  392. ap = strchr(ts,'/');
  393. while (ap) {
  394. if (ap == ts) {
  395. ap++;
  396. continue;
  397. } else if (*(ap - 1) != '\\') break;
  398. // replace "\/" with "/"
  399. for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
  400. ap = strchr(ap,'/');
  401. }
  402. if (ap) {
  403. *ap = '\0';
  404. if (aliasf) {
  405. int index = atoi(ap + 1);
  406. al = get_aliasf(index, &flags, dict);
  407. if (!al) {
  408. HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());
  409. *ap = '\0';
  410. }
  411. } else {
  412. al = decode_flags(&flags, ap + 1, dict);
  413. if (al == -1) {
  414. HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
  415. return 6;
  416. }
  417. flag_qsort(flags, 0, al);
  418. }
  419. } else {
  420. al = 0;
  421. ap = NULL;
  422. flags = NULL;
  423. }
  424. int captype;
  425. int wbl = strlen(ts);
  426. int wcl = get_clen_and_captype(ts, wbl, &captype);
  427. // add the word and its index plus its capitalized form optionally
  428. if (add_word(ts,wbl,wcl,flags,al,dp, false) ||
  429. add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) {
  430. delete dict;
  431. return 5;
  432. }
  433. }
  434. delete dict;
  435. return 0;
  436. }
  437. // the hash function is a simple load and rotate
  438. // algorithm borrowed
  439. int HashMgr::hash(const char * word) const
  440. {
  441. long hv = 0;
  442. for (int i=0; i < 4 && *word != 0; i++)
  443. hv = (hv << 8) | (*word++);
  444. while (*word != 0) {
  445. ROTATE(hv,ROTATE_LEN);
  446. hv ^= (*word++);
  447. }
  448. return (unsigned long) hv % tablesize;
  449. }
  450. int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
  451. int len;
  452. if (*flags == '\0') {
  453. HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
  454. *result = NULL;
  455. return 0;
  456. }
  457. switch (flag_mode) {
  458. case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
  459. len = strlen(flags);
  460. if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
  461. len /= 2;
  462. *result = (unsigned short *) malloc(len * sizeof(short));
  463. if (!*result) return -1;
  464. for (int i = 0; i < len; i++) {
  465. (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];
  466. }
  467. break;
  468. }
  469. case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
  470. int i;
  471. len = 1;
  472. char * src = flags;
  473. unsigned short * dest;
  474. char * p;
  475. for (p = flags; *p; p++) {
  476. if (*p == ',') len++;
  477. }
  478. *result = (unsigned short *) malloc(len * sizeof(short));
  479. if (!*result) return -1;
  480. dest = *result;
  481. for (p = flags; *p; p++) {
  482. if (*p == ',') {
  483. i = atoi(src);
  484. if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
  485. af->getlinenum(), i, DEFAULTFLAGS - 1);
  486. *dest = (unsigned short) i;
  487. if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
  488. src = p + 1;
  489. dest++;
  490. }
  491. }
  492. i = atoi(src);
  493. if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
  494. af->getlinenum(), i, DEFAULTFLAGS - 1);
  495. *dest = (unsigned short) i;
  496. if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum());
  497. break;
  498. }
  499. case FLAG_UNI: { // UTF-8 characters
  500. w_char w[BUFSIZE/2];
  501. len = u8_u16(w, BUFSIZE/2, flags);
  502. *result = (unsigned short *) malloc(len * sizeof(short));
  503. if (!*result) return -1;
  504. memcpy(*result, w, len * sizeof(short));
  505. break;
  506. }
  507. default: { // Ispell's one-character flags (erfg -> e r f g)
  508. unsigned short * dest;
  509. len = strlen(flags);
  510. *result = (unsigned short *) malloc(len * sizeof(short));
  511. if (!*result) return -1;
  512. dest = *result;
  513. for (unsigned char * p = (unsigned char *) flags; *p; p++) {
  514. *dest = (unsigned short) *p;
  515. dest++;
  516. }
  517. }
  518. }
  519. return len;
  520. }
  521. unsigned short HashMgr::decode_flag(const char * f) {
  522. unsigned short s = 0;
  523. int i;
  524. switch (flag_mode) {
  525. case FLAG_LONG:
  526. s = ((unsigned short) f[0] << 8) + (unsigned short) f[1];
  527. break;
  528. case FLAG_NUM:
  529. i = atoi(f);
  530. if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1);
  531. s = (unsigned short) i;
  532. break;
  533. case FLAG_UNI:
  534. u8_u16((w_char *) &s, 1, f);
  535. break;
  536. default:
  537. s = (unsigned short) *((unsigned char *)f);
  538. }
  539. if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
  540. return s;
  541. }
  542. char * HashMgr::encode_flag(unsigned short f) {
  543. unsigned char ch[10];
  544. if (f==0) return mystrdup("(NULL)");
  545. if (flag_mode == FLAG_LONG) {
  546. ch[0] = (unsigned char) (f >> 8);
  547. ch[1] = (unsigned char) (f - ((f >> 8) << 8));
  548. ch[2] = '\0';
  549. } else if (flag_mode == FLAG_NUM) {
  550. sprintf((char *) ch, "%d", f);
  551. } else if (flag_mode == FLAG_UNI) {
  552. u16_u8((char *) &ch, 10, (w_char *) &f, 1);
  553. } else {
  554. ch[0] = (unsigned char) (f);
  555. ch[1] = '\0';
  556. }
  557. return mystrdup((char *) ch);
  558. }
  559. // read in aff file and set flag mode
  560. int HashMgr::load_config(const char * affpath, const char * key)
  561. {
  562. char * line; // io buffers
  563. int firstline = 1;
  564. // open the affix file
  565. FileMgr * afflst = new FileMgr(affpath, key);
  566. if (!afflst) {
  567. HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath);
  568. return 1;
  569. }
  570. // read in each line ignoring any that do not
  571. // start with a known line type indicator
  572. while ((line = afflst->getline())) {
  573. mychomp(line);
  574. /* remove byte order mark */
  575. if (firstline) {
  576. firstline = 0;
  577. if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1);
  578. }
  579. /* parse in the try string */
  580. if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) {
  581. if (flag_mode != FLAG_CHAR) {
  582. HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum());
  583. }
  584. if (strstr(line, "long")) flag_mode = FLAG_LONG;
  585. if (strstr(line, "num")) flag_mode = FLAG_NUM;
  586. if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
  587. if (flag_mode == FLAG_CHAR) {
  588. HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum());
  589. }
  590. }
  591. if (strncmp(line,"FORBIDDENWORD",13) == 0) {
  592. char * st = NULL;
  593. if (parse_string(line, &st, afflst->getlinenum())) {
  594. delete afflst;
  595. return 1;
  596. }
  597. forbiddenword = decode_flag(st);
  598. free(st);
  599. }
  600. if (strncmp(line, "SET", 3) == 0) {
  601. if (parse_string(line, &enc, afflst->getlinenum())) {
  602. delete afflst;
  603. return 1;
  604. }
  605. if (strcmp(enc, "UTF-8") == 0) {
  606. utf8 = 1;
  607. #ifndef OPENOFFICEORG
  608. #ifndef MOZILLA_CLIENT
  609. initialize_utf_tbl();
  610. #endif
  611. #endif
  612. } else csconv = get_current_cs(enc);
  613. }
  614. if (strncmp(line, "LANG", 4) == 0) {
  615. if (parse_string(line, &lang, afflst->getlinenum())) {
  616. delete afflst;
  617. return 1;
  618. }
  619. langnum = get_lang_num(lang);
  620. }
  621. /* parse in the ignored characters (for example, Arabic optional diacritics characters */
  622. if (strncmp(line,"IGNORE",6) == 0) {
  623. if (parse_array(line, &ignorechars, &ignorechars_utf16,
  624. &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
  625. delete afflst;
  626. return 1;
  627. }
  628. }
  629. if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) {
  630. if (parse_aliasf(line, afflst)) {
  631. delete afflst;
  632. return 1;
  633. }
  634. }
  635. if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {
  636. if (parse_aliasm(line, afflst)) {
  637. delete afflst;
  638. return 1;
  639. }
  640. }
  641. if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
  642. if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;
  643. }
  644. if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING);
  645. delete afflst;
  646. return 0;
  647. }
  648. /* parse in the ALIAS table */
  649. int HashMgr::parse_aliasf(char * line, FileMgr * af)
  650. {
  651. if (numaliasf != 0) {
  652. HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  653. return 1;
  654. }
  655. char * tp = line;
  656. char * piece;
  657. int i = 0;
  658. int np = 0;
  659. piece = mystrsep(&tp, 0);
  660. while (piece) {
  661. if (*piece != '\0') {
  662. switch(i) {
  663. case 0: { np++; break; }
  664. case 1: {
  665. numaliasf = atoi(piece);
  666. if (numaliasf < 1) {
  667. numaliasf = 0;
  668. aliasf = NULL;
  669. aliasflen = NULL;
  670. HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  671. return 1;
  672. }
  673. aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));
  674. aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short));
  675. if (!aliasf || !aliasflen) {
  676. numaliasf = 0;
  677. if (aliasf) free(aliasf);
  678. if (aliasflen) free(aliasflen);
  679. aliasf = NULL;
  680. aliasflen = NULL;
  681. return 1;
  682. }
  683. np++;
  684. break;
  685. }
  686. default: break;
  687. }
  688. i++;
  689. }
  690. piece = mystrsep(&tp, 0);
  691. }
  692. if (np != 2) {
  693. numaliasf = 0;
  694. free(aliasf);
  695. free(aliasflen);
  696. aliasf = NULL;
  697. aliasflen = NULL;
  698. HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  699. return 1;
  700. }
  701. /* now parse the numaliasf lines to read in the remainder of the table */
  702. char * nl;
  703. for (int j=0; j < numaliasf; j++) {
  704. if (!(nl = af->getline())) return 1;
  705. mychomp(nl);
  706. tp = nl;
  707. i = 0;
  708. aliasf[j] = NULL;
  709. aliasflen[j] = 0;
  710. piece = mystrsep(&tp, 0);
  711. while (piece) {
  712. if (*piece != '\0') {
  713. switch(i) {
  714. case 0: {
  715. if (strncmp(piece,"AF",2) != 0) {
  716. numaliasf = 0;
  717. free(aliasf);
  718. free(aliasflen);
  719. aliasf = NULL;
  720. aliasflen = NULL;
  721. HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  722. return 1;
  723. }
  724. break;
  725. }
  726. case 1: {
  727. aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece, af);
  728. flag_qsort(aliasf[j], 0, aliasflen[j]);
  729. break;
  730. }
  731. default: break;
  732. }
  733. i++;
  734. }
  735. piece = mystrsep(&tp, 0);
  736. }
  737. if (!aliasf[j]) {
  738. free(aliasf);
  739. free(aliasflen);
  740. aliasf = NULL;
  741. aliasflen = NULL;
  742. numaliasf = 0;
  743. HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  744. return 1;
  745. }
  746. }
  747. return 0;
  748. }
  749. int HashMgr::is_aliasf() {
  750. return (aliasf != NULL);
  751. }
  752. int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) {
  753. if ((index > 0) && (index <= numaliasf)) {
  754. *fvec = aliasf[index - 1];
  755. return aliasflen[index - 1];
  756. }
  757. HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->getlinenum(), index);
  758. *fvec = NULL;
  759. return 0;
  760. }
  761. /* parse morph alias definitions */
  762. int HashMgr::parse_aliasm(char * line, FileMgr * af)
  763. {
  764. if (numaliasm != 0) {
  765. HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  766. return 1;
  767. }
  768. char * tp = line;
  769. char * piece;
  770. int i = 0;
  771. int np = 0;
  772. piece = mystrsep(&tp, 0);
  773. while (piece) {
  774. if (*piece != '\0') {
  775. switch(i) {
  776. case 0: { np++; break; }
  777. case 1: {
  778. numaliasm = atoi(piece);
  779. if (numaliasm < 1) {
  780. HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  781. return 1;
  782. }
  783. aliasm = (char **) malloc(numaliasm * sizeof(char *));
  784. if (!aliasm) {
  785. numaliasm = 0;
  786. return 1;
  787. }
  788. np++;
  789. break;
  790. }
  791. default: break;
  792. }
  793. i++;
  794. }
  795. piece = mystrsep(&tp, 0);
  796. }
  797. if (np != 2) {
  798. numaliasm = 0;
  799. free(aliasm);
  800. aliasm = NULL;
  801. HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  802. return 1;
  803. }
  804. /* now parse the numaliasm lines to read in the remainder of the table */
  805. char * nl = line;
  806. for (int j=0; j < numaliasm; j++) {
  807. if (!(nl = af->getline())) return 1;
  808. mychomp(nl);
  809. tp = nl;
  810. i = 0;
  811. aliasm[j] = NULL;
  812. piece = mystrsep(&tp, ' ');
  813. while (piece) {
  814. if (*piece != '\0') {
  815. switch(i) {
  816. case 0: {
  817. if (strncmp(piece,"AM",2) != 0) {
  818. HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  819. numaliasm = 0;
  820. free(aliasm);
  821. aliasm = NULL;
  822. return 1;
  823. }
  824. break;
  825. }
  826. case 1: {
  827. // add the remaining of the line
  828. if (*tp) {
  829. *(tp - 1) = ' ';
  830. tp = tp + strlen(tp);
  831. }
  832. if (complexprefixes) {
  833. if (utf8) reverseword_utf(piece);
  834. else reverseword(piece);
  835. }
  836. aliasm[j] = mystrdup(piece);
  837. if (!aliasm[j]) {
  838. numaliasm = 0;
  839. free(aliasm);
  840. aliasm = NULL;
  841. return 1;
  842. }
  843. break; }
  844. default: break;
  845. }
  846. i++;
  847. }
  848. piece = mystrsep(&tp, ' ');
  849. }
  850. if (!aliasm[j]) {
  851. numaliasm = 0;
  852. free(aliasm);
  853. aliasm = NULL;
  854. HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  855. return 1;
  856. }
  857. }
  858. return 0;
  859. }
  860. int HashMgr::is_aliasm() {
  861. return (aliasm != NULL);
  862. }
  863. char * HashMgr::get_aliasm(int index) {
  864. if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1];
  865. HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
  866. return NULL;
  867. }