PageRenderTime 62ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/extensions/spellcheck/hunspell/src/hunspell.cpp

http://github.com/zpao/v8monkey
C++ | 2060 lines | 1753 code | 146 blank | 161 comment | 605 complexity | 29929269e48533727bddc53f90b211c9 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-3.0, AGPL-1.0, LGPL-2.1, BSD-3-Clause, GPL-2.0, JSON, Apache-2.0, 0BSD
  1. /******* BEGIN LICENSE BLOCK *******
  2. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  3. *
  4. * The contents of this file are subject to the Mozilla Public License Version
  5. * 1.1 (the "License"); you may not use this file except in compliance with
  6. * the License. You may obtain a copy of the License at
  7. * http://www.mozilla.org/MPL/
  8. *
  9. * Software distributed under the License is distributed on an "AS IS" basis,
  10. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  11. * for the specific language governing rights and limitations under the
  12. * License.
  13. *
  14. * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
  15. * and László Németh (Hunspell). Portions created by the Initial Developers
  16. * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
  17. *
  18. * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
  19. * David Einstein (deinst@world.std.com)
  20. * László Németh (nemethl@gyorsposta.hu)
  21. * Caolan McNamara (caolanm@redhat.com)
  22. * Davide Prina
  23. * Giuseppe Modugno
  24. * Gianluca Turconi
  25. * Simon Brouwer
  26. * Noll Janos
  27. * Biro Arpad
  28. * Goldman Eleonora
  29. * Sarlos Tamas
  30. * Bencsath Boldizsar
  31. * Halacsy Peter
  32. * Dvornik Laszlo
  33. * Gefferth Andras
  34. * Nagy Viktor
  35. * Varga Daniel
  36. * Chris Halls
  37. * Rene Engelhard
  38. * Bram Moolenaar
  39. * Dafydd Jones
  40. * Harri Pitkanen
  41. * Andras Timar
  42. * Tor Lillqvist
  43. *
  44. * Alternatively, the contents of this file may be used under the terms of
  45. * either the GNU General Public License Version 2 or later (the "GPL"), or
  46. * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  47. * in which case the provisions of the GPL or the LGPL are applicable instead
  48. * of those above. If you wish to allow use of your version of this file only
  49. * under the terms of either the GPL or the LGPL, and not to allow others to
  50. * use your version of this file under the terms of the MPL, indicate your
  51. * decision by deleting the provisions above and replace them with the notice
  52. * and other provisions required by the GPL or the LGPL. If you do not delete
  53. * the provisions above, a recipient may use your version of this file under
  54. * the terms of any one of the MPL, the GPL or the LGPL.
  55. *
  56. ******* END LICENSE BLOCK *******/
  57. #include <stdlib.h>
  58. #include <string.h>
  59. #include <stdio.h>
  60. #include "hunspell.hxx"
  61. #include "hunspell.h"
  62. #ifndef MOZILLA_CLIENT
  63. # include "config.h"
  64. #endif
  65. #include "csutil.hxx"
  66. Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
  67. {
  68. encoding = NULL;
  69. csconv = NULL;
  70. utf8 = 0;
  71. complexprefixes = 0;
  72. affixpath = mystrdup(affpath);
  73. maxdic = 0;
  74. /* first set up the hash manager */
  75. pHMgr[0] = new HashMgr(dpath, affpath, key);
  76. if (pHMgr[0]) maxdic = 1;
  77. /* next set up the affix manager */
  78. /* it needs access to the hash manager lookup methods */
  79. pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
  80. /* get the preferred try string and the dictionary */
  81. /* encoding from the Affix Manager for that dictionary */
  82. char * try_string = pAMgr->get_try_string();
  83. encoding = pAMgr->get_encoding();
  84. langnum = pAMgr->get_langnum();
  85. utf8 = pAMgr->get_utf8();
  86. if (!utf8)
  87. csconv = get_current_cs(encoding);
  88. complexprefixes = pAMgr->get_complexprefixes();
  89. wordbreak = pAMgr->get_breaktable();
  90. /* and finally set up the suggestion manager */
  91. pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
  92. if (try_string) free(try_string);
  93. }
  94. Hunspell::~Hunspell()
  95. {
  96. if (pSMgr) delete pSMgr;
  97. if (pAMgr) delete pAMgr;
  98. for (int i = 0; i < maxdic; i++) delete pHMgr[i];
  99. maxdic = 0;
  100. pSMgr = NULL;
  101. pAMgr = NULL;
  102. #ifdef MOZILLA_CLIENT
  103. delete [] csconv;
  104. #endif
  105. csconv= NULL;
  106. if (encoding) free(encoding);
  107. encoding = NULL;
  108. if (affixpath) free(affixpath);
  109. affixpath = NULL;
  110. }
  111. // load extra dictionaries
  112. int Hunspell::add_dic(const char * dpath, const char * key) {
  113. if (maxdic == MAXDIC || !affixpath) return 1;
  114. pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
  115. if (pHMgr[maxdic]) maxdic++; else return 1;
  116. return 0;
  117. }
  118. // make a copy of src at destination while removing all leading
  119. // blanks and removing any trailing periods after recording
  120. // their presence with the abbreviation flag
  121. // also since already going through character by character,
  122. // set the capitalization type
  123. // return the length of the "cleaned" (and UTF-8 encoded) word
  124. int Hunspell::cleanword2(char * dest, const char * src,
  125. w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
  126. {
  127. unsigned char * p = (unsigned char *) dest;
  128. const unsigned char * q = (const unsigned char * ) src;
  129. // first skip over any leading blanks
  130. while ((*q != '\0') && (*q == ' ')) q++;
  131. // now strip off any trailing periods (recording their presence)
  132. *pabbrev = 0;
  133. int nl = strlen((const char *)q);
  134. while ((nl > 0) && (*(q+nl-1)=='.')) {
  135. nl--;
  136. (*pabbrev)++;
  137. }
  138. // if no characters are left it can't be capitalized
  139. if (nl <= 0) {
  140. *pcaptype = NOCAP;
  141. *p = '\0';
  142. return 0;
  143. }
  144. strncpy(dest, (char *) q, nl);
  145. *(dest + nl) = '\0';
  146. nl = strlen(dest);
  147. if (utf8) {
  148. *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
  149. // don't check too long words
  150. if (*nc >= MAXWORDLEN) return 0;
  151. if (*nc == -1) { // big Unicode character (non BMP area)
  152. *pcaptype = NOCAP;
  153. return nl;
  154. }
  155. *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
  156. } else {
  157. *pcaptype = get_captype(dest, nl, csconv);
  158. *nc = nl;
  159. }
  160. return nl;
  161. }
  162. int Hunspell::cleanword(char * dest, const char * src,
  163. int * pcaptype, int * pabbrev)
  164. {
  165. unsigned char * p = (unsigned char *) dest;
  166. const unsigned char * q = (const unsigned char * ) src;
  167. int firstcap = 0;
  168. // first skip over any leading blanks
  169. while ((*q != '\0') && (*q == ' ')) q++;
  170. // now strip off any trailing periods (recording their presence)
  171. *pabbrev = 0;
  172. int nl = strlen((const char *)q);
  173. while ((nl > 0) && (*(q+nl-1)=='.')) {
  174. nl--;
  175. (*pabbrev)++;
  176. }
  177. // if no characters are left it can't be capitalized
  178. if (nl <= 0) {
  179. *pcaptype = NOCAP;
  180. *p = '\0';
  181. return 0;
  182. }
  183. // now determine the capitalization type of the first nl letters
  184. int ncap = 0;
  185. int nneutral = 0;
  186. int nc = 0;
  187. if (!utf8) {
  188. while (nl > 0) {
  189. nc++;
  190. if (csconv[(*q)].ccase) ncap++;
  191. if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
  192. *p++ = *q++;
  193. nl--;
  194. }
  195. // remember to terminate the destination string
  196. *p = '\0';
  197. firstcap = csconv[(unsigned char)(*dest)].ccase;
  198. } else {
  199. unsigned short idx;
  200. w_char t[MAXWORDLEN];
  201. nc = u8_u16(t, MAXWORDLEN, src);
  202. for (int i = 0; i < nc; i++) {
  203. idx = (t[i].h << 8) + t[i].l;
  204. unsigned short low = unicodetolower(idx, langnum);
  205. if (idx != low) ncap++;
  206. if (unicodetoupper(idx, langnum) == low) nneutral++;
  207. }
  208. u16_u8(dest, MAXWORDUTF8LEN, t, nc);
  209. if (ncap) {
  210. idx = (t[0].h << 8) + t[0].l;
  211. firstcap = (idx != unicodetolower(idx, langnum));
  212. }
  213. }
  214. // now finally set the captype
  215. if (ncap == 0) {
  216. *pcaptype = NOCAP;
  217. } else if ((ncap == 1) && firstcap) {
  218. *pcaptype = INITCAP;
  219. } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
  220. *pcaptype = ALLCAP;
  221. } else if ((ncap > 1) && firstcap) {
  222. *pcaptype = HUHINITCAP;
  223. } else {
  224. *pcaptype = HUHCAP;
  225. }
  226. return strlen(dest);
  227. }
  228. void Hunspell::mkallcap(char * p)
  229. {
  230. if (utf8) {
  231. w_char u[MAXWORDLEN];
  232. int nc = u8_u16(u, MAXWORDLEN, p);
  233. unsigned short idx;
  234. for (int i = 0; i < nc; i++) {
  235. idx = (u[i].h << 8) + u[i].l;
  236. if (idx != unicodetoupper(idx, langnum)) {
  237. u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
  238. u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
  239. }
  240. }
  241. u16_u8(p, MAXWORDUTF8LEN, u, nc);
  242. } else {
  243. while (*p != '\0') {
  244. *p = csconv[((unsigned char) *p)].cupper;
  245. p++;
  246. }
  247. }
  248. }
  249. int Hunspell::mkallcap2(char * p, w_char * u, int nc)
  250. {
  251. if (utf8) {
  252. unsigned short idx;
  253. for (int i = 0; i < nc; i++) {
  254. idx = (u[i].h << 8) + u[i].l;
  255. unsigned short up = unicodetoupper(idx, langnum);
  256. if (idx != up) {
  257. u[i].h = (unsigned char) (up >> 8);
  258. u[i].l = (unsigned char) (up & 0x00FF);
  259. }
  260. }
  261. u16_u8(p, MAXWORDUTF8LEN, u, nc);
  262. return strlen(p);
  263. } else {
  264. while (*p != '\0') {
  265. *p = csconv[((unsigned char) *p)].cupper;
  266. p++;
  267. }
  268. }
  269. return nc;
  270. }
  271. void Hunspell::mkallsmall(char * p)
  272. {
  273. while (*p != '\0') {
  274. *p = csconv[((unsigned char) *p)].clower;
  275. p++;
  276. }
  277. }
  278. int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
  279. {
  280. if (utf8) {
  281. unsigned short idx;
  282. for (int i = 0; i < nc; i++) {
  283. idx = (u[i].h << 8) + u[i].l;
  284. unsigned short low = unicodetolower(idx, langnum);
  285. if (idx != low) {
  286. u[i].h = (unsigned char) (low >> 8);
  287. u[i].l = (unsigned char) (low & 0x00FF);
  288. }
  289. }
  290. u16_u8(p, MAXWORDUTF8LEN, u, nc);
  291. return strlen(p);
  292. } else {
  293. while (*p != '\0') {
  294. *p = csconv[((unsigned char) *p)].clower;
  295. p++;
  296. }
  297. }
  298. return nc;
  299. }
  300. // convert UTF-8 sharp S codes to latin 1
  301. char * Hunspell::sharps_u8_l1(char * dest, char * source) {
  302. char * p = dest;
  303. *p = *source;
  304. for (p++, source++; *(source - 1); p++, source++) {
  305. *p = *source;
  306. if (*source == '\x9F') *--p = '\xDF';
  307. }
  308. return dest;
  309. }
  310. // recursive search for right ss - sharp s permutations
  311. hentry * Hunspell::spellsharps(char * base, char * pos, int n,
  312. int repnum, char * tmp, int * info, char **root) {
  313. pos = strstr(pos, "ss");
  314. if (pos && (n < MAXSHARPS)) {
  315. *pos = '\xC3';
  316. *(pos + 1) = '\x9F';
  317. hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
  318. if (h) return h;
  319. *pos = 's';
  320. *(pos + 1) = 's';
  321. h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
  322. if (h) return h;
  323. } else if (repnum > 0) {
  324. if (utf8) return checkword(base, info, root);
  325. return checkword(sharps_u8_l1(tmp, base), info, root);
  326. }
  327. return NULL;
  328. }
  329. int Hunspell::is_keepcase(const hentry * rv) {
  330. return pAMgr && rv->astr && pAMgr->get_keepcase() &&
  331. TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
  332. }
  333. /* insert a word to the beginning of the suggestion array and return ns */
  334. int Hunspell::insert_sug(char ***slst, char * word, int ns) {
  335. char * dup = mystrdup(word);
  336. if (!dup) return ns;
  337. if (ns == MAXSUGGESTION) {
  338. ns--;
  339. free((*slst)[ns]);
  340. }
  341. for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
  342. (*slst)[0] = dup;
  343. return ns + 1;
  344. }
  345. int Hunspell::spell(const char * word, int * info, char ** root)
  346. {
  347. struct hentry * rv=NULL;
  348. // need larger vector. For example, Turkish capital letter I converted a
  349. // 2-byte UTF-8 character (dotless i) by mkallsmall.
  350. char cw[MAXWORDUTF8LEN];
  351. char wspace[MAXWORDUTF8LEN];
  352. w_char unicw[MAXWORDLEN];
  353. // Hunspell supports XML input of the simplified API (see manual)
  354. if (strcmp(word, SPELL_XML) == 0) return 1;
  355. int nc = strlen(word);
  356. int wl2 = 0;
  357. if (utf8) {
  358. if (nc >= MAXWORDUTF8LEN) return 0;
  359. } else {
  360. if (nc >= MAXWORDLEN) return 0;
  361. }
  362. int captype = 0;
  363. int abbv = 0;
  364. int wl = 0;
  365. // input conversion
  366. RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
  367. if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
  368. else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
  369. int info2 = 0;
  370. if (wl == 0 || maxdic == 0) return 1;
  371. if (root) *root = NULL;
  372. // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
  373. enum { NBEGIN, NNUM, NSEP };
  374. int nstate = NBEGIN;
  375. int i;
  376. for (i = 0; (i < wl); i++) {
  377. if ((cw[i] <= '9') && (cw[i] >= '0')) {
  378. nstate = NNUM;
  379. } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
  380. if ((nstate == NSEP) || (i == 0)) break;
  381. nstate = NSEP;
  382. } else break;
  383. }
  384. if ((i == wl) && (nstate == NNUM)) return 1;
  385. if (!info) info = &info2; else *info = 0;
  386. switch(captype) {
  387. case HUHCAP:
  388. case HUHINITCAP:
  389. *info += SPELL_ORIGCAP;
  390. case NOCAP: {
  391. rv = checkword(cw, info, root);
  392. if ((abbv) && !(rv)) {
  393. memcpy(wspace,cw,wl);
  394. *(wspace+wl) = '.';
  395. *(wspace+wl+1) = '\0';
  396. rv = checkword(wspace, info, root);
  397. }
  398. break;
  399. }
  400. case ALLCAP: {
  401. *info += SPELL_ORIGCAP;
  402. rv = checkword(cw, info, root);
  403. if (rv) break;
  404. if (abbv) {
  405. memcpy(wspace,cw,wl);
  406. *(wspace+wl) = '.';
  407. *(wspace+wl+1) = '\0';
  408. rv = checkword(wspace, info, root);
  409. if (rv) break;
  410. }
  411. // Spec. prefix handling for Catalan, French, Italian:
  412. // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
  413. if (pAMgr && strchr(cw, '\'')) {
  414. wl = mkallsmall2(cw, unicw, nc);
  415. //There are no really sane circumstances where this could fail,
  416. //but anyway...
  417. if (char * apostrophe = strchr(cw, '\'')) {
  418. if (utf8) {
  419. w_char tmpword[MAXWORDLEN];
  420. *apostrophe = '\0';
  421. wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
  422. *apostrophe = '\'';
  423. if (wl2 < nc) {
  424. mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
  425. rv = checkword(cw, info, root);
  426. if (rv) break;
  427. }
  428. } else {
  429. mkinitcap2(apostrophe + 1, unicw, nc);
  430. rv = checkword(cw, info, root);
  431. if (rv) break;
  432. }
  433. }
  434. mkinitcap2(cw, unicw, nc);
  435. rv = checkword(cw, info, root);
  436. if (rv) break;
  437. }
  438. if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
  439. char tmpword[MAXWORDUTF8LEN];
  440. wl = mkallsmall2(cw, unicw, nc);
  441. memcpy(wspace,cw,(wl+1));
  442. rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
  443. if (!rv) {
  444. wl2 = mkinitcap2(cw, unicw, nc);
  445. rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
  446. }
  447. if ((abbv) && !(rv)) {
  448. *(wspace+wl) = '.';
  449. *(wspace+wl+1) = '\0';
  450. rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
  451. if (!rv) {
  452. memcpy(wspace, cw, wl2);
  453. *(wspace+wl2) = '.';
  454. *(wspace+wl2+1) = '\0';
  455. rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
  456. }
  457. }
  458. if (rv) break;
  459. }
  460. }
  461. case INITCAP: {
  462. *info += SPELL_ORIGCAP;
  463. wl = mkallsmall2(cw, unicw, nc);
  464. memcpy(wspace,cw,(wl+1));
  465. wl2 = mkinitcap2(cw, unicw, nc);
  466. if (captype == INITCAP) *info += SPELL_INITCAP;
  467. rv = checkword(cw, info, root);
  468. if (captype == INITCAP) *info -= SPELL_INITCAP;
  469. // forbid bad capitalization
  470. // (for example, ijs -> Ijs instead of IJs in Dutch)
  471. // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
  472. if (*info & SPELL_FORBIDDEN) {
  473. rv = NULL;
  474. break;
  475. }
  476. if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
  477. if (rv) break;
  478. rv = checkword(wspace, info, root);
  479. if (abbv && !rv) {
  480. *(wspace+wl) = '.';
  481. *(wspace+wl+1) = '\0';
  482. rv = checkword(wspace, info, root);
  483. if (!rv) {
  484. memcpy(wspace, cw, wl2);
  485. *(wspace+wl2) = '.';
  486. *(wspace+wl2+1) = '\0';
  487. if (captype == INITCAP) *info += SPELL_INITCAP;
  488. rv = checkword(wspace, info, root);
  489. if (captype == INITCAP) *info -= SPELL_INITCAP;
  490. if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
  491. break;
  492. }
  493. }
  494. if (rv && is_keepcase(rv) &&
  495. ((captype == ALLCAP) ||
  496. // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
  497. // in INITCAP form, too.
  498. !(pAMgr->get_checksharps() &&
  499. ((utf8 && strstr(wspace, "\xC3\x9F")) ||
  500. (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
  501. break;
  502. }
  503. }
  504. if (rv) {
  505. if (pAMgr && pAMgr->get_warn() && rv->astr &&
  506. TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
  507. *info += SPELL_WARN;
  508. if (pAMgr->get_forbidwarn()) return 0;
  509. return HUNSPELL_OK_WARN;
  510. }
  511. return HUNSPELL_OK;
  512. }
  513. // recursive breaking at break points
  514. if (wordbreak) {
  515. char * s;
  516. char r;
  517. int nbr = 0;
  518. wl = strlen(cw);
  519. int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
  520. // calculate break points for recursion limit
  521. for (int j = 0; j < numbreak; j++) {
  522. s = cw;
  523. do {
  524. s = (char *) strstr(s, wordbreak[j]);
  525. if (s) {
  526. nbr++;
  527. s++;
  528. }
  529. } while (s);
  530. }
  531. if (nbr >= 10) return 0;
  532. // check boundary patterns (^begin and end$)
  533. for (int j = 0; j < numbreak; j++) {
  534. int plen = strlen(wordbreak[j]);
  535. if (plen == 1 || plen > wl) continue;
  536. if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
  537. && spell(cw + plen - 1)) return 1;
  538. if (wordbreak[j][plen - 1] == '$' &&
  539. strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
  540. r = cw[wl - plen + 1];
  541. cw[wl - plen + 1] = '\0';
  542. if (spell(cw)) return 1;
  543. cw[wl - plen + 1] = r;
  544. }
  545. }
  546. // other patterns
  547. for (int j = 0; j < numbreak; j++) {
  548. int plen = strlen(wordbreak[j]);
  549. s=(char *) strstr(cw, wordbreak[j]);
  550. if (s && (s > cw) && (s < cw + wl - plen)) {
  551. if (!spell(s + plen)) continue;
  552. r = *s;
  553. *s = '\0';
  554. // examine 2 sides of the break point
  555. if (spell(cw)) return 1;
  556. *s = r;
  557. // LANG_hu: spec. dash rule
  558. if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
  559. r = s[1];
  560. s[1] = '\0';
  561. if (spell(cw)) return 1; // check the first part with dash
  562. s[1] = r;
  563. }
  564. // end of LANG speficic region
  565. }
  566. }
  567. }
  568. return 0;
  569. }
  570. struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
  571. {
  572. struct hentry * he = NULL;
  573. int len, i;
  574. char w2[MAXWORDUTF8LEN];
  575. const char * word;
  576. char * ignoredchars = pAMgr->get_ignore();
  577. if (ignoredchars != NULL) {
  578. strcpy(w2, w);
  579. if (utf8) {
  580. int ignoredchars_utf16_len;
  581. unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
  582. remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
  583. } else {
  584. remove_ignored_chars(w2,ignoredchars);
  585. }
  586. word = w2;
  587. } else word = w;
  588. len = strlen(word);
  589. if (!len)
  590. return NULL;
  591. // word reversing wrapper for complex prefixes
  592. if (complexprefixes) {
  593. if (word != w2) {
  594. strcpy(w2, word);
  595. word = w2;
  596. }
  597. if (utf8) reverseword_utf(w2); else reverseword(w2);
  598. }
  599. // look word in hash table
  600. for (i = 0; (i < maxdic) && !he; i ++) {
  601. he = (pHMgr[i])->lookup(word);
  602. // check forbidden and onlyincompound words
  603. if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
  604. if (info) *info += SPELL_FORBIDDEN;
  605. // LANG_hu section: set dash information for suggestions
  606. if (langnum == LANG_hu) {
  607. if (pAMgr->get_compoundflag() &&
  608. TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
  609. if (info) *info += SPELL_COMPOUND;
  610. }
  611. }
  612. return NULL;
  613. }
  614. // he = next not needaffix, onlyincompound homonym or onlyupcase word
  615. while (he && (he->astr) &&
  616. ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
  617. (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
  618. (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
  619. )) he = he->next_homonym;
  620. }
  621. // check with affixes
  622. if (!he && pAMgr) {
  623. // try stripping off affixes */
  624. he = pAMgr->affix_check(word, len, 0);
  625. // check compound restriction and onlyupcase
  626. if (he && he->astr && (
  627. (pAMgr->get_onlyincompound() &&
  628. TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
  629. (info && (*info & SPELL_INITCAP) &&
  630. TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
  631. he = NULL;
  632. }
  633. if (he) {
  634. if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
  635. if (info) *info += SPELL_FORBIDDEN;
  636. return NULL;
  637. }
  638. if (root) {
  639. *root = mystrdup(he->word);
  640. if (*root && complexprefixes) {
  641. if (utf8) reverseword_utf(*root); else reverseword(*root);
  642. }
  643. }
  644. // try check compound word
  645. } else if (pAMgr->get_compound()) {
  646. he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
  647. // LANG_hu section: `moving rule' with last dash
  648. if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
  649. char * dup = mystrdup(word);
  650. if (!dup) return NULL;
  651. dup[len-1] = '\0';
  652. he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
  653. free(dup);
  654. }
  655. // end of LANG speficic region
  656. if (he) {
  657. if (root) {
  658. *root = mystrdup(he->word);
  659. if (*root && complexprefixes) {
  660. if (utf8) reverseword_utf(*root); else reverseword(*root);
  661. }
  662. }
  663. if (info) *info += SPELL_COMPOUND;
  664. }
  665. }
  666. }
  667. return he;
  668. }
  669. int Hunspell::suggest(char*** slst, const char * word)
  670. {
  671. int onlycmpdsug = 0;
  672. char cw[MAXWORDUTF8LEN];
  673. char wspace[MAXWORDUTF8LEN];
  674. if (!pSMgr || maxdic == 0) return 0;
  675. w_char unicw[MAXWORDLEN];
  676. *slst = NULL;
  677. // process XML input of the simplified API (see manual)
  678. if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
  679. return spellml(slst, word);
  680. }
  681. int nc = strlen(word);
  682. if (utf8) {
  683. if (nc >= MAXWORDUTF8LEN) return 0;
  684. } else {
  685. if (nc >= MAXWORDLEN) return 0;
  686. }
  687. int captype = 0;
  688. int abbv = 0;
  689. int wl = 0;
  690. // input conversion
  691. RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
  692. if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
  693. else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
  694. if (wl == 0) return 0;
  695. int ns = 0;
  696. int capwords = 0;
  697. // check capitalized form for FORCEUCASE
  698. if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
  699. int info = SPELL_ORIGCAP;
  700. char ** wlst;
  701. if (checkword(cw, &info, NULL)) {
  702. if (*slst) {
  703. wlst = *slst;
  704. } else {
  705. wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
  706. if (wlst == NULL) return -1;
  707. *slst = wlst;
  708. for (int i = 0; i < MAXSUGGESTION; i++) {
  709. wlst[i] = NULL;
  710. }
  711. }
  712. wlst[0] = mystrdup(cw);
  713. mkinitcap(wlst[0]);
  714. return 1;
  715. }
  716. }
  717. switch(captype) {
  718. case NOCAP: {
  719. ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
  720. break;
  721. }
  722. case INITCAP: {
  723. capwords = 1;
  724. ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
  725. if (ns == -1) break;
  726. memcpy(wspace,cw,(wl+1));
  727. mkallsmall2(wspace, unicw, nc);
  728. ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
  729. break;
  730. }
  731. case HUHINITCAP:
  732. capwords = 1;
  733. case HUHCAP: {
  734. ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
  735. if (ns != -1) {
  736. int prevns;
  737. // something.The -> something. The
  738. char * dot = strchr(cw, '.');
  739. if (dot && (dot > cw)) {
  740. int captype_;
  741. if (utf8) {
  742. w_char w_[MAXWORDLEN];
  743. int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
  744. captype_ = get_captype_utf8(w_, wl_, langnum);
  745. } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
  746. if (captype_ == INITCAP) {
  747. char * st = mystrdup(cw);
  748. if (st) st = (char *) realloc(st, wl + 2);
  749. if (st) {
  750. st[(dot - cw) + 1] = ' ';
  751. strcpy(st + (dot - cw) + 2, dot + 1);
  752. ns = insert_sug(slst, st, ns);
  753. free(st);
  754. }
  755. }
  756. }
  757. if (captype == HUHINITCAP) {
  758. // TheOpenOffice.org -> The OpenOffice.org
  759. memcpy(wspace,cw,(wl+1));
  760. mkinitsmall2(wspace, unicw, nc);
  761. ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
  762. }
  763. memcpy(wspace,cw,(wl+1));
  764. mkallsmall2(wspace, unicw, nc);
  765. if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
  766. prevns = ns;
  767. ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
  768. if (captype == HUHINITCAP) {
  769. mkinitcap2(wspace, unicw, nc);
  770. if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
  771. ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
  772. }
  773. // aNew -> "a New" (instead of "a new")
  774. for (int j = prevns; j < ns; j++) {
  775. char * space = strchr((*slst)[j],' ');
  776. if (space) {
  777. int slen = strlen(space + 1);
  778. // different case after space (need capitalisation)
  779. if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
  780. w_char w[MAXWORDLEN];
  781. int wc = 0;
  782. char * r = (*slst)[j];
  783. if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
  784. mkinitcap2(space + 1, w, wc);
  785. // set as first suggestion
  786. for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
  787. (*slst)[0] = r;
  788. }
  789. }
  790. }
  791. }
  792. break;
  793. }
  794. case ALLCAP: {
  795. memcpy(wspace, cw, (wl+1));
  796. mkallsmall2(wspace, unicw, nc);
  797. ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
  798. if (ns == -1) break;
  799. if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
  800. ns = insert_sug(slst, wspace, ns);
  801. mkinitcap2(wspace, unicw, nc);
  802. ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
  803. for (int j=0; j < ns; j++) {
  804. mkallcap((*slst)[j]);
  805. if (pAMgr && pAMgr->get_checksharps()) {
  806. char * pos;
  807. if (utf8) {
  808. pos = strstr((*slst)[j], "\xC3\x9F");
  809. while (pos) {
  810. *pos = 'S';
  811. *(pos+1) = 'S';
  812. pos = strstr(pos+2, "\xC3\x9F");
  813. }
  814. } else {
  815. pos = strchr((*slst)[j], '\xDF');
  816. while (pos) {
  817. (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
  818. mystrrep((*slst)[j], "\xDF", "SS");
  819. pos = strchr((*slst)[j], '\xDF');
  820. }
  821. }
  822. }
  823. }
  824. break;
  825. }
  826. }
  827. // LANG_hu section: replace '-' with ' ' in Hungarian
  828. if (langnum == LANG_hu) {
  829. for (int j=0; j < ns; j++) {
  830. char * pos = strchr((*slst)[j],'-');
  831. if (pos) {
  832. int info;
  833. char w[MAXWORDUTF8LEN];
  834. *pos = '\0';
  835. strcpy(w, (*slst)[j]);
  836. strcat(w, pos + 1);
  837. spell(w, &info, NULL);
  838. if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
  839. *pos = ' ';
  840. } else *pos = '-';
  841. }
  842. }
  843. }
  844. // END OF LANG_hu section
  845. // try ngram approach since found nothing or only compound words
  846. if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
  847. switch(captype) {
  848. case NOCAP: {
  849. ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
  850. break;
  851. }
  852. case HUHINITCAP:
  853. capwords = 1;
  854. case HUHCAP: {
  855. memcpy(wspace,cw,(wl+1));
  856. mkallsmall2(wspace, unicw, nc);
  857. ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
  858. break;
  859. }
  860. case INITCAP: {
  861. capwords = 1;
  862. memcpy(wspace,cw,(wl+1));
  863. mkallsmall2(wspace, unicw, nc);
  864. ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
  865. break;
  866. }
  867. case ALLCAP: {
  868. memcpy(wspace,cw,(wl+1));
  869. mkallsmall2(wspace, unicw, nc);
  870. int oldns = ns;
  871. ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
  872. for (int j = oldns; j < ns; j++)
  873. mkallcap((*slst)[j]);
  874. break;
  875. }
  876. }
  877. }
  878. // try dash suggestion (Afo-American -> Afro-American)
  879. if (char * pos = strchr(cw, '-')) {
  880. char * ppos = cw;
  881. int nodashsug = 1;
  882. char ** nlst = NULL;
  883. int nn = 0;
  884. int last = 0;
  885. if (*slst) {
  886. for (int j = 0; j < ns && nodashsug == 1; j++) {
  887. if (strchr((*slst)[j], '-')) nodashsug = 0;
  888. }
  889. }
  890. while (nodashsug && !last) {
  891. if (*pos == '\0') last = 1; else *pos = '\0';
  892. if (!spell(ppos)) {
  893. nn = suggest(&nlst, ppos);
  894. for (int j = nn - 1; j >= 0; j--) {
  895. strncpy(wspace, cw, ppos - cw);
  896. strcpy(wspace + (ppos - cw), nlst[j]);
  897. if (!last) {
  898. strcat(wspace, "-");
  899. strcat(wspace, pos + 1);
  900. }
  901. ns = insert_sug(slst, wspace, ns);
  902. free(nlst[j]);
  903. }
  904. if (nlst != NULL) free(nlst);
  905. nodashsug = 0;
  906. }
  907. if (!last) {
  908. *pos = '-';
  909. ppos = pos + 1;
  910. pos = strchr(ppos, '-');
  911. }
  912. if (!pos) pos = cw + strlen(cw);
  913. }
  914. }
  915. // word reversing wrapper for complex prefixes
  916. if (complexprefixes) {
  917. for (int j = 0; j < ns; j++) {
  918. if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
  919. }
  920. }
  921. // capitalize
  922. if (capwords) for (int j=0; j < ns; j++) {
  923. mkinitcap((*slst)[j]);
  924. }
  925. // expand suggestions with dot(s)
  926. if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
  927. for (int j = 0; j < ns; j++) {
  928. (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
  929. strcat((*slst)[j], word + strlen(word) - abbv);
  930. }
  931. }
  932. // remove bad capitalized and forbidden forms
  933. if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
  934. switch (captype) {
  935. case INITCAP:
  936. case ALLCAP: {
  937. int l = 0;
  938. for (int j=0; j < ns; j++) {
  939. if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
  940. char s[MAXSWUTF8L];
  941. w_char w[MAXSWL];
  942. int len;
  943. if (utf8) {
  944. len = u8_u16(w, MAXSWL, (*slst)[j]);
  945. } else {
  946. strcpy(s, (*slst)[j]);
  947. len = strlen(s);
  948. }
  949. mkallsmall2(s, w, len);
  950. free((*slst)[j]);
  951. if (spell(s)) {
  952. (*slst)[l] = mystrdup(s);
  953. if ((*slst)[l]) l++;
  954. } else {
  955. mkinitcap2(s, w, len);
  956. if (spell(s)) {
  957. (*slst)[l] = mystrdup(s);
  958. if ((*slst)[l]) l++;
  959. }
  960. }
  961. } else {
  962. (*slst)[l] = (*slst)[j];
  963. l++;
  964. }
  965. }
  966. ns = l;
  967. }
  968. }
  969. }
  970. // remove duplications
  971. int l = 0;
  972. for (int j = 0; j < ns; j++) {
  973. (*slst)[l] = (*slst)[j];
  974. for (int k = 0; k < l; k++) {
  975. if (strcmp((*slst)[k], (*slst)[j]) == 0) {
  976. free((*slst)[j]);
  977. l--;
  978. break;
  979. }
  980. }
  981. l++;
  982. }
  983. ns = l;
  984. // output conversion
  985. rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
  986. for (int j = 0; rl && j < ns; j++) {
  987. if (rl->conv((*slst)[j], wspace)) {
  988. free((*slst)[j]);
  989. (*slst)[j] = mystrdup(wspace);
  990. }
  991. }
  992. // if suggestions removed by nosuggest, onlyincompound parameters
  993. if (l == 0 && *slst) {
  994. free(*slst);
  995. *slst = NULL;
  996. }
  997. return l;
  998. }
  999. void Hunspell::free_list(char *** slst, int n) {
  1000. freelist(slst, n);
  1001. }
  1002. char * Hunspell::get_dic_encoding()
  1003. {
  1004. return encoding;
  1005. }
  1006. #ifdef HUNSPELL_EXPERIMENTAL
  1007. // XXX need UTF-8 support
  1008. int Hunspell::suggest_auto(char*** slst, const char * word)
  1009. {
  1010. char cw[MAXWORDUTF8LEN];
  1011. char wspace[MAXWORDUTF8LEN];
  1012. if (!pSMgr || maxdic == 0) return 0;
  1013. int wl = strlen(word);
  1014. if (utf8) {
  1015. if (wl >= MAXWORDUTF8LEN) return 0;
  1016. } else {
  1017. if (wl >= MAXWORDLEN) return 0;
  1018. }
  1019. int captype = 0;
  1020. int abbv = 0;
  1021. wl = cleanword(cw, word, &captype, &abbv);
  1022. if (wl == 0) return 0;
  1023. int ns = 0;
  1024. *slst = NULL; // HU, nsug in pSMgr->suggest
  1025. switch(captype) {
  1026. case NOCAP: {
  1027. ns = pSMgr->suggest_auto(slst, cw, ns);
  1028. if (ns>0) break;
  1029. break;
  1030. }
  1031. case INITCAP: {
  1032. memcpy(wspace,cw,(wl+1));
  1033. mkallsmall(wspace);
  1034. ns = pSMgr->suggest_auto(slst, wspace, ns);
  1035. for (int j=0; j < ns; j++)
  1036. mkinitcap((*slst)[j]);
  1037. ns = pSMgr->suggest_auto(slst, cw, ns);
  1038. break;
  1039. }
  1040. case HUHINITCAP:
  1041. case HUHCAP: {
  1042. ns = pSMgr->suggest_auto(slst, cw, ns);
  1043. if (ns == 0) {
  1044. memcpy(wspace,cw,(wl+1));
  1045. mkallsmall(wspace);
  1046. ns = pSMgr->suggest_auto(slst, wspace, ns);
  1047. }
  1048. break;
  1049. }
  1050. case ALLCAP: {
  1051. memcpy(wspace,cw,(wl+1));
  1052. mkallsmall(wspace);
  1053. ns = pSMgr->suggest_auto(slst, wspace, ns);
  1054. mkinitcap(wspace);
  1055. ns = pSMgr->suggest_auto(slst, wspace, ns);
  1056. for (int j=0; j < ns; j++)
  1057. mkallcap((*slst)[j]);
  1058. break;
  1059. }
  1060. }
  1061. // word reversing wrapper for complex prefixes
  1062. if (complexprefixes) {
  1063. for (int j = 0; j < ns; j++) {
  1064. if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
  1065. }
  1066. }
  1067. // expand suggestions with dot(s)
  1068. if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
  1069. for (int j = 0; j < ns; j++) {
  1070. (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
  1071. strcat((*slst)[j], word + strlen(word) - abbv);
  1072. }
  1073. }
  1074. // LANG_hu section: replace '-' with ' ' in Hungarian
  1075. if (langnum == LANG_hu) {
  1076. for (int j=0; j < ns; j++) {
  1077. char * pos = strchr((*slst)[j],'-');
  1078. if (pos) {
  1079. int info;
  1080. char w[MAXWORDUTF8LEN];
  1081. *pos = '\0';
  1082. strcpy(w, (*slst)[j]);
  1083. strcat(w, pos + 1);
  1084. spell(w, &info, NULL);
  1085. if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
  1086. *pos = ' ';
  1087. } else *pos = '-';
  1088. }
  1089. }
  1090. }
  1091. // END OF LANG_hu section
  1092. return ns;
  1093. }
  1094. #endif
  1095. int Hunspell::stem(char*** slst, char ** desc, int n)
  1096. {
  1097. char result[MAXLNLEN];
  1098. char result2[MAXLNLEN];
  1099. *slst = NULL;
  1100. if (n == 0) return 0;
  1101. *result2 = '\0';
  1102. for (int i = 0; i < n; i++) {
  1103. *result = '\0';
  1104. // add compound word parts (except the last one)
  1105. char * s = (char *) desc[i];
  1106. char * part = strstr(s, MORPH_PART);
  1107. if (part) {
  1108. char * nextpart = strstr(part + 1, MORPH_PART);
  1109. while (nextpart) {
  1110. copy_field(result + strlen(result), part, MORPH_PART);
  1111. part = nextpart;
  1112. nextpart = strstr(part + 1, MORPH_PART);
  1113. }
  1114. s = part;
  1115. }
  1116. char **pl;
  1117. char tok[MAXLNLEN];
  1118. strcpy(tok, s);
  1119. char * alt = strstr(tok, " | ");
  1120. while (alt) {
  1121. alt[1] = MSEP_ALT;
  1122. alt = strstr(alt, " | ");
  1123. }
  1124. int pln = line_tok(tok, &pl, MSEP_ALT);
  1125. for (int k = 0; k < pln; k++) {
  1126. // add derivational suffixes
  1127. if (strstr(pl[k], MORPH_DERI_SFX)) {
  1128. // remove inflectional suffixes
  1129. char * is = strstr(pl[k], MORPH_INFL_SFX);
  1130. if (is) *is = '\0';
  1131. char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
  1132. if (sg) {
  1133. char ** gen;
  1134. int genl = line_tok(sg, &gen, MSEP_REC);
  1135. free(sg);
  1136. for (int j = 0; j < genl; j++) {
  1137. sprintf(result2 + strlen(result2), "%c%s%s",
  1138. MSEP_REC, result, gen[j]);
  1139. }
  1140. freelist(&gen, genl);
  1141. }
  1142. } else {
  1143. sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
  1144. if (strstr(pl[k], MORPH_SURF_PFX)) {
  1145. copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
  1146. }
  1147. copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
  1148. }
  1149. }
  1150. freelist(&pl, pln);
  1151. }
  1152. int sln = line_tok(result2, slst, MSEP_REC);
  1153. return uniqlist(*slst, sln);
  1154. }
  1155. int Hunspell::stem(char*** slst, const char * word)
  1156. {
  1157. char ** pl;
  1158. int pln = analyze(&pl, word);
  1159. int pln2 = stem(slst, pl, pln);
  1160. freelist(&pl, pln);
  1161. return pln2;
  1162. }
  1163. #ifdef HUNSPELL_EXPERIMENTAL
  1164. int Hunspell::suggest_pos_stems(char*** slst, const char * word)
  1165. {
  1166. char cw[MAXWORDUTF8LEN];
  1167. char wspace[MAXWORDUTF8LEN];
  1168. if (! pSMgr || maxdic == 0) return 0;
  1169. int wl = strlen(word);
  1170. if (utf8) {
  1171. if (wl >= MAXWORDUTF8LEN) return 0;
  1172. } else {
  1173. if (wl >= MAXWORDLEN) return 0;
  1174. }
  1175. int captype = 0;
  1176. int abbv = 0;
  1177. wl = cleanword(cw, word, &captype, &abbv);
  1178. if (wl == 0) return 0;
  1179. int ns = 0; // ns=0 = normalized input
  1180. *slst = NULL; // HU, nsug in pSMgr->suggest
  1181. switch(captype) {
  1182. case HUHCAP:
  1183. case NOCAP: {
  1184. ns = pSMgr->suggest_pos_stems(slst, cw, ns);
  1185. if ((abbv) && (ns == 0)) {
  1186. memcpy(wspace,cw,wl);
  1187. *(wspace+wl) = '.';
  1188. *(wspace+wl+1) = '\0';
  1189. ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1190. }
  1191. break;
  1192. }
  1193. case INITCAP: {
  1194. ns = pSMgr->suggest_pos_stems(slst, cw, ns);
  1195. if (ns == 0 || ((*slst)[0][0] == '#')) {
  1196. memcpy(wspace,cw,(wl+1));
  1197. mkallsmall(wspace);
  1198. ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1199. }
  1200. break;
  1201. }
  1202. case ALLCAP: {
  1203. ns = pSMgr->suggest_pos_stems(slst, cw, ns);
  1204. if (ns != 0) break;
  1205. memcpy(wspace,cw,(wl+1));
  1206. mkallsmall(wspace);
  1207. ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1208. if (ns == 0) {
  1209. mkinitcap(wspace);
  1210. ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1211. }
  1212. break;
  1213. }
  1214. }
  1215. return ns;
  1216. }
  1217. #endif // END OF HUNSPELL_EXPERIMENTAL CODE
  1218. const char * Hunspell::get_wordchars()
  1219. {
  1220. return pAMgr->get_wordchars();
  1221. }
  1222. unsigned short * Hunspell::get_wordchars_utf16(int * len)
  1223. {
  1224. return pAMgr->get_wordchars_utf16(len);
  1225. }
  1226. void Hunspell::mkinitcap(char * p)
  1227. {
  1228. if (!utf8) {
  1229. if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
  1230. } else {
  1231. int len;
  1232. w_char u[MAXWORDLEN];
  1233. len = u8_u16(u, MAXWORDLEN, p);
  1234. unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
  1235. u[0].h = (unsigned char) (i >> 8);
  1236. u[0].l = (unsigned char) (i & 0x00FF);
  1237. u16_u8(p, MAXWORDUTF8LEN, u, len);
  1238. }
  1239. }
  1240. int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
  1241. {
  1242. if (!utf8) {
  1243. if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
  1244. } else if (nc > 0) {
  1245. unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
  1246. u[0].h = (unsigned char) (i >> 8);
  1247. u[0].l = (unsigned char) (i & 0x00FF);
  1248. u16_u8(p, MAXWORDUTF8LEN, u, nc);
  1249. return strlen(p);
  1250. }
  1251. return nc;
  1252. }
  1253. int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
  1254. {
  1255. if (!utf8) {
  1256. if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
  1257. } else if (nc > 0) {
  1258. unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
  1259. u[0].h = (unsigned char) (i >> 8);
  1260. u[0].l = (unsigned char) (i & 0x00FF);
  1261. u16_u8(p, MAXWORDUTF8LEN, u, nc);
  1262. return strlen(p);
  1263. }
  1264. return nc;
  1265. }
  1266. int Hunspell::add(const char * word)
  1267. {
  1268. if (pHMgr[0]) return (pHMgr[0])->add(word);
  1269. return 0;
  1270. }
  1271. int Hunspell::add_with_affix(const char * word, const char * example)
  1272. {
  1273. if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
  1274. return 0;
  1275. }
  1276. int Hunspell::remove(const char * word)
  1277. {
  1278. if (pHMgr[0]) return (pHMgr[0])->remove(word);
  1279. return 0;
  1280. }
  1281. const char * Hunspell::get_version()
  1282. {
  1283. return pAMgr->get_version();
  1284. }
  1285. struct cs_info * Hunspell::get_csconv()
  1286. {
  1287. return csconv;
  1288. }
  1289. void Hunspell::cat_result(char * result, char * st)
  1290. {
  1291. if (st) {
  1292. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1293. mystrcat(result, st, MAXLNLEN);
  1294. free(st);
  1295. }
  1296. }
  1297. int Hunspell::analyze(char*** slst, const char * word)
  1298. {
  1299. char cw[MAXWORDUTF8LEN];
  1300. char wspace[MAXWORDUTF8LEN];
  1301. w_char unicw[MAXWORDLEN];
  1302. int wl2 = 0;
  1303. *slst = NULL;
  1304. if (! pSMgr || maxdic == 0) return 0;
  1305. int nc = strlen(word);
  1306. if (utf8) {
  1307. if (nc >= MAXWORDUTF8LEN) return 0;
  1308. } else {
  1309. if (nc >= MAXWORDLEN) return 0;
  1310. }
  1311. int captype = 0;
  1312. int abbv = 0;
  1313. int wl = 0;
  1314. // input conversion
  1315. RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
  1316. if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
  1317. else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
  1318. if (wl == 0) {
  1319. if (abbv) {
  1320. for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
  1321. cw[wl] = '\0';
  1322. abbv = 0;
  1323. } else return 0;
  1324. }
  1325. char result[MAXLNLEN];
  1326. char * st = NULL;
  1327. *result = '\0';
  1328. int n = 0;
  1329. int n2 = 0;
  1330. int n3 = 0;
  1331. // test numbers
  1332. // LANG_hu section: set dash information for suggestions
  1333. if (langnum == LANG_hu) {
  1334. while ((n < wl) &&
  1335. (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
  1336. n++;
  1337. if ((cw[n] == '.') || (cw[n] == ',')) {
  1338. if (((n2 == 0) && (n > 3)) ||
  1339. ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
  1340. n2++;
  1341. n3 = n;
  1342. }
  1343. }
  1344. if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
  1345. if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
  1346. mystrcat(result, cw, MAXLNLEN);
  1347. result[n - 1] = '\0';
  1348. if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
  1349. else {
  1350. char sign = cw[n];
  1351. cw[n] = '\0';
  1352. cat_result(result, pSMgr->suggest_morph(cw + n - 1));
  1353. mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
  1354. cw[n] = sign;
  1355. cat_result(result, pSMgr->suggest_morph(cw + n));
  1356. }
  1357. return line_tok(result, slst, MSEP_REC);
  1358. }
  1359. }
  1360. // END OF LANG_hu section
  1361. switch(captype) {
  1362. case HUHCAP:
  1363. case HUHINITCAP:
  1364. case NOCAP: {
  1365. cat_result(result, pSMgr->suggest_morph(cw));
  1366. if (abbv) {
  1367. memcpy(wspace,cw,wl);
  1368. *(wspace+wl) = '.';
  1369. *(wspace+wl+1) = '\0';
  1370. cat_result(result, pSMgr->suggest_morph(wspace));
  1371. }
  1372. break;
  1373. }
  1374. case INITCAP: {
  1375. wl = mkallsmall2(cw, unicw, nc);
  1376. memcpy(wspace,cw,(wl+1));
  1377. wl2 = mkinitcap2(cw, unicw, nc);
  1378. cat_result(result, pSMgr->suggest_morph(wspace));
  1379. cat_result(result, pSMgr->suggest_morph(cw));
  1380. if (abbv) {
  1381. *(wspace+wl) = '.';
  1382. *(wspace+wl+1) = '\0';
  1383. cat_result(result, pSMgr->suggest_morph(wspace));
  1384. memcpy(wspace, cw, wl2);
  1385. *(wspace+wl2) = '.';
  1386. *(wspace+wl2+1) = '\0';
  1387. cat_result(result, pSMgr->suggest_morph(wspace));
  1388. }
  1389. break;
  1390. }
  1391. case ALLCAP: {
  1392. cat_result(result, pSMgr->suggest_morph(cw));
  1393. if (abbv) {
  1394. memcpy(wspace,cw,wl);
  1395. *(wspace+wl) = '.';
  1396. *(wspace+wl+1) = '\0';
  1397. cat_result(result, pSMgr->suggest_morph(cw));
  1398. }
  1399. wl = mkallsmall2(cw, unicw, nc);
  1400. memcpy(wspace,cw,(wl+1));
  1401. wl2 = mkinitcap2(cw, unicw, nc);
  1402. cat_result(result, pSMgr->suggest_morph(wspace));
  1403. cat_result(result, pSMgr->suggest_morph(cw));
  1404. if (abbv) {
  1405. *(wspace+wl) = '.';
  1406. *(wspace+wl+1) = '\0';
  1407. cat_result(result, pSMgr->suggest_morph(wspace));
  1408. memcpy(wspace, cw, wl2);
  1409. *(wspace+wl2) = '.';
  1410. *(wspace+wl2+1) = '\0';
  1411. cat_result(result, pSMgr->suggest_morph(wspace));
  1412. }
  1413. break;
  1414. }
  1415. }
  1416. if (*result) {
  1417. // word reversing wrapper for complex prefixes
  1418. if (complexprefixes) {
  1419. if (utf8) reverseword_utf(result); else reverseword(result);
  1420. }
  1421. return line_tok(result, slst, MSEP_REC);
  1422. }
  1423. // compound word with dash (HU) I18n
  1424. char * dash = NULL;
  1425. int nresult = 0;
  1426. // LANG_hu section: set dash information for suggestions
  1427. if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
  1428. if ((langnum == LANG_hu) && dash) {
  1429. *dash='\0';
  1430. // examine 2 sides of the dash
  1431. if (dash[1] == '\0') { // base word ending with dash
  1432. if (spell(cw)) {
  1433. char * p = pSMgr->suggest_morph(cw);
  1434. if (p) {
  1435. int ret = line_tok(p, slst, MSEP_REC);
  1436. free(p);
  1437. return ret;
  1438. }
  1439. }
  1440. } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
  1441. if (spell(cw) && (spell("-e"))) {
  1442. st = pSMgr->suggest_morph(cw);
  1443. if (st) {
  1444. mystrcat(result, st, MAXLNLEN);
  1445. free(st);
  1446. }
  1447. mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
  1448. st = pSMgr->suggest_morph("-e");
  1449. if (st) {
  1450. mystrcat(result, st, MAXLNLEN);
  1451. free(st);
  1452. }
  1453. return line_tok(result, slst, MSEP_REC);
  1454. }
  1455. } else {
  1456. // first word ending with dash: word- XXX ???
  1457. char r2 = *(dash + 1);
  1458. dash[0]='-';
  1459. dash[1]='\0';
  1460. nresult = spell(cw);
  1461. dash[1] = r2;
  1462. dash[0]='\0';
  1463. if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
  1464. ((dash[1] > '0') && (dash[1] < '9')))) {
  1465. st = pSMgr->suggest_morph(cw);
  1466. if (st) {
  1467. mystrcat(result, st, MAXLNLEN);
  1468. free(st);
  1469. mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
  1470. }
  1471. st = pSMgr->suggest_morph(dash+1);
  1472. if (st) {
  1473. mystrcat(result, st, MAXLNLEN);
  1474. free(st);
  1475. }
  1476. return line_tok(result, slst, MSEP_REC);
  1477. }
  1478. }
  1479. // affixed number in correct word
  1480. if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
  1481. (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
  1482. *dash='-';
  1483. n = 1;
  1484. if (*(dash - n) == '.') n++;
  1485. // search first not a number character to left from dash
  1486. while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
  1487. n++;
  1488. }
  1489. if ((dash - n) < cw) n--;
  1490. // numbers: valami1000000-hoz
  1491. // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
  1492. // 56-hoz, 6-hoz
  1493. for(; n >= 1; n--) {
  1494. if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
  1495. mystrcat(result, cw, MAXLNLEN);
  1496. result[dash - cw - n] = '\0';
  1497. st = pSMgr->suggest_morph(dash - n);
  1498. if (st) {
  1499. mystrcat(result, st, MAXLNLEN);
  1500. free(st);
  1501. }
  1502. return line_tok(result, slst, MSEP_REC);
  1503. }
  1504. }
  1505. }
  1506. }
  1507. return 0;
  1508. }
  1509. int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
  1510. {
  1511. *slst = NULL;
  1512. if (!pSMgr || !pln) return 0;
  1513. char **pl2;
  1514. int pl2n = analyze(&pl2, word);
  1515. int captype = 0;
  1516. int abbv = 0;
  1517. char cw[MAXWORDUTF8LEN];
  1518. cleanword(cw, word, &captype, &abbv);
  1519. char result[MAXLNLEN];
  1520. *result = '\0';
  1521. for (int i = 0; i < pln; i++) {
  1522. cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
  1523. }
  1524. freelist(&pl2, pl2n);
  1525. if (*result) {
  1526. // allcap
  1527. if (captype == ALLCAP) mkallcap(result);
  1528. // line split
  1529. int linenum = line_tok(result, slst, MSEP_REC);
  1530. // capitalize
  1531. if (captype == INITCAP || captype == HUHINITCAP) {
  1532. for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
  1533. }
  1534. // temporary filtering of prefix related errors (eg.
  1535. // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
  1536. int r = 0;
  1537. for (int j=0; j < linenum; j++) {
  1538. if (!spell((*slst)[j])) {
  1539. free((*slst)[j]);
  1540. (*slst)[j] = NULL;
  1541. } else {
  1542. if (r < j) (*slst)[r] = (*slst)[j];
  1543. r++;
  1544. }
  1545. }
  1546. if (r > 0) return r;
  1547. free(*slst);
  1548. *slst = NULL;
  1549. }
  1550. return 0;
  1551. }
  1552. int Hunspell::generate(char*** slst, const char * word, const char * pattern)
  1553. {
  1554. char **pl;
  1555. int pln = analyze(&pl, pattern);
  1556. int n = generate(slst, word, pl, pln);
  1557. freelist(&pl, pln);
  1558. return uniqlist(*slst, n);
  1559. }
  1560. // minimal XML parser functions
  1561. int Hunspell::get_xml_par(char * dest, const char * par, int max)
  1562. {
  1563. char * d = dest;
  1564. if (!par) return 0;
  1565. char end = *par;
  1566. char * dmax = dest + max;
  1567. if (end == '>') end = '<';
  1568. else if (end != '\'' && end != '"') return 0; // bad XML
  1569. for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
  1570. *d = '\0';
  1571. mystrrep(dest, "&lt;", "<");
  1572. mystrrep(dest, "&amp;", "&");
  1573. return (int)(d - dest);
  1574. }
  1575. int Hunspell::get_langnum() const
  1576. {
  1577. return langnum;
  1578. }
  1579. // return the beginning of the element (attr == NULL) or the attribute
  1580. const char * Hunspell::get_xml_pos(const char * s, const char * attr)
  1581. {
  1582. const char * end = strchr(s, '>');
  1583. const char * p = s;
  1584. if (attr == NULL) return end;
  1585. do {
  1586. p = strstr(p, attr);
  1587. if (!p || p >= end) return 0;
  1588. } while (*(p-1) != ' ' && *(p-1) != '\n');
  1589. return p + strlen(attr);
  1590. }
  1591. int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
  1592. char cw[MAXWORDUTF8LEN];
  1593. if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
  1594. strcmp(cw, value) == 0) return 1;
  1595. return 0;
  1596. }
  1597. int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
  1598. int n = 0;
  1599. char * p;
  1600. if (!list) return 0;
  1601. for (p = list; (p = strstr(p, tag)); p++) n++;
  1602. if (n == 0) return 0;
  1603. *slst = (char **) malloc(sizeof(char *) * n);
  1604. if (!*slst) return 0;
  1605. for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
  1606. int l = strlen(p);
  1607. (*slst)[n] = (char *) malloc(l + 1);
  1608. if (!(*slst)[n]) return n;
  1609. if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
  1610. free((*slst)[n]);
  1611. break;
  1612. }
  1613. }
  1614. return n;
  1615. }
  1616. int Hunspell::spellml(char*** slst, const char * word)
  1617. {
  1618. char *q, *q2;
  1619. char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
  1620. q = (char *) strstr(word, "<query");
  1621. if (!q) return 0; // bad XML input
  1622. q2 = strchr(q, '>');
  1623. if (!q2) return 0; // bad XML input
  1624. q2 = strstr(q2, "<word");
  1625. if (!q2) return 0; // bad XML input
  1626. if (check_xml_par(q, "type=", "analyze")) {
  1627. int n = 0, s = 0;
  1628. if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
  1629. if (n == 0) return 0;
  1630. // convert the result to <code><a>ana1</a><a>ana2</a></code> format
  1631. for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
  1632. char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
  1633. if (!r) return 0;
  1634. strcpy(r, "<code>");
  1635. for (int i = 0; i < n; i++) {
  1636. int l = strlen(r);
  1637. strcpy(r + l, "<a>");
  1638. strcpy(r + l + 3, (*slst)[i]);
  1639. mystrrep(r + l + 3, "\t", " ");
  1640. mystrrep(r + l + 3, "<", "&lt;");
  1641. mystrrep(r + l + 3, "&", "&amp;");
  1642. strcat(r, "</a>");
  1643. free((*slst)[i]);
  1644. }
  1645. strcat(r, "</code>");
  1646. (*slst)[0] = r;
  1647. return 1;
  1648. } else if (check_xml_par(q, "type=", "stem")) {
  1649. if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
  1650. } else if (check_xml_par(q, "type=", "generate")) {
  1651. int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
  1652. if (n == 0) return 0;
  1653. char * q3 = strstr(q2 + 1, "<word");
  1654. if (q3) {
  1655. if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
  1656. return generate(slst, cw, cw2);
  1657. }
  1658. } else {
  1659. if ((q2 = strstr(q2 + 1, "<code"))) {
  1660. char ** slst2;
  1661. if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
  1662. int n2 = generate(slst, cw, slst2, n);
  1663. freelist(&slst2, n);
  1664. return uniqlist(*slst, n2);
  1665. }
  1666. freelist(&slst2, n);
  1667. }
  1668. }
  1669. }
  1670. return 0;
  1671. }
  1672. #ifdef HUNSPELL_EXPERIMENTAL
  1673. // XXX need UTF-8 support
  1674. char * Hunspell::morph_with_correction(const char * word)
  1675. {
  1676. char cw[MAXWORDUTF8LEN];
  1677. char wspace[MAXWORDUTF8LEN];
  1678. if (! pSMgr || maxdic == 0) return NULL;
  1679. int wl = strlen(word);
  1680. if (utf8) {
  1681. if (wl >= MAXWORDUTF8LEN) return NULL;
  1682. } else {
  1683. if (wl >= MAXWORDLEN) return NULL;
  1684. }
  1685. int captype = 0;
  1686. int abbv = 0;
  1687. wl = cleanword(cw, word, &captype, &abbv);
  1688. if (wl == 0) return NULL;
  1689. char result[MAXLNLEN];
  1690. char * st = NULL;
  1691. *result = '\0';
  1692. switch(captype) {
  1693. case NOCAP: {
  1694. st = pSMgr->suggest_morph_for_spelling_error(cw);
  1695. if (st) {
  1696. mystrcat(result, st, MAXLNLEN);
  1697. free(st);
  1698. }
  1699. if (abbv) {
  1700. memcpy(wspace,cw,wl);
  1701. *(wspace+wl) = '.';
  1702. *(wspace+wl+1) = '\0';
  1703. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1704. if (st) {
  1705. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1706. mystrcat(result, st, MAXLNLEN);
  1707. free(st);
  1708. }
  1709. }
  1710. break;
  1711. }
  1712. case INITCAP: {
  1713. memcpy(wspace,cw,(wl+1));
  1714. mkallsmall(wspace);
  1715. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1716. if (st) {
  1717. mystrcat(result, st, MAXLNLEN);
  1718. free(st);
  1719. }
  1720. st = pSMgr->suggest_morph_for_spelling_error(cw);
  1721. if (st) {
  1722. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1723. mystrcat(result, st, MAXLNLEN);
  1724. free(st);
  1725. }
  1726. if (abbv) {
  1727. memcpy(wspace,cw,wl);
  1728. *(wspace+wl) = '.';
  1729. *(wspace+wl+1) = '\0';
  1730. mkallsmall(wspace);
  1731. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1732. if (st) {
  1733. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1734. mystrcat(result, st, MAXLNLEN);
  1735. free(st);
  1736. }
  1737. mkinitcap(wspace);
  1738. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1739. if (st) {
  1740. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1741. mystrcat(result, st, MAXLNLEN);
  1742. free(st);
  1743. }
  1744. }
  1745. break;
  1746. }
  1747. case HUHCAP: {
  1748. st = pSMgr->suggest_morph_for_spelling_error(cw);
  1749. if (st) {
  1750. mystrcat(result, st, MAXLNLEN);
  1751. free(st);
  1752. }
  1753. memcpy(wspace,cw,(wl+1));
  1754. mkallsmall(wspace);
  1755. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1756. if (st) {
  1757. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1758. mystrcat(result, st, MAXLNLEN);
  1759. free(st);
  1760. }
  1761. break;
  1762. }
  1763. case ALLCAP: {
  1764. memcpy(wspace,cw,(wl+1));
  1765. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1766. if (st) {
  1767. mystrcat(result, st, MAXLNLEN);
  1768. free(st);
  1769. }
  1770. mkallsmall(wspace);
  1771. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1772. if (st) {
  1773. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1774. mystrcat(result, st, MAXLNLEN);
  1775. free(st);
  1776. }
  1777. mkinitcap(wspace);
  1778. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1779. if (st) {
  1780. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1781. mystrcat(result, st, MAXLNLEN);
  1782. free(st);
  1783. }
  1784. if (abbv) {
  1785. memcpy(wspace,cw,(wl+1));
  1786. *(wspace+wl) = '.';
  1787. *(wspace+wl+1) = '\0';
  1788. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1789. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1790. if (st) {
  1791. mystrcat(result, st, MAXLNLEN);
  1792. free(st);
  1793. }
  1794. mkallsmall(wspace);
  1795. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1796. if (st) {
  1797. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1798. mystrcat(result, st, MAXLNLEN);
  1799. free(st);
  1800. }
  1801. mkinitcap(wspace);
  1802. st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1803. if (st) {
  1804. if (*result) mystrcat(result, "\n", MAXLNLEN);
  1805. mystrcat(result, st, MAXLNLEN);
  1806. free(st);
  1807. }
  1808. }
  1809. break;
  1810. }
  1811. }
  1812. if (*result) return mystrdup(result);
  1813. return NULL;
  1814. }
  1815. #endif // END OF HUNSPELL_EXPERIMENTAL CODE
  1816. Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
  1817. {
  1818. return (Hunhandle*)(new Hunspell(affpath, dpath));
  1819. }
  1820. Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
  1821. const char * key)
  1822. {
  1823. return (Hunhandle*)(new Hunspell(affpath, dpath, key));
  1824. }
  1825. void Hunspell_destroy(Hunhandle *pHunspell)
  1826. {
  1827. delete (Hunspell*)(pHunspell);
  1828. }
  1829. int Hunspell_spell(Hunhandle *pHunspell, const char *word)
  1830. {
  1831. return ((Hunspell*)pHunspell)->spell(word);
  1832. }
  1833. char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
  1834. {
  1835. return ((Hunspell*)pHunspell)->get_dic_encoding();
  1836. }
  1837. int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
  1838. {
  1839. return ((Hunspell*)pHunspell)->suggest(slst, word);
  1840. }
  1841. int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
  1842. {
  1843. return ((Hunspell*)pHunspell)->analyze(slst, word);
  1844. }
  1845. int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
  1846. {
  1847. return ((Hunspell*)pHunspell)->stem(slst, word);
  1848. }
  1849. int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
  1850. {
  1851. return ((Hunspell*)pHunspell)->stem(slst, desc, n);
  1852. }
  1853. int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
  1854. const char * word2)
  1855. {
  1856. return ((Hunspell*)pHunspell)->generate(slst, word, word2);
  1857. }
  1858. int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
  1859. char** desc, int n)
  1860. {
  1861. return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
  1862. }
  1863. /* functions for run-time modification of the dictionary */
  1864. /* add word to the run-time dictionary */
  1865. int Hunspell_add(Hunhandle *pHunspell, const char * word) {
  1866. return ((Hunspell*)pHunspell)->add(word);
  1867. }
  1868. /* add word to the run-time dictionary with affix flags of
  1869. * the example (a dictionary word): Hunspell will recognize
  1870. * affixed forms of the new word, too.
  1871. */
  1872. int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
  1873. const char * example) {
  1874. return ((Hunspell*)pHunspell)->add_with_affix(word, example);
  1875. }
  1876. /* remove word from the run-time dictionary */
  1877. int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
  1878. return ((Hunspell*)pHunspell)->remove(word);
  1879. }
  1880. void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
  1881. freelist(slst, n);
  1882. }