PageRenderTime 55ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/lingucomponent/source/thesaurus/mythes/mythes.cxx

https://bitbucket.org/mst/ooo340
C++ | 403 lines | 270 code | 67 blank | 66 comment | 59 complexity | aa31c512cc1db90eae04f5c691fcdf44 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-3.0, AGPL-1.0, BSD-3-Clause-No-Nuclear-License-2014, GPL-3.0, GPL-2.0, BSD-3-Clause, LGPL-2.1
  1. /*************************************************************************
  2. *
  3. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4. *
  5. * Copyright 2000, 2010 Oracle and/or its affiliates.
  6. *
  7. * OpenOffice.org - a multi-platform office productivity suite
  8. *
  9. * This file is part of OpenOffice.org.
  10. *
  11. * OpenOffice.org is free software: you can redistribute it and/or modify
  12. * it under the terms of the GNU Lesser General Public License version 3
  13. * only, as published by the Free Software Foundation.
  14. *
  15. * OpenOffice.org is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU Lesser General Public License version 3 for more details
  19. * (a copy is included in the LICENSE file that accompanied this code).
  20. *
  21. * You should have received a copy of the GNU Lesser General Public License
  22. * version 3 along with OpenOffice.org. If not, see
  23. * <http://www.openoffice.org/license.html>
  24. * for a copy of the LGPLv3 License.
  25. *
  26. ************************************************************************/
  27. // MARKER(update_precomp.py): autogen include statement, do not remove
  28. #include "precompiled_lingucomponent.hxx"
  29. #include "license.readme"
  30. #include <stdio.h>
  31. #include <string.h>
  32. #include <stdlib.h>
  33. #include <errno.h>
  34. #include "mythes.hxx"
  35. MyThes::MyThes(const char* idxpath, const char * datpath)
  36. {
  37. nw = 0;
  38. encoding = NULL;
  39. list = NULL;
  40. offst = NULL;
  41. if (thInitialize(idxpath, datpath) != 1) {
  42. fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
  43. fflush(stderr);
  44. thCleanup();
  45. // did not initialize properly - throw exception?
  46. }
  47. }
  48. MyThes::~MyThes()
  49. {
  50. thCleanup();
  51. }
  52. int MyThes::thInitialize(const char* idxpath, const char* datpath)
  53. {
  54. // open the index file
  55. FILE * pifile = fopen(idxpath,"r");
  56. if (!pifile) {
  57. return 0;
  58. }
  59. // parse in encoding and index size */
  60. char * wrd;
  61. wrd = (char *)calloc(1, MAX_WD_LEN);
  62. if (!wrd) {
  63. fprintf(stderr,"Error - bad memory allocation\n");
  64. fflush(stderr);
  65. fclose(pifile);
  66. return 0;
  67. }
  68. int len = readLine(pifile,wrd,MAX_WD_LEN);
  69. encoding = mystrdup(wrd);
  70. len = readLine(pifile,wrd,MAX_WD_LEN);
  71. int idxsz = atoi(wrd);
  72. // now allocate list, offst for the given size
  73. list = (char**) calloc(idxsz,sizeof(char*));
  74. offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
  75. if ( (!(list)) || (!(offst)) ) {
  76. fprintf(stderr,"Error - bad memory allocation\n");
  77. fflush(stderr);
  78. fclose(pifile);
  79. return 0;
  80. }
  81. // now parse the remaining lines of the index
  82. len = readLine(pifile,wrd,MAX_WD_LEN);
  83. while (len > 0)
  84. {
  85. int np = mystr_indexOfChar(wrd,'|');
  86. if (nw < idxsz) {
  87. if (np >= 0) {
  88. *(wrd+np) = '\0';
  89. list[nw] = (char *)calloc(1,(np+1));
  90. if (!list[nw]) {
  91. fprintf(stderr,"Error - bad memory allocation\n");
  92. fflush(stderr);
  93. fclose(pifile);
  94. return 0;
  95. }
  96. memcpy((list[nw]),wrd,np);
  97. offst[nw] = atoi(wrd+np+1);
  98. nw++;
  99. }
  100. }
  101. len = readLine(pifile,wrd,MAX_WD_LEN);
  102. }
  103. free((void *)wrd);
  104. fclose(pifile);
  105. /* next open the data file */
  106. pdfile = fopen(datpath,"r");
  107. if (!pdfile) {
  108. return 0;
  109. }
  110. return 1;
  111. }
  112. void MyThes::thCleanup()
  113. {
  114. /* first close the data file */
  115. if (pdfile) {
  116. fclose(pdfile);
  117. pdfile=NULL;
  118. }
  119. if (list)
  120. {
  121. /* now free up all the allocated strings on the list */
  122. for (int i=0; i < nw; i++)
  123. {
  124. if (list[i]) {
  125. free(list[i]);
  126. list[i] = 0;
  127. }
  128. }
  129. free((void*)list);
  130. }
  131. if (encoding) free((void*)encoding);
  132. if (offst) free((void*)offst);
  133. encoding = NULL;
  134. list = NULL;
  135. offst = NULL;
  136. nw = 0;
  137. }
  138. // lookup text in index and count of meanings and a list of meaning entries
  139. // with each entry having a synonym count and pointer to an
  140. // array of char * (i.e the synonyms)
  141. //
  142. // note: calling routine should call CleanUpAfterLookup with the original
  143. // meaning point and count to properly deallocate memory
  144. int MyThes::Lookup(const char * pText, int len, mentry** pme)
  145. {
  146. *pme = NULL;
  147. // handle the case of missing file or file related errors
  148. if (! pdfile) return 0;
  149. long offset = 0;
  150. /* copy search word and make sure null terminated */
  151. char * wrd = (char *) calloc(1,(len+1));
  152. memcpy(wrd,pText,len);
  153. /* find it in the list */
  154. int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
  155. free(wrd);
  156. if (idx < 0) return 0;
  157. // now seek to the offset
  158. offset = (long) offst[idx];
  159. int rc = fseek(pdfile,offset,SEEK_SET);
  160. if (rc) {
  161. return 0;
  162. }
  163. // grab the count of the number of meanings
  164. // and allocate a list of meaning entries
  165. char * buf = NULL;
  166. buf = (char *) malloc( MAX_LN_LEN );
  167. if (!buf) return 0;
  168. readLine(pdfile, buf, (MAX_LN_LEN-1));
  169. int np = mystr_indexOfChar(buf,'|');
  170. if (np < 0) {
  171. free(buf);
  172. return 0;
  173. }
  174. int nmeanings = atoi(buf+np+1);
  175. *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
  176. if (!(*pme)) {
  177. free(buf);
  178. return 0;
  179. }
  180. // now read in each meaning and parse it to get defn, count and synonym lists
  181. mentry* pm = *(pme);
  182. char dfn[MAX_WD_LEN];
  183. for (int j = 0; j < nmeanings; j++) {
  184. readLine(pdfile, buf, (MAX_LN_LEN-1));
  185. pm->count = 0;
  186. pm->psyns = NULL;
  187. pm->defn = NULL;
  188. // store away the part of speech for later use
  189. char * p = buf;
  190. char * pos = NULL;
  191. np = mystr_indexOfChar(p,'|');
  192. if (np >= 0) {
  193. *(buf+np) = '\0';
  194. pos = mystrdup(p);
  195. p = p + np + 1;
  196. } else {
  197. pos = mystrdup("");
  198. }
  199. // count the number of fields in the remaining line
  200. int nf = 1;
  201. char * d = p;
  202. np = mystr_indexOfChar(d,'|');
  203. while ( np >= 0 ) {
  204. nf++;
  205. d = d + np + 1;
  206. np = mystr_indexOfChar(d,'|');
  207. }
  208. pm->count = nf;
  209. pm->psyns = (char **) malloc(nf*sizeof(char*));
  210. // fill in the synonym list
  211. d = p;
  212. for (int jj = 0; jj < nf; jj++)
  213. {
  214. np = mystr_indexOfChar(d,'|');
  215. if (np > 0)
  216. {
  217. *(d+np) = '\0';
  218. pm->psyns[jj] = mystrdup(d);
  219. d = d + np + 1;
  220. }
  221. else
  222. {
  223. pm->psyns[jj] = mystrdup(d);
  224. }
  225. }
  226. // add pos to first synonym to create the definition
  227. int k = strlen(pos);
  228. int m = strlen(pm->psyns[0]);
  229. if ((k+m) < (MAX_WD_LEN - 1)) {
  230. strncpy(dfn,pos,k);
  231. *(dfn+k) = ' ';
  232. strncpy((dfn+k+1),(pm->psyns[0]),m+1);
  233. pm->defn = mystrdup(dfn);
  234. } else {
  235. pm->defn = mystrdup(pm->psyns[0]);
  236. }
  237. free(pos);
  238. pm++;
  239. }
  240. free(buf);
  241. return nmeanings;
  242. }
  243. void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
  244. {
  245. if (nmeanings == 0) return;
  246. if ((*pme) == NULL) return;
  247. mentry * pm = *pme;
  248. for (int i = 0; i < nmeanings; i++) {
  249. int count = pm->count;
  250. for (int j = 0; j < count; j++) {
  251. if (pm->psyns[j]) free(pm->psyns[j]);
  252. pm->psyns[j] = NULL;
  253. }
  254. if (pm->psyns) free(pm->psyns);
  255. pm->psyns = NULL;
  256. if (pm->defn) free(pm->defn);
  257. pm->defn = NULL;
  258. pm->count = 0;
  259. pm++;
  260. }
  261. pm = *pme;
  262. free(pm);
  263. *pme = NULL;
  264. return;
  265. }
  266. // read a line of text from a text file stripping
  267. // off the line terminator and replacing it with
  268. // a null string terminator.
  269. // returns: -1 on error or the number of characters in
  270. // in the returning string
  271. // A maximum of nc characters will be returned
  272. int MyThes::readLine(FILE * pf, char * buf, int nc)
  273. {
  274. if (fgets(buf,nc,pf)) {
  275. mychomp(buf);
  276. return strlen(buf);
  277. }
  278. return -1;
  279. }
  280. // performs a binary search on null terminated character
  281. // strings
  282. //
  283. // returns: -1 on not found
  284. // index of wrd in the list[]
  285. int MyThes::binsearch(char * sw, char* _list[], int nlst)
  286. {
  287. int lp, up, mp, j, indx;
  288. lp = 0;
  289. up = nlst-1;
  290. indx = -1;
  291. if (strcmp(sw,_list[lp]) < 0) return -1;
  292. if (strcmp(sw,_list[up]) > 0) return -1;
  293. while (indx < 0 ) {
  294. mp = (int)((lp+up) >> 1);
  295. j = strcmp(sw,_list[mp]);
  296. if ( j > 0) {
  297. lp = mp + 1;
  298. } else if (j < 0 ) {
  299. up = mp - 1;
  300. } else {
  301. indx = mp;
  302. }
  303. if (lp > up) return -1;
  304. }
  305. return indx;
  306. }
  307. char * MyThes::get_th_encoding()
  308. {
  309. if (encoding) return encoding;
  310. return NULL;
  311. }
  312. // string duplication routine
  313. char * MyThes::mystrdup(const char * p)
  314. {
  315. int sl = strlen(p) + 1;
  316. char * d = (char *)malloc(sl);
  317. if (d) {
  318. memcpy(d,p,sl);
  319. return d;
  320. }
  321. return NULL;
  322. }
  323. // remove cross-platform text line end characters
  324. void MyThes::mychomp(char * s)
  325. {
  326. int k = strlen(s);
  327. if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
  328. if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
  329. }
  330. // return index of char in string
  331. int MyThes::mystr_indexOfChar(const char * d, int c)
  332. {
  333. char * p = strchr((char *)d,c);
  334. if (p) return (int)(p-d);
  335. return -1;
  336. }