PageRenderTime 50ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/PROJECTS_ROOT/SmartWires/SystemUtils/myspell/mythes.cxx

http://wiredplane-wintools.googlecode.com/
C++ | 361 lines | 256 code | 65 blank | 40 comment | 62 complexity | a6f61d59fd554c1ada4d1bb76a591e3e MD5 | raw file
Possible License(s): GPL-2.0, Unlicense
  1. #include "license.readme"
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <stdlib.h>
  5. #include <errno.h>
  6. #include "mythes.hxx"
  7. MyThes::MyThes(const char* idxpath, const char * datpath)
  8. {
  9. nw = 0;
  10. encoding = NULL;
  11. list = NULL;
  12. offst = NULL;
  13. if (thInitialize(idxpath, datpath) != 1) {
  14. fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
  15. fflush(stderr);
  16. if (encoding) free((void*)encoding);
  17. if (list) free((void*)list);
  18. if (offst) free((void*)offst);
  19. // did not initialize properly - throw exception?
  20. }
  21. }
  22. MyThes::~MyThes()
  23. {
  24. if (thCleanup() != 1) {
  25. /* did not cleanup properly - throw exception? */
  26. }
  27. if (encoding) free((void*)encoding);
  28. encoding = NULL;
  29. list = NULL;
  30. offst = NULL;
  31. }
  32. int MyThes::thInitialize(const char* idxpath, const char* datpath)
  33. {
  34. // open the index file
  35. FILE * pifile = fopen(idxpath,"r");
  36. if (!pifile) {
  37. pifile = NULL;
  38. return 0;
  39. }
  40. // parse in encoding and index size */
  41. char * wrd;
  42. wrd = (char *)calloc(1, MAX_WD_LEN);
  43. int len = readLine(pifile,wrd,MAX_WD_LEN);
  44. encoding = mystrdup(wrd);
  45. len = readLine(pifile,wrd,MAX_WD_LEN);
  46. int idxsz = atoi(wrd);
  47. // now allocate list, offst for the given size
  48. list = (char**) calloc(idxsz,sizeof(char*));
  49. offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
  50. if ( (!(list)) || (!(offst)) ) {
  51. fprintf(stderr,"Error - bad memory allocation\n");
  52. fflush(stderr);
  53. return 0;
  54. }
  55. // now parse the remaining lines of the index
  56. len = readLine(pifile,wrd,MAX_WD_LEN);
  57. while (len > 0)
  58. {
  59. int np = mystr_indexOfChar(wrd,'|');
  60. if (nw < idxsz) {
  61. if (np >= 0) {
  62. *(wrd+np) = '\0';
  63. list[nw] = (char *)calloc(1,(np+1));
  64. memcpy((list[nw]),wrd,np);
  65. offst[nw] = atoi(wrd+np+1);
  66. nw++;
  67. }
  68. }
  69. len = readLine(pifile,wrd,MAX_WD_LEN);
  70. }
  71. free((void *)wrd);
  72. fclose(pifile);
  73. pifile=NULL;
  74. /* next open the data file */
  75. pdfile = fopen(datpath,"r");
  76. if (!pdfile) {
  77. pdfile = NULL;
  78. return 0;
  79. }
  80. return 1;
  81. }
  82. int MyThes::thCleanup()
  83. {
  84. /* first close the data file */
  85. if (pdfile) {
  86. fclose(pdfile);
  87. pdfile=NULL;
  88. }
  89. /* now free up all the allocated strings on the list */
  90. for (int i=0; i < nw; i++)
  91. {
  92. if (list[i]) {
  93. free(list[i]);
  94. list[i] = 0;
  95. }
  96. }
  97. if (list) free((void*)list);
  98. if (offst) free((void*)offst);
  99. nw = 0;
  100. return 1;
  101. }
  102. // lookup text in index and count of meanings and a list of meaning entries
  103. // with each entry having a synonym count and pointer to an
  104. // array of char * (i.e the synonyms)
  105. //
  106. // note: calling routine should call CleanUpAfterLookup with the original
  107. // meaning point and count to properly deallocate memory
  108. int MyThes::Lookup(const char * pText, int len, mentry** pme)
  109. {
  110. *pme = NULL;
  111. // handle the case of missing file or file related errors
  112. if (! pdfile) return 0;
  113. long offset = 0;
  114. /* copy search word and make sure null terminated */
  115. char * wrd = (char *) calloc(1,(len+1));
  116. memcpy(wrd,pText,len);
  117. /* find it in the list */
  118. int idx = binsearch(wrd,list,nw);
  119. free(wrd);
  120. if (idx < 0) return 0;
  121. // now seek to the offset
  122. offset = (long) offst[idx];
  123. int rc = fseek(pdfile,offset,SEEK_SET);
  124. if (rc) {
  125. return 0;
  126. }
  127. // grab the count of the number of meanings
  128. // and allocate a list of meaning entries
  129. char * buf = NULL;
  130. buf = (char *) malloc( MAX_LN_LEN );
  131. if (!buf) return 0;
  132. readLine(pdfile, buf, (MAX_LN_LEN-1));
  133. int np = mystr_indexOfChar(buf,'|');
  134. if (np < 0) {
  135. free(buf);
  136. return 0;
  137. }
  138. int nmeanings = atoi(buf+np+1);
  139. *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
  140. if (!(*pme)) {
  141. free(buf);
  142. return 0;
  143. }
  144. // now read in each meaning and parse it to get defn, count and synonym lists
  145. mentry* pm = *(pme);
  146. char dfn[MAX_WD_LEN];
  147. for (int j = 0; j < nmeanings; j++) {
  148. readLine(pdfile, buf, (MAX_LN_LEN-1));
  149. pm->count = 0;
  150. pm->psyns = NULL;
  151. pm->defn = NULL;
  152. // store away the part of speech for later use
  153. char * p = buf;
  154. char * pos = NULL;
  155. np = mystr_indexOfChar(p,'|');
  156. if (np >= 0) {
  157. *(buf+np) = '\0';
  158. pos = mystrdup(p);
  159. p = p + np + 1;
  160. } else {
  161. pos = mystrdup("");
  162. }
  163. // count the number of fields in the remaining line
  164. int nf = 1;
  165. char * d = p;
  166. np = mystr_indexOfChar(d,'|');
  167. while ( np >= 0 ) {
  168. nf++;
  169. d = d + np + 1;
  170. np = mystr_indexOfChar(d,'|');
  171. }
  172. pm->count = nf;
  173. pm->psyns = (char **) malloc(nf*sizeof(char*));
  174. // fill in the synonym list
  175. d = p;
  176. for (int j = 0; j < nf; j++) {
  177. np = mystr_indexOfChar(d,'|');
  178. if (np > 0) {
  179. *(d+np) = '\0';
  180. pm->psyns[j] = mystrdup(d);
  181. d = d + np + 1;
  182. } else {
  183. pm->psyns[j] = mystrdup(d);
  184. }
  185. }
  186. // add pos to first synonym to create the definition
  187. int k = strlen(pos);
  188. int m = strlen(pm->psyns[0]);
  189. if ((k+m) < (MAX_WD_LEN - 1)) {
  190. strncpy(dfn,pos,k);
  191. *(dfn+k) = ' ';
  192. strncpy((dfn+k+1),(pm->psyns[0]),m+1);
  193. pm->defn = mystrdup(dfn);
  194. } else {
  195. pm->defn = mystrdup(pm->psyns[0]);
  196. }
  197. free(pos);
  198. pm++;
  199. }
  200. free(buf);
  201. return nmeanings;
  202. }
  203. void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
  204. {
  205. if (nmeanings == 0) return;
  206. if ((*pme) == NULL) return;
  207. mentry * pm = *pme;
  208. for (int i = 0; i < nmeanings; i++) {
  209. int count = pm->count;
  210. for (int j = 0; j < count; j++) {
  211. if (pm->psyns[j]) free(pm->psyns[j]);
  212. pm->psyns[j] = NULL;
  213. }
  214. if (pm->psyns) free(pm->psyns);
  215. pm->psyns = NULL;
  216. if (pm->defn) free(pm->defn);
  217. pm->defn = NULL;
  218. pm->count = 0;
  219. pm++;
  220. }
  221. pm = *pme;
  222. free(pm);
  223. *pme = NULL;
  224. return;
  225. }
  226. // read a line of text from a text file stripping
  227. // off the line terminator and replacing it with
  228. // a null string terminator.
  229. // returns: -1 on error or the number of characters in
  230. // in the returning string
  231. // A maximum of nc characters will be returned
  232. int MyThes::readLine(FILE * pf, char * buf, int nc)
  233. {
  234. if (fgets(buf,nc,pf)) {
  235. mychomp(buf);
  236. return strlen(buf);
  237. }
  238. return -1;
  239. }
  240. // performs a binary search on null terminated character
  241. // strings
  242. //
  243. // returns: -1 on not found
  244. // index of wrd in the list[]
  245. int MyThes::binsearch(char * sw, char* list[], int nlst)
  246. {
  247. int lp, up, mp, j, indx;
  248. lp = 0;
  249. up = nlst-1;
  250. indx = -1;
  251. if (strcmp(sw,list[lp]) < 0) return -1;
  252. if (strcmp(sw,list[up]) > 0) return -1;
  253. while (indx < 0 ) {
  254. mp = (int)((lp+up) >> 1);
  255. j = strcmp(sw,list[mp]);
  256. if ( j > 0) {
  257. lp = mp + 1;
  258. } else if (j < 0 ) {
  259. up = mp - 1;
  260. } else {
  261. indx = mp;
  262. }
  263. if (lp > up) return -1;
  264. }
  265. return indx;
  266. }
  267. char * MyThes::get_th_encoding()
  268. {
  269. if (encoding) return encoding;
  270. return NULL;
  271. }
  272. // string duplication routine
  273. char * MyThes::mystrdup(const char * p)
  274. {
  275. int sl = strlen(p) + 1;
  276. char * d = (char *)malloc(sl);
  277. if (d) {
  278. memcpy(d,p,sl);
  279. return d;
  280. }
  281. return NULL;
  282. }
  283. // remove cross-platform text line end characters
  284. void MyThes::mychomp(char * s)
  285. {
  286. int k = strlen(s);
  287. if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
  288. if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
  289. }
  290. // return index of char in string
  291. int MyThes::mystr_indexOfChar(const char * d, int c)
  292. {
  293. char * p = strchr((char *)d,c);
  294. if (p) return (int)(p-d);
  295. return -1;
  296. }