PageRenderTime 44ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/word.c

https://github.com/camion/hash-dico
C | 230 lines | 159 code | 20 blank | 51 comment | 32 complexity | 81e63670062c535fd02872cc045ccb5c MD5 | raw file
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #include "../include/word.h"
  5. #include "../include/read.h"
  6. #include "../include/list.h"
  7. #include "../include/util.h"
  8. /*
  9. Allocates memory for a word
  10. The size of the string is not considered
  11. An empty word can be inserted so check before calling
  12. Returns the new allocated cell, or exit with 2 if not enouth memory
  13. */
  14. List alloc_cell_word(char* word){
  15. List new_cell;
  16. int size;
  17. /* Allocates the box of the cell (Cellword + List) */
  18. if((new_cell = malloc(sizeof(Cell))) == NULL){
  19. fprintf(stderr,"Error while allocating a cell\n");
  20. exit(2);
  21. }
  22. /* Allocates the box of the word (string + Listpos)*/
  23. if((new_cell->value = malloc(sizeof(Cellword))) == NULL){
  24. fprintf(stderr,"Error while allocating a cell\n");
  25. exit(2);
  26. }
  27. size=strlen(word);
  28. /* Allocates the string */
  29. if((new_cell->value->word = malloc(size*sizeof(char)+1)) == NULL){
  30. fprintf(stderr,"Error while allocating a word\n");
  31. exit(2);
  32. }
  33. /* The list of its position is void */
  34. new_cell->value->positions = NULL;
  35. /* Copy the word into the new cell.
  36. We can use strcpy in place of strncpy because we have allocated exactly
  37. the place needed.. so */
  38. strcpy(new_cell->value->word, word);
  39. new_cell->next = NULL;
  40. return new_cell;
  41. }
  42. /* void lower_string(char *string){ */
  43. /* int i; */
  44. /* for(i=0; string[i]!='\0'; ++i) */
  45. /* string[i]=tolower(string[i]); */
  46. /* } */
  47. /*
  48. Inserts a word into the list *w respecting the lexicographical order
  49. and adds it position contained in offset.
  50. This function cannot duplicate word : if the word is already in, it
  51. just adds the word position.
  52. */
  53. void insert_lexico_word(List *w, char* word, long offset){
  54. List new_cell, tmp1, tmp2;
  55. int result;
  56. /* if the list *w is empty */
  57. if(*w==NULL){
  58. new_cell=alloc_cell_word(word);
  59. *w=new_cell;
  60. insert_head_pos(&(new_cell->value->positions), offset);
  61. }
  62. /* if the word must be inserted before the head of the list */
  63. else if(strcmp(word, (*w)->value->word)<0){
  64. /* if the word is < head */
  65. new_cell=alloc_cell_word(word);
  66. new_cell->next = *w;
  67. *w=new_cell;
  68. insert_head_pos(&(new_cell->value->positions), offset);
  69. /* if the word must be inserted after the head of the list */
  70. }else{
  71. /* search word's position */
  72. tmp1=tmp2=*w;
  73. while(tmp2!=NULL && (result=strcmp(tmp2->value->word, word))<0){
  74. tmp1 = tmp2;
  75. tmp2 = tmp2->next;
  76. }
  77. /* if the word is not already in list */
  78. if(result!=0){
  79. new_cell=alloc_cell_word(word);
  80. new_cell->next = tmp2;
  81. tmp1->next = new_cell;
  82. }
  83. else
  84. insert_head_pos(&(tmp2->value->positions), offset);
  85. }
  86. }
  87. List search_word(List hash[], char* word){
  88. List w = hash[hash_string(word)%HASH_SIZE];
  89. for(; w!=NULL; w=w->next)
  90. if(strcmp(w->value->word, word) == 0)
  91. return w;
  92. return NULL;
  93. }
  94. void print_list_word(List w){
  95. if(w==NULL){
  96. printf("Empty List of words\n");
  97. return;
  98. }
  99. for(; w!=NULL; w=w->next)
  100. printf("%s\n",w->value->word);
  101. }
  102. /*
  103. Merges two list respecting lexical order
  104. The merged list is stored in w1
  105. w2 stay intact
  106. */
  107. void merge_list(List *w1, const List w2){
  108. List new_cell;
  109. List tmp1, tmp2, tmp3;
  110. /* We cannot merge with a void list */
  111. if(w2==NULL)
  112. return;
  113. /* for every element of w2 */
  114. for(tmp3=w2;tmp3!=NULL;tmp3=tmp3->next){
  115. new_cell=malloc(sizeof(Cell));
  116. new_cell->value=tmp3->value;
  117. new_cell->next=NULL;
  118. /* if the list *w is empty */
  119. if(*w1==NULL)
  120. *w1=new_cell;
  121. /* if the word must be inserted before the head of the list */
  122. else if(strcmp(tmp3->value->word, (*w1)->value->word)<0){
  123. /* if the word is < head */
  124. new_cell->next = *w1;
  125. *w1=new_cell;
  126. }
  127. /* if the word must be inserted after the head of the list */
  128. else{
  129. /* search word's position */
  130. tmp1=tmp2=*w1;
  131. while(tmp2!=NULL && strcmp(tmp2->value->word, tmp3->value->word)<0){
  132. tmp1 = tmp2;
  133. tmp2 = tmp2->next;
  134. }
  135. new_cell->next = tmp2;
  136. tmp1->next = new_cell;
  137. }
  138. }
  139. }
  140. /*
  141. Creates a sorted list from lists contained in hash[]
  142. It merges all list in hash, in the first box of hash[]
  143. Returns the list created
  144. */
  145. List create_sorted_list(List hash[]){
  146. int i;
  147. /* We merge all lists with the first */
  148. List w = NULL;
  149. for(i=0;i<HASH_SIZE;i++)
  150. merge_list(&w,hash[i]);
  151. return w;
  152. }
  153. void free_sorted_list(List *l){
  154. List tmp=*l;
  155. while(*l!=NULL){
  156. tmp=(*l)->next;
  157. free(*l);
  158. *l=tmp;
  159. }
  160. *l=NULL;
  161. }
  162. void free_list_word(List *w){
  163. List tmp=*w;
  164. while(*w!=NULL){
  165. tmp=(*w)->next;
  166. free_list_pos(&(*w)->value->positions);
  167. free((*w)->value->word);
  168. free((*w)->value);
  169. free(*w);
  170. *w=tmp;
  171. }
  172. *w=NULL;
  173. }
  174. void print_sentences_containing(FILE* text, FILE* output, List hash[], char *word){
  175. List w = search_word(hash,word);
  176. if(w==NULL){
  177. fprintf(output,"\"%s\" is not found in text\n",word);
  178. return;
  179. }
  180. Listpos tmp=w->value->positions;
  181. int i=count_list_pos(tmp);
  182. char c;
  183. /* if(verbose)printf("%s apparaƮt dans %d phrases du texte :\n\n",word,i); */
  184. /* print every sentences */
  185. for(i=1;tmp!=NULL;tmp=tmp->next,i++){
  186. /* we set text position at the begining of the sentence*/
  187. fseek(text,tmp->position,SEEK_SET);
  188. fprintf(output,"[%08d] ",i);
  189. while((c=fgetc(text)) && c!=EOF){
  190. fprintf(output,"%c",c);
  191. if(END_OF_PHRASE(c))break;
  192. }
  193. fprintf(output,"\n");
  194. }
  195. }
  196. void print_words_beginning_with(FILE* text, FILE* output, List index, char *word){
  197. int size_word=strlen(word);
  198. int i=1;
  199. if(size_word==0 || index==NULL)
  200. return;
  201. fprintf(output,"\t> Words begining with \"%s\" :\n",word);
  202. for(;index!=NULL;index=index->next){
  203. if(strncmp(word,index->value->word,size_word)==0){
  204. fprintf(output,"[%4d] %s\n",i,index->value->word);
  205. i++;
  206. }
  207. }
  208. if(i==1)
  209. fprintf(output,"No words begin with \"%s\"\n",word);
  210. }