PageRenderTime 46ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/LZ-4/index.c

https://github.com/peper/pizza
C | 189 lines | 157 code | 17 blank | 15 comment | 7 complexity | 0c255b14eef3159171a625792a58b706 MD5 | raw file
  1. // Indexing module
  2. #include "trie.h"
  3. #include "lztrie.h"
  4. #include "nodemap.h"
  5. #include "revtrie.h"
  6. #include "lzindex.h"
  7. #include <math.h>
// buildLZTrie (defined below) creates the lztrie over a text that ends with
// the terminator symbol s; it also creates the ids array
  10. #ifdef INDEXREPORT
  11. struct tms time;
  12. clock_t t1,t2;
  13. uint ticks;
  14. #endif
  15. extern uint PARAMETER_T_IDS, PARAMETER_T_RIDS;
  16. lztrie buildLZTrie(byte *text, byte s)
  17. {
  18. trie T;
  19. uint n;
  20. uint *parent, *ids;
  21. byte *letters;
  22. lztrie LZT;
  23. unsigned long long aux;
  24. // first creates a full trie T
  25. #ifdef INDEXREPORT
  26. ticks= sysconf(_SC_CLK_TCK);
  27. times(&time); t1 = time.tms_utime;
  28. printf(" Building LZTrie...\n"); fflush(stdout);
  29. printf(" Building normal trie...\n"); fflush(stdout);
  30. #endif
  31. T = createTrie();
  32. do {
  33. text = insertTrie(T,text);
  34. }
  35. while (text[-1] != s);
  36. // now compresses it
  37. #ifdef INDEXREPORT
  38. times(&time); t2 = time.tms_utime;
  39. printf(" User time: %f secs\n",(t2-t1)/(float)ticks); fflush(stdout);
  40. t1 = t2;
  41. printf(" Representing with parentheses, letters and ids...\n"); fflush(stdout);
  42. #endif
  43. n = T->nid;
  44. aux = (2*((unsigned long long)n)+W-1)/W;
  45. parent = malloc(aux*sizeof(uint));
  46. letters = malloc(n*sizeof(byte));
  47. aux = (((unsigned long long)n)*bits(n-1)+W-1)/W;
  48. ids = malloc(aux*sizeof(uint));
  49. //malloc(n*sizeof(uint));
  50. representTrie(T,parent,letters,ids,NULL,bits(n-1));
  51. #ifdef INDEXREPORT
  52. times(&time); t2 = time.tms_utime;
  53. printf(" User time: %f secs\n",(t2-t1)/(float)ticks); fflush(stdout);
  54. t1 = t2;
  55. printf(" Freing trie...\n"); fflush(stdout);
  56. #endif
  57. destroyTrie(T);
  58. #ifdef INDEXREPORT
  59. times(&time); t2 = time.tms_utime;
  60. printf(" User time: %f secs\n",(t2-t1)/(float)ticks); fflush(stdout);
  61. t1 = t2;
  62. printf(" Creating compressed trie...\n"); fflush(stdout);
  63. #endif
  64. LZT = createLZTrie(parent,letters,ids,n);
  65. #ifdef INDEXREPORT
  66. times(&time); t2 = time.tms_utime;
  67. printf(" User time: %f secs\n",(t2-t1)/(float)ticks); fflush(stdout);
  68. t1 = t2;
  69. printf(" End of LZTrie\n"); fflush(stdout);
  70. #endif
  71. return LZT;
  72. }
  73. // builds reverse trie from LZTrie, Map, and maximum LZTrie depth
  74. // returns reverse ids
  75. revtrie buildRevTrie(lztrie T, uint maxdepth)
  76. {
  77. byte *str;
  78. uint n,rn,depth,j;
  79. trieNode i;
  80. trie RT;
  81. uint *parent, *emptybmap, *ids;
  82. revtrie CRT;
  83. unsigned long long aux;
  84. // first create a full trie RT
  85. #ifdef INDEXREPORT
  86. times(&time); t1 = time.tms_utime;
  87. printf (" Building RevTrie...\n"); fflush(stdout);
  88. printf (" Creating full trie...\n"); fflush(stdout);
  89. #endif
  90. str = malloc(maxdepth*sizeof(byte));
  91. RT = createTrie();
  92. i = ROOT; depth = 0;
  93. for (j=1;j<T->n;j++) {
  94. i = nextLZTrie(T,i,&depth);
  95. str[maxdepth-depth] = letterLZTrie(T,i);
  96. insertstringTrie(RT,str+maxdepth-depth,depth,idLZTrie(T,i));
  97. }
  98. free(str);
  99. // now compresses it
  100. #ifdef INDEXREPORT
  101. times(&time); t2 = time.tms_utime;
  102. printf(" User time: %f secs\n",(t2-t1)/(float)ticks); fflush(stdout);
  103. t1 = t2;
  104. printf(" Representing with parentheses and ids...\n"); fflush(stdout);
  105. #endif
  106. n = T->n;
  107. rn = RT->nid;
  108. aux = (2*(unsigned long long)rn+W-1)/W;
  109. parent = malloc(aux*sizeof(uint)); // 2*rn bits
  110. emptybmap = calloc(((rn+W-1)/W),sizeof(uint)); // rn bits
  111. aux = (((unsigned long long)n)*bits(n-1)+W-1)/W;
  112. ids = malloc(aux*sizeof(uint)); // the rids array has n entries
  113. // (only for the non-empty nodes)
  114. representTrie(RT,parent,NULL,ids,emptybmap,bits(n-1));
  115. #ifdef INDEXREPORT
  116. times(&time); t2 = time.tms_utime;
  117. printf(" User time: %f secs\n",(t2-t1)/(float)ticks); fflush(stdout);
  118. t1 = t2;
  119. printf(" Freeing trie...\n"); fflush(stdout);
  120. #endif
  121. destroyTrie(RT);
  122. #ifdef INDEXREPORT
  123. times(&time); t2 = time.tms_utime;
  124. printf(" User time: %f secs\n",(t2-t1)/(float)ticks); fflush(stdout);
  125. t1 = t2;
  126. printf(" Creating compressed trie...\n"); fflush(stdout);
  127. #endif
  128. CRT = createRevTrie(parent,ids,T,emptybmap,rn);
  129. #ifdef INDEXREPORT
  130. times(&time); t2 = time.tms_utime;
  131. printf(" User time: %f secs\n",(t2-t1)/(float)ticks); fflush(stdout);
  132. t1 = t2;
  133. printf(" End of RevTrie...\n"); fflush(stdout);
  134. #endif
  135. return CRT;
  136. }
  137. byte selectSymbol(byte *text, ulong length)
  138. {
  139. ulong i;
  140. byte s;
  141. bool *A = calloc(256, sizeof(bool));;
  142. for (i=0;i<length;i++) A[text[i]]= true;
  143. for (s=0;s<256;s++)
  144. if (!A[s]) break;
  145. return s;
  146. }
  147. extern uint nbits_perm;
  148. // creates lzindex over a null-terminated text
  149. // frees text
  150. int build_index(byte *text, ulong length, char *build_options, void **index)
  151. {
  152. lzindex *I;
  153. uint *ids,maxdepth;
  154. char RIDS[15], IDS[15];
  155. uint i, j;
  156. // set default values for the parameters
  157. PARAMETER_T_IDS = 4;
  158. PARAMETER_T_RIDS = 4;
  159. if (build_options)
  160. PARAMETER_T_IDS = PARAMETER_T_RIDS = atoi(build_options);
  161. I = malloc(sizeof(lzindex));
  162. text[length] = selectSymbol(text, length);
  163. // build index
  164. I->fwdtrie = buildLZTrie(text, text[length]);
  165. nbits_perm = I->fwdtrie->ids->nbits;
  166. maxdepth = maxdepthLZTrie(I->fwdtrie);
  167. I->bwdtrie = buildRevTrie(I->fwdtrie,maxdepth);
  168. I->TPos = createPosition(I->fwdtrie, length);
  169. I->u = length;
  170. *index = I; // return index
  171. return 0; // no errors yet
  172. }