PageRenderTime 170ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/bloomfileter/bloom.c

http://practice.codeplex.com
C | 195 lines | 154 code | 41 blank | 0 comment | 12 complexity | 294b1f945a5e94ec4775f45d86ba1341 MD5 | raw file
  1. #include <stdlib.h>
  2. #include <string.h>
  3. #include <math.h>
  4. #include <syslog.h>
  5. #include "log.h"
  6. #include "bloom.h"
  7. #include "bits.h"
  8. #define DICT_INITIAL_SIZE 16
  9. #define LONG_MAX_SIZE 0x01FFFFFF
  10. static u_int32 get_next_size(u_int32 size)
  11. {
  12. u_int32 i = DICT_INITIAL_SIZE;
  13. while(1)
  14. {
  15. if(i>size)
  16. return i;
  17. i*=2;
  18. }
  19. }
  20. static u_int32 dict_func1(const char *str, u_int32 len)
  21. {
  22. u_int32 hash = 5381;
  23. while(len--)
  24. hash = ((hash << 5) + hash) + (*str++);
  25. return hash;
  26. }
  27. static u_int32 dict_func2(const char *str, u_int32 len)
  28. {
  29. u_int32 hash = 0;
  30. while(*str)
  31. hash = (*str++) + (hash << 6) + (hash << 16) - hash;
  32. return hash&0x7fffffff;
  33. }
  34. static u_int32 dict_func3(const char *str, u_int32 len)
  35. {
  36. u_int32 a = 0x5c6b7;
  37. u_int32 b = 0xf8c9;
  38. u_int32 hash = 0;
  39. while(*str)
  40. {
  41. hash = hash*a + (*str++);
  42. a *= b;
  43. }
  44. return hash&0x7fffffff;
  45. }
  46. static u_int32 dict_func4(const char *str, u_int32 len)
  47. {
  48. u_int32 hash = 0x4e67c6a7;
  49. while(*str)
  50. hash = (hash << 5) + (*str++) + (hash >> 2);
  51. return hash&0x7fffffff;
  52. }
  53. u_int32 init_bloom(PBloom p_bloom, u_int32 size, double error_rate)
  54. {
  55. p_bloom->func1 = dict_func1;
  56. p_bloom->func2 = dict_func2;
  57. p_bloom->func3 = dict_func3;
  58. p_bloom->func4 = dict_func4;
  59. p_bloom->error_rate = error_rate;
  60. u_int32 actual_size = (u_int32)size*(log(1/error_rate));
  61. #ifdef _DEBUG_MODE
  62. write_log(LOG_DEBUG, NULL,
  63. "size:%u actual size:%u Max size:%u error_rate:%g\n",
  64. size,
  65. actual_size,
  66. LONG_MAX_SIZE,
  67. error_rate);
  68. #endif
  69. if(actual_size>size)
  70. {
  71. if(actual_size<=LONG_MAX_SIZE)
  72. {
  73. p_bloom->size = get_next_size(actual_size);
  74. p_bloom->size_mask = p_bloom->size - 1;
  75. }
  76. else
  77. {
  78. p_bloom->size_mask = p_bloom->size = actual_size;
  79. }
  80. }
  81. else
  82. return SIZE_OVERFLOW_ERROR;
  83. p_bloom->bit_size = (p_bloom->size)*sizeof(u_int32)*8;
  84. #ifdef _DEBUG_MODE
  85. write_log(LOG_DEBUG, NULL,
  86. "size:%u bit_size(HEX FORMAT):%x\n",
  87. p_bloom->size,
  88. p_bloom->bit_size);
  89. #endif
  90. p_bloom->map = (u_int32*)malloc((p_bloom->size)*sizeof(u_int32));
  91. if(p_bloom->map==NULL)
  92. return CREATE_BLOOM_ERROR;
  93. memset((void*)p_bloom->map, 0x00, (p_bloom->size)*sizeof(u_int32));
  94. return SUCCESS;
  95. }
  96. u_int32 add_ele(PBloom p_bloom, const char *str)
  97. {
  98. if(str==NULL||p_bloom==NULL)
  99. return NULL_POINTER;
  100. u_int32 hash1 = (p_bloom->func1(str, strlen(str)))%p_bloom->bit_size;
  101. u_int32 hash2 = (p_bloom->func2(str, strlen(str)))%p_bloom->bit_size;
  102. u_int32 hash3 = (p_bloom->func3(str, strlen(str)))%p_bloom->bit_size;
  103. u_int32 hash4 = (p_bloom->func4(str, strlen(str)))%p_bloom->bit_size;
  104. #ifdef _DEBUG_MODE
  105. write_log(LOG_DEBUG, NULL, "HASH1:%u\tHASH2:%u\tHASH3:%u\tHASH4:%u\n", hash1, hash2, hash3, hash4);
  106. #endif
  107. if(p_bloom->size_mask==p_bloom->size)
  108. {
  109. set_bits(&p_bloom->map[((u_int32)(hash1/32))%p_bloom->size_mask], hash1%32, BITS_ONE);
  110. set_bits(&p_bloom->map[((u_int32)(hash2/32))%p_bloom->size_mask], hash2%32, BITS_ONE);
  111. set_bits(&p_bloom->map[((u_int32)(hash3/32))%p_bloom->size_mask], hash3%32, BITS_ONE);
  112. set_bits(&p_bloom->map[((u_int32)(hash4/32))%p_bloom->size_mask], hash4%32, BITS_ONE);
  113. }
  114. else
  115. {
  116. set_bits(&p_bloom->map[((u_int32)(hash1/32))&p_bloom->size_mask], hash1%32, BITS_ONE);
  117. set_bits(&p_bloom->map[((u_int32)(hash2/32))&p_bloom->size_mask], hash2%32, BITS_ONE);
  118. set_bits(&p_bloom->map[((u_int32)(hash3/32))&p_bloom->size_mask], hash3%32, BITS_ONE);
  119. set_bits(&p_bloom->map[((u_int32)(hash4/32))&p_bloom->size_mask], hash4%32, BITS_ONE);
  120. }
  121. return SUCCESS;
  122. }
  123. u_int32 check_ele(PBloom p_bloom, const char *str)
  124. {
  125. if(str==NULL||p_bloom==NULL)
  126. return NULL_POINTER;
  127. u_int32 hash1 = (p_bloom->func1(str, strlen(str)))%p_bloom->bit_size;
  128. u_int32 hash2 = (p_bloom->func2(str, strlen(str)))%p_bloom->bit_size;
  129. u_int32 hash3 = (p_bloom->func3(str, strlen(str)))%p_bloom->bit_size;
  130. u_int32 hash4 = (p_bloom->func4(str, strlen(str)))%p_bloom->bit_size;
  131. int val = is_bit_zero(&p_bloom->map[(u_int32)(hash1/32)],hash1%32);
  132. if(val==0)
  133. return NOT_EXIST;
  134. val = is_bit_zero(&p_bloom->map[(u_int32)(hash2/32)],hash2%32);
  135. if(val==0)
  136. return NOT_EXIST;
  137. val = is_bit_zero(&p_bloom->map[(u_int32)(hash3/32)],hash3%32);
  138. if(val==0)
  139. return NOT_EXIST;
  140. val = is_bit_zero(&p_bloom->map[(u_int32)(hash4/32)],hash4%32);
  141. if(val==0)
  142. return NOT_EXIST;
  143. return EXIST;
  144. }
  145. void clean_bloom(PBloom p_bloom)
  146. {
  147. if(p_bloom==NULL)
  148. return;
  149. memset(p_bloom->map, 0x00, (p_bloom->size)*sizeof(u_int32));
  150. }
  151. void destroy_bloom(PBloom p_Bloom)
  152. {
  153. free(p_Bloom->map);
  154. }