/bloomfileter/bloom.c
C | 195 lines | 154 code | 41 blank | 0 comment | 12 complexity | 294b1f945a5e94ec4775f45d86ba1341 MD5 | raw file
- #include <stdlib.h>
- #include <string.h>
- #include <math.h>
- #include <syslog.h>
- #include "log.h"
- #include "bloom.h"
- #include "bits.h"
- #define DICT_INITIAL_SIZE 16
- #define LONG_MAX_SIZE 0x01FFFFFF
- static u_int32 get_next_size(u_int32 size)
- {
- u_int32 i = DICT_INITIAL_SIZE;
- while(1)
- {
- if(i>size)
- return i;
- i*=2;
- }
- }
- static u_int32 dict_func1(const char *str, u_int32 len)
- {
- u_int32 hash = 5381;
- while(len--)
- hash = ((hash << 5) + hash) + (*str++);
- return hash;
- }
- static u_int32 dict_func2(const char *str, u_int32 len)
- {
- u_int32 hash = 0;
- while(*str)
- hash = (*str++) + (hash << 6) + (hash << 16) - hash;
- return hash&0x7fffffff;
- }
- static u_int32 dict_func3(const char *str, u_int32 len)
- {
- u_int32 a = 0x5c6b7;
- u_int32 b = 0xf8c9;
- u_int32 hash = 0;
- while(*str)
- {
- hash = hash*a + (*str++);
- a *= b;
- }
- return hash&0x7fffffff;
- }
- static u_int32 dict_func4(const char *str, u_int32 len)
- {
- u_int32 hash = 0x4e67c6a7;
- while(*str)
- hash = (hash << 5) + (*str++) + (hash >> 2);
- return hash&0x7fffffff;
- }
- u_int32 init_bloom(PBloom p_bloom, u_int32 size, double error_rate)
- {
- p_bloom->func1 = dict_func1;
- p_bloom->func2 = dict_func2;
- p_bloom->func3 = dict_func3;
- p_bloom->func4 = dict_func4;
- p_bloom->error_rate = error_rate;
-
- u_int32 actual_size = (u_int32)size*(log(1/error_rate));
-
- #ifdef _DEBUG_MODE
-
- write_log(LOG_DEBUG, NULL,
- "size:%u actual size:%u Max size:%u error_rate:%g\n",
- size,
- actual_size,
- LONG_MAX_SIZE,
- error_rate);
- #endif
-
- if(actual_size>size)
- {
- if(actual_size<=LONG_MAX_SIZE)
- {
- p_bloom->size = get_next_size(actual_size);
- p_bloom->size_mask = p_bloom->size - 1;
- }
- else
- {
- p_bloom->size_mask = p_bloom->size = actual_size;
- }
- }
- else
- return SIZE_OVERFLOW_ERROR;
-
- p_bloom->bit_size = (p_bloom->size)*sizeof(u_int32)*8;
- #ifdef _DEBUG_MODE
- write_log(LOG_DEBUG, NULL,
- "size:%u bit_size(HEX FORMAT):%x\n",
- p_bloom->size,
- p_bloom->bit_size);
- #endif
- p_bloom->map = (u_int32*)malloc((p_bloom->size)*sizeof(u_int32));
-
- if(p_bloom->map==NULL)
- return CREATE_BLOOM_ERROR;
- memset((void*)p_bloom->map, 0x00, (p_bloom->size)*sizeof(u_int32));
- return SUCCESS;
- }
- u_int32 add_ele(PBloom p_bloom, const char *str)
- {
- if(str==NULL||p_bloom==NULL)
- return NULL_POINTER;
-
- u_int32 hash1 = (p_bloom->func1(str, strlen(str)))%p_bloom->bit_size;
- u_int32 hash2 = (p_bloom->func2(str, strlen(str)))%p_bloom->bit_size;
- u_int32 hash3 = (p_bloom->func3(str, strlen(str)))%p_bloom->bit_size;
- u_int32 hash4 = (p_bloom->func4(str, strlen(str)))%p_bloom->bit_size;
-
- #ifdef _DEBUG_MODE
- write_log(LOG_DEBUG, NULL, "HASH1:%u\tHASH2:%u\tHASH3:%u\tHASH4:%u\n", hash1, hash2, hash3, hash4);
- #endif
- if(p_bloom->size_mask==p_bloom->size)
- {
- set_bits(&p_bloom->map[((u_int32)(hash1/32))%p_bloom->size_mask], hash1%32, BITS_ONE);
- set_bits(&p_bloom->map[((u_int32)(hash2/32))%p_bloom->size_mask], hash2%32, BITS_ONE);
- set_bits(&p_bloom->map[((u_int32)(hash3/32))%p_bloom->size_mask], hash3%32, BITS_ONE);
- set_bits(&p_bloom->map[((u_int32)(hash4/32))%p_bloom->size_mask], hash4%32, BITS_ONE);
- }
- else
- {
- set_bits(&p_bloom->map[((u_int32)(hash1/32))&p_bloom->size_mask], hash1%32, BITS_ONE);
- set_bits(&p_bloom->map[((u_int32)(hash2/32))&p_bloom->size_mask], hash2%32, BITS_ONE);
- set_bits(&p_bloom->map[((u_int32)(hash3/32))&p_bloom->size_mask], hash3%32, BITS_ONE);
- set_bits(&p_bloom->map[((u_int32)(hash4/32))&p_bloom->size_mask], hash4%32, BITS_ONE);
- }
-
- return SUCCESS;
- }
- u_int32 check_ele(PBloom p_bloom, const char *str)
- {
- if(str==NULL||p_bloom==NULL)
- return NULL_POINTER;
-
- u_int32 hash1 = (p_bloom->func1(str, strlen(str)))%p_bloom->bit_size;
- u_int32 hash2 = (p_bloom->func2(str, strlen(str)))%p_bloom->bit_size;
- u_int32 hash3 = (p_bloom->func3(str, strlen(str)))%p_bloom->bit_size;
- u_int32 hash4 = (p_bloom->func4(str, strlen(str)))%p_bloom->bit_size;
- int val = is_bit_zero(&p_bloom->map[(u_int32)(hash1/32)],hash1%32);
- if(val==0)
- return NOT_EXIST;
- val = is_bit_zero(&p_bloom->map[(u_int32)(hash2/32)],hash2%32);
- if(val==0)
- return NOT_EXIST;
- val = is_bit_zero(&p_bloom->map[(u_int32)(hash3/32)],hash3%32);
- if(val==0)
- return NOT_EXIST;
- val = is_bit_zero(&p_bloom->map[(u_int32)(hash4/32)],hash4%32);
- if(val==0)
- return NOT_EXIST;
- return EXIST;
- }
- void clean_bloom(PBloom p_bloom)
- {
- if(p_bloom==NULL)
- return;
-
- memset(p_bloom->map, 0x00, (p_bloom->size)*sizeof(u_int32));
- }
- void destroy_bloom(PBloom p_Bloom)
- {
- free(p_Bloom->map);
- }