/system/jlib/jsuperhash.cpp
C++ | 608 lines | 542 code | 44 blank | 22 comment | 76 complexity | ac7c8714123a01dc4080422dc4e61ccd MD5 | raw file
- /*##############################################################################
- HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ############################################################################## */
- #include "jlib.hpp"
- #include "jsuperhash.hpp"
- #include "jexcept.hpp"
- #ifndef HASHSIZE_POWER2
- #define HASHSIZE_POWER2
- #endif
- #ifdef HASHSIZE_POWER2
- #define InitialTableSize 16
- #else
- #define InitialTableSize 15
- #endif
- //#define MY_TRACE_HASH
- #ifdef MY_TRACE_HASH
- int my_search_tot = 0;
- int my_search_num = 0;
- #endif
- //-- SuperHashTable ---------------------------------------------------
- SuperHashTable::SuperHashTable(void)
- {
- tablesize = InitialTableSize;
- tablecount = 0;
- table = (void * *) checked_malloc(InitialTableSize*sizeof(void *),-601);
- memset(table,0,InitialTableSize*sizeof(void *));
- cache = 0;
- #ifdef TRACE_HASH
- search_tot = 0;
- search_num = 0;
- search_max = 0;
- #endif
- }
- SuperHashTable::SuperHashTable(unsigned initsize)
- {
- init(initsize);
- }
- static inline unsigned nextPowerOf2(unsigned v)
- {
- assert(sizeof(unsigned)==4);
- v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v++;
- return v;
- }
- void SuperHashTable::init(unsigned initsize)
- {
- if (initsize==0)
- initsize = InitialTableSize;
- #ifdef HASHSIZE_POWER2
- //size should be a power of 2
- initsize = nextPowerOf2(initsize);
- #endif
- tablesize = initsize;
- tablecount = 0;
- table = (void * *) checked_malloc(initsize*sizeof(void *),-602);
- memset(table,0,initsize*sizeof(void *));
- cache = 0;
- #ifdef TRACE_HASH
- search_tot = 0;
- search_num = 0;
- #endif
- }
- void SuperHashTable::reinit(unsigned initsize)
- {
- doKill();
- init(initsize);
- }
- SuperHashTable::~SuperHashTable()
- {
- doKill();
- }
- void SuperHashTable::dumpStats() const
- {
- #ifdef TRACE_HASH
- if (tablecount && search_tot && search_num)
- printf("Hash table %d entries, %d size, average search length %d(%d/%d) max %d\n", tablecount, tablesize,
- (int) (search_tot/search_num), search_tot, search_num, search_max);
- #endif
- }
-
- #ifdef TRACE_HASH
- void SuperHashTable::note_searchlen(int len) const
- {
- search_tot += len;
- search_num++;
- if (search_max < len)
- search_max = len;
- }
- #endif
- unsigned SuperHashTable::doFind(unsigned findHash, const void * findParam) const
- {
- #ifdef HASHSIZE_POWER2
- unsigned v = findHash & (tablesize - 1);
- #else
- unsigned v = findHash % tablesize;
- #endif
- unsigned vs = v;
- #ifdef TRACE_HASH
- unsigned searchlen = 0;
- #endif
- while (1)
- {
- #ifdef MY_TRACE_HASH
- my_search_tot++;
- #endif
- void *et = table[v];
- if (!et)
- break;
- if (matchesFindParam(et, findParam, findHash))
- break;
- #ifdef TRACE_HASH
- searchlen ++;
- #endif
- v++;
- if (v==tablesize)
- v = 0;
- if (v==vs)
- break;
- }
- #ifdef MY_TRACE_HASH
- my_search_num++;
- if(my_search_num != 0)
- printf("Hash table average search length %d\n", (int) (my_search_tot/my_search_num));
- #endif
- #ifdef TRACE_HASH
- note_searchlen(searchlen);
- #endif
- setCache(v);
- return v;
- }
- unsigned SuperHashTable::doFindElement(unsigned v, const void * findET) const
- {
- #ifdef HASHSIZE_POWER2
- v = v & (tablesize - 1);
- #else
- v = v % tablesize;
- #endif
- unsigned vs = v;
- #ifdef TRACE_HASH
- unsigned searchlen = 0;
- #endif
- while (1)
- {
- #ifdef MY_TRACE_HASH
- my_search_tot++;
- #endif
- void *et = table[v];
- if (!et)
- break;
- if (matchesElement(et, findET))
- break;
- #ifdef TRACE_HASH
- searchlen ++;
- #endif
- v++;
- if (v==tablesize)
- v = 0;
- if (v==vs)
- break;
- }
- #ifdef MY_TRACE_HASH
- my_search_num++;
- if(my_search_num != 0)
- printf("Hash table average search length %d\n", (int) (my_search_tot/my_search_num));
- #endif
- #ifdef TRACE_HASH
- note_searchlen(searchlen);
- #endif
- setCache(v);
- return v;
- }
- unsigned SuperHashTable::doFindNew(unsigned v) const
- {
- #ifdef HASHSIZE_POWER2
- v = v & (tablesize - 1);
- #else
- v = v % tablesize;
- #endif
- unsigned vs = v;
- #ifdef TRACE_HASH
- unsigned searchlen = 0;
- #endif
- while (1)
- {
- #ifdef MY_TRACE_HASH
- my_search_tot++;
- #endif
- void *et = table[v];
- if (!et)
- break;
- #ifdef TRACE_HASH
- searchlen ++;
- #endif
- v++;
- if (v==tablesize)
- v = 0;
- if (v==vs)
- break; //table is full, should never occur
- }
- #ifdef MY_TRACE_HASH
- my_search_num++;
- if(my_search_num != 0)
- printf("Hash table average search length %d\n", (int) (my_search_tot/my_search_num));
- #endif
- #ifdef TRACE_HASH
- note_searchlen(searchlen);
- #endif
- setCache(v);
- return v;
- }
- unsigned SuperHashTable::doFindExact(const void *et) const
- {
- unsigned i = cache;
- if (i>=tablesize || table[i]!=et)
- {
- #ifdef HASHSIZE_POWER2
- i = getHashFromElement(et) & (tablesize - 1);
- #else
- i = getHashFromElement(et) % tablesize;
- #endif
- unsigned is = i;
- for (;;)
- {
- const void * cur = table[i];
- if (!cur || cur == et)
- break;
- i++;
- if (i==tablesize)
- i = 0;
- if (i==is)
- break;
- }
- setCache(i);
- }
- return i;
- }
- void SuperHashTable::ensure(unsigned mincount)
- {
- if (mincount <= getTableLimit(tablesize))
- return;
- unsigned newsize = tablesize;
- for (;;)
- {
- #ifdef HASHSIZE_POWER2
- newsize += newsize;
- #else
- if (newsize>=0x3FF)
- newsize += 0x400;
- else
- newsize += newsize+1;
- #endif
- if (newsize < tablesize)
- throw MakeStringException(0, "HashTable expanded beyond 2^32 items");
- if (mincount <= getTableLimit(newsize))
- break;
- }
- expand(newsize);
- }
- void SuperHashTable::expand()
- {
- unsigned newsize = tablesize;
- #ifdef HASHSIZE_POWER2
- newsize += newsize;
- #else
- if (newsize>=0x3FF)
- newsize += 0x400;
- else
- newsize += newsize+1;
- #endif
- expand(newsize);
- }
- void SuperHashTable::expand(unsigned newsize)
- {
- if (newsize < tablesize)
- throw MakeStringException(0, "HashTable expanded beyond 2^32 items");
- void * *newtable = (void * *) checked_malloc(newsize*sizeof(void *),-603);
- memset(newtable,0,newsize*sizeof(void *));
- void * *oldtable = table;
- #ifdef HASHSIZE_POWER2
- const unsigned oldmask = tablesize-1;
- #endif
- unsigned i;
- for (i = 0; i < tablesize; i++)
- {
- #ifdef HASHSIZE_POWER2
- __builtin_prefetch(oldtable[(i+1) & oldmask]);
- #endif
- void *et = oldtable[i];
- if (et)
- {
- #ifdef HASHSIZE_POWER2
- unsigned v = getHashFromElement(et) & (newsize - 1);
- #else
- unsigned v = getHashFromElement(et) % newsize;
- #endif
- while (newtable[v])
- {
- v++;
- if (v==newsize)
- v = 0;
- }
- newtable[v] = et;
- }
- }
- free(table);
- table = newtable;
- tablesize = newsize;
- }
- bool SuperHashTable::doAdd(void * donor, bool replace)
- {
- unsigned vs = getHashFromElement(donor);
- unsigned vm = doFind(vs, getFindParam(donor));
- void *et = table[vm];
- if (et)
- {
- if (replace)
- {
- onRemove(et);
- table[vm] = donor;
- onAdd(donor);
- return true;
- }
- else
- return false;
- }
- else
- {
- unsigned tablelim = getTableLimit(tablesize);
- if (tablecount>=tablelim)
- {
- expand();
- vm = doFind(vs, getFindParam(donor));
- }
- tablecount++;
- table[vm] = donor;
- onAdd(donor);
- }
- return true;
- }
- void SuperHashTable::addNew(void * donor, unsigned hash)
- {
- unsigned tablelim = getTableLimit(tablesize);
- if (tablecount>=tablelim)
- expand();
- unsigned vm = doFindNew(hash);
- tablecount++;
- table[vm] = donor;
- onAdd(donor);
- }
- void SuperHashTable::addNew(void * donor)
- {
- addNew(donor, getHashFromElement(donor));
- }
- void SuperHashTable::doDeleteElement(unsigned v)
- {
- #ifdef HASHSIZE_POWER2
- unsigned hm = (tablesize - 1);
- #endif
- unsigned hs = tablesize;
- unsigned vn = v;
- table[v] = NULL;
- while (1)
- {
- vn++;
- if (vn==hs) vn = 0;
- void *et2 = table[vn];
- if (!et2)
- break;
- #ifdef HASHSIZE_POWER2
- unsigned vm = getHashFromElement(et2) & hm;
- if (((vn+hs-vm) & hm)>=((vn+hs-v) & hm)) // diff(vn,vm)>=diff(vn,v)
- #else
- unsigned vm = getHashFromElement(et2) % hs;
- if (((vn+hs-vm) % hs)>=((vn+hs-v) % hs)) // diff(vn,vm)>=diff(vn,v)
- #endif
- {
- table[v] = et2;
- v = vn;
- table[v] = NULL;
- }
- }
- tablecount--;
- }
- unsigned SuperHashTable::getTableLimit(unsigned max)
- {
- return (max * 3) / 4;
- }
- bool SuperHashTable::remove(const void *fp)
- {
- unsigned v = doFind(fp);
- void * et = table[v];
- if (!et)
- return false;
- doDeleteElement(v);
- onRemove(et);
- return true;
- }
- bool SuperHashTable::removeExact(void *et)
- {
- if (!et)
- return false;
- unsigned v = doFindExact(et);
- if (table[v]!=et)
- return false;
- doDeleteElement(v);
- onRemove(et);
- return true;
- }
- unsigned SuperHashTable::validIdx(unsigned i) const
- {
- while (i < tablesize && !table[i])
- i++;
- return i;
- }
- void *SuperHashTable::findElement(unsigned hash, const void * findEt) const
- {
- unsigned vm = doFindElement(hash, findEt);
- void *et = table[vm];
- return et;
- }
- void *SuperHashTable::findElement(const void * findEt) const
- {
- unsigned vm = doFindElement(getHashFromElement(findEt), findEt);
- void *et = table[vm];
- return et;
- }
- void *SuperHashTable::findExact(const void * findEt) const
- {
- unsigned vm = doFindExact(findEt);
- void *et = table[vm];
- return et;
- }
- void SuperHashTable::doKill(void)
- {
- // Check that releaseAll() has been called before doKill()
- // (in particular, derived class destructor must do this)
- #ifdef _DEBUG
- // NOTE - don't use an assertex here as exceptions thrown from inside destructors tend to be problematic
- for (unsigned i = 0; i < tablesize; i++)
- if (table[i]) assert(!"SuperHashTable::doKill() : table not empty");
- #endif
- free(table);
- }
- void SuperHashTable::_releaseAll(void)
- {
- if (tablecount)
- {
- unsigned i;
- for (i = 0; i < tablesize; i++)
- {
- void * et = table[i];
- table[i] = NULL;
- if (et)
- onRemove(et);
- }
- tablecount = 0;
- setCache(0);
- }
- }
- void SuperHashTable::releaseAll()
- {
- _releaseAll();
- }
- void SuperHashTable::kill(void)
- {
- _releaseAll();
- if (tablesize != InitialTableSize)
- {
- doKill();
- tablesize = InitialTableSize;
- table = (void * *)checked_malloc(InitialTableSize*sizeof(void *), -604);
- memset(table, 0, InitialTableSize*sizeof(void *));
- }
- }
- void *SuperHashTable::addOrFind(void * donor)
- {
- unsigned vs = getHashFromElement(donor);
- unsigned vm = doFind(vs, getFindParam(donor));
- void *et = table[vm];
- if(!et)
- {
- unsigned tablelim = getTableLimit(tablesize);
- if (tablecount>=tablelim)
- {
- expand();
- vm = doFind(vs, getFindParam(donor));
- }
- tablecount++;
- table[vm] = donor;
- onAdd(donor);
- return donor;
- }
- return et;
- }
- void *SuperHashTable::addOrFindExact(void * donor)
- {
- unsigned vm = doFindExact(donor);
- void *et = table[vm];
- if(!et)
- {
- unsigned tablelim = getTableLimit(tablesize);
- if (tablecount>=tablelim)
- {
- expand();
- vm = doFindExact(donor);
- }
- tablecount++;
- table[vm] = donor;
- onAdd(donor);
- return donor;
- }
- return et;
- }
- void *SuperHashTable::next(const void *et) const
- {
- unsigned i;
- if (!et)
- {
- if (!tablecount)
- return NULL;
- i = (unsigned) -1;
- }
- else
- {
- i = doFindExact(et);
- if (table[i] != et)
- {
- assertex(!"SuperHashTable::Next : start item not found");
- return NULL;
- }
- }
- while (1)
- {
- i++;
- if (i>=tablesize)
- return NULL;
- if (table[i])
- break;
- }
- setCache(i);
- return table[i];
- }
- bool SuperHashTable::matchesElement(const void *et, const void *searchET) const
- {
- assertex(!"SuperHashTable::matchesElement needs to be overridden");
- return false;
- }