PageRenderTime 26ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/system/jlib/jsuperhash.cpp

https://github.com/hpcc-systems/HPCC-Platform
C++ | 608 lines | 542 code | 44 blank | 22 comment | 76 complexity | ac7c8714123a01dc4080422dc4e61ccd MD5 | raw file
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "jlib.hpp"
  14. #include "jsuperhash.hpp"
  15. #include "jexcept.hpp"
  // Build-time configuration for the hash table.
  // HASHSIZE_POWER2 is forced on here unless the build already defined it;
  // with it set, slot indices are derived by masking rather than by modulo.
  #if !defined(HASHSIZE_POWER2)
  #define HASHSIZE_POWER2
  #endif

  // Number of slots allocated by the default constructor and by kill().
  #if defined(HASHSIZE_POWER2)
  #define InitialTableSize 16
  #else
  #define InitialTableSize 15
  #endif

  // Uncomment to count probes globally and print the running average on every lookup.
  //#define MY_TRACE_HASH
  #if defined(MY_TRACE_HASH)
  int my_search_tot = 0;   // total slots examined across all lookups
  int my_search_num = 0;   // number of lookups performed
  #endif
  29. //-- SuperHashTable ---------------------------------------------------
  30. SuperHashTable::SuperHashTable(void)
  31. {
  32. tablesize = InitialTableSize;
  33. tablecount = 0;
  34. table = (void * *) checked_malloc(InitialTableSize*sizeof(void *),-601);
  35. memset(table,0,InitialTableSize*sizeof(void *));
  36. cache = 0;
  37. #ifdef TRACE_HASH
  38. search_tot = 0;
  39. search_num = 0;
  40. search_max = 0;
  41. #endif
  42. }
  43. SuperHashTable::SuperHashTable(unsigned initsize)
  44. {
  45. init(initsize);
  46. }
  // Rounds v up to the nearest power of two (a value that already is one is returned
  // unchanged).  The arithmetic is plain 32-bit unsigned, so an input of 0 and any
  // input above 0x80000000 both wrap round and yield 0.
  static inline unsigned nextPowerOf2(unsigned v)
  {
      assert(sizeof(unsigned)==4);
      --v;
      // Smear the highest set bit into every lower position: shifts of 1,2,4,8,16.
      for (unsigned shift = 1; shift <= 16; shift <<= 1)
          v |= (v >> shift);
      return v + 1;
  }
  59. void SuperHashTable::init(unsigned initsize)
  60. {
  61. if (initsize==0)
  62. initsize = InitialTableSize;
  63. #ifdef HASHSIZE_POWER2
  64. //size should be a power of 2
  65. initsize = nextPowerOf2(initsize);
  66. #endif
  67. tablesize = initsize;
  68. tablecount = 0;
  69. table = (void * *) checked_malloc(initsize*sizeof(void *),-602);
  70. memset(table,0,initsize*sizeof(void *));
  71. cache = 0;
  72. #ifdef TRACE_HASH
  73. search_tot = 0;
  74. search_num = 0;
  75. #endif
  76. }
  77. void SuperHashTable::reinit(unsigned initsize)
  78. {
  79. doKill();
  80. init(initsize);
  81. }
  82. SuperHashTable::~SuperHashTable()
  83. {
  84. doKill();
  85. }
  86. void SuperHashTable::dumpStats() const
  87. {
  88. #ifdef TRACE_HASH
  89. if (tablecount && search_tot && search_num)
  90. printf("Hash table %d entries, %d size, average search length %d(%d/%d) max %d\n", tablecount, tablesize,
  91. (int) (search_tot/search_num), search_tot, search_num, search_max);
  92. #endif
  93. }
  #ifdef TRACE_HASH
  // Folds the length of one completed probe sequence into the running statistics:
  // bumps the search count, adds to the total, and tracks the longest seen so far.
  void SuperHashTable::note_searchlen(int len) const
  {
      search_num++;
      search_tot += len;
      if (len > search_max)
          search_max = len;
  }
  #endif
  103. unsigned SuperHashTable::doFind(unsigned findHash, const void * findParam) const
  104. {
  105. #ifdef HASHSIZE_POWER2
  106. unsigned v = findHash & (tablesize - 1);
  107. #else
  108. unsigned v = findHash % tablesize;
  109. #endif
  110. unsigned vs = v;
  111. #ifdef TRACE_HASH
  112. unsigned searchlen = 0;
  113. #endif
  114. while (1)
  115. {
  116. #ifdef MY_TRACE_HASH
  117. my_search_tot++;
  118. #endif
  119. void *et = table[v];
  120. if (!et)
  121. break;
  122. if (matchesFindParam(et, findParam, findHash))
  123. break;
  124. #ifdef TRACE_HASH
  125. searchlen ++;
  126. #endif
  127. v++;
  128. if (v==tablesize)
  129. v = 0;
  130. if (v==vs)
  131. break;
  132. }
  133. #ifdef MY_TRACE_HASH
  134. my_search_num++;
  135. if(my_search_num != 0)
  136. printf("Hash table average search length %d\n", (int) (my_search_tot/my_search_num));
  137. #endif
  138. #ifdef TRACE_HASH
  139. note_searchlen(searchlen);
  140. #endif
  141. setCache(v);
  142. return v;
  143. }
  144. unsigned SuperHashTable::doFindElement(unsigned v, const void * findET) const
  145. {
  146. #ifdef HASHSIZE_POWER2
  147. v = v & (tablesize - 1);
  148. #else
  149. v = v % tablesize;
  150. #endif
  151. unsigned vs = v;
  152. #ifdef TRACE_HASH
  153. unsigned searchlen = 0;
  154. #endif
  155. while (1)
  156. {
  157. #ifdef MY_TRACE_HASH
  158. my_search_tot++;
  159. #endif
  160. void *et = table[v];
  161. if (!et)
  162. break;
  163. if (matchesElement(et, findET))
  164. break;
  165. #ifdef TRACE_HASH
  166. searchlen ++;
  167. #endif
  168. v++;
  169. if (v==tablesize)
  170. v = 0;
  171. if (v==vs)
  172. break;
  173. }
  174. #ifdef MY_TRACE_HASH
  175. my_search_num++;
  176. if(my_search_num != 0)
  177. printf("Hash table average search length %d\n", (int) (my_search_tot/my_search_num));
  178. #endif
  179. #ifdef TRACE_HASH
  180. note_searchlen(searchlen);
  181. #endif
  182. setCache(v);
  183. return v;
  184. }
  185. unsigned SuperHashTable::doFindNew(unsigned v) const
  186. {
  187. #ifdef HASHSIZE_POWER2
  188. v = v & (tablesize - 1);
  189. #else
  190. v = v % tablesize;
  191. #endif
  192. unsigned vs = v;
  193. #ifdef TRACE_HASH
  194. unsigned searchlen = 0;
  195. #endif
  196. while (1)
  197. {
  198. #ifdef MY_TRACE_HASH
  199. my_search_tot++;
  200. #endif
  201. void *et = table[v];
  202. if (!et)
  203. break;
  204. #ifdef TRACE_HASH
  205. searchlen ++;
  206. #endif
  207. v++;
  208. if (v==tablesize)
  209. v = 0;
  210. if (v==vs)
  211. break; //table is full, should never occur
  212. }
  213. #ifdef MY_TRACE_HASH
  214. my_search_num++;
  215. if(my_search_num != 0)
  216. printf("Hash table average search length %d\n", (int) (my_search_tot/my_search_num));
  217. #endif
  218. #ifdef TRACE_HASH
  219. note_searchlen(searchlen);
  220. #endif
  221. setCache(v);
  222. return v;
  223. }
  224. unsigned SuperHashTable::doFindExact(const void *et) const
  225. {
  226. unsigned i = cache;
  227. if (i>=tablesize || table[i]!=et)
  228. {
  229. #ifdef HASHSIZE_POWER2
  230. i = getHashFromElement(et) & (tablesize - 1);
  231. #else
  232. i = getHashFromElement(et) % tablesize;
  233. #endif
  234. unsigned is = i;
  235. for (;;)
  236. {
  237. const void * cur = table[i];
  238. if (!cur || cur == et)
  239. break;
  240. i++;
  241. if (i==tablesize)
  242. i = 0;
  243. if (i==is)
  244. break;
  245. }
  246. setCache(i);
  247. }
  248. return i;
  249. }
  250. void SuperHashTable::ensure(unsigned mincount)
  251. {
  252. if (mincount <= getTableLimit(tablesize))
  253. return;
  254. unsigned newsize = tablesize;
  255. for (;;)
  256. {
  257. #ifdef HASHSIZE_POWER2
  258. newsize += newsize;
  259. #else
  260. if (newsize>=0x3FF)
  261. newsize += 0x400;
  262. else
  263. newsize += newsize+1;
  264. #endif
  265. if (newsize < tablesize)
  266. throw MakeStringException(0, "HashTable expanded beyond 2^32 items");
  267. if (mincount <= getTableLimit(newsize))
  268. break;
  269. }
  270. expand(newsize);
  271. }
  272. void SuperHashTable::expand()
  273. {
  274. unsigned newsize = tablesize;
  275. #ifdef HASHSIZE_POWER2
  276. newsize += newsize;
  277. #else
  278. if (newsize>=0x3FF)
  279. newsize += 0x400;
  280. else
  281. newsize += newsize+1;
  282. #endif
  283. expand(newsize);
  284. }
  285. void SuperHashTable::expand(unsigned newsize)
  286. {
  287. if (newsize < tablesize)
  288. throw MakeStringException(0, "HashTable expanded beyond 2^32 items");
  289. void * *newtable = (void * *) checked_malloc(newsize*sizeof(void *),-603);
  290. memset(newtable,0,newsize*sizeof(void *));
  291. void * *oldtable = table;
  292. #ifdef HASHSIZE_POWER2
  293. const unsigned oldmask = tablesize-1;
  294. #endif
  295. unsigned i;
  296. for (i = 0; i < tablesize; i++)
  297. {
  298. #ifdef HASHSIZE_POWER2
  299. __builtin_prefetch(oldtable[(i+1) & oldmask]);
  300. #endif
  301. void *et = oldtable[i];
  302. if (et)
  303. {
  304. #ifdef HASHSIZE_POWER2
  305. unsigned v = getHashFromElement(et) & (newsize - 1);
  306. #else
  307. unsigned v = getHashFromElement(et) % newsize;
  308. #endif
  309. while (newtable[v])
  310. {
  311. v++;
  312. if (v==newsize)
  313. v = 0;
  314. }
  315. newtable[v] = et;
  316. }
  317. }
  318. free(table);
  319. table = newtable;
  320. tablesize = newsize;
  321. }
  322. bool SuperHashTable::doAdd(void * donor, bool replace)
  323. {
  324. unsigned vs = getHashFromElement(donor);
  325. unsigned vm = doFind(vs, getFindParam(donor));
  326. void *et = table[vm];
  327. if (et)
  328. {
  329. if (replace)
  330. {
  331. onRemove(et);
  332. table[vm] = donor;
  333. onAdd(donor);
  334. return true;
  335. }
  336. else
  337. return false;
  338. }
  339. else
  340. {
  341. unsigned tablelim = getTableLimit(tablesize);
  342. if (tablecount>=tablelim)
  343. {
  344. expand();
  345. vm = doFind(vs, getFindParam(donor));
  346. }
  347. tablecount++;
  348. table[vm] = donor;
  349. onAdd(donor);
  350. }
  351. return true;
  352. }
  353. void SuperHashTable::addNew(void * donor, unsigned hash)
  354. {
  355. unsigned tablelim = getTableLimit(tablesize);
  356. if (tablecount>=tablelim)
  357. expand();
  358. unsigned vm = doFindNew(hash);
  359. tablecount++;
  360. table[vm] = donor;
  361. onAdd(donor);
  362. }
  363. void SuperHashTable::addNew(void * donor)
  364. {
  365. addNew(donor, getHashFromElement(donor));
  366. }
  367. void SuperHashTable::doDeleteElement(unsigned v)
  368. {
  369. #ifdef HASHSIZE_POWER2
  370. unsigned hm = (tablesize - 1);
  371. #endif
  372. unsigned hs = tablesize;
  373. unsigned vn = v;
  374. table[v] = NULL;
  375. while (1)
  376. {
  377. vn++;
  378. if (vn==hs) vn = 0;
  379. void *et2 = table[vn];
  380. if (!et2)
  381. break;
  382. #ifdef HASHSIZE_POWER2
  383. unsigned vm = getHashFromElement(et2) & hm;
  384. if (((vn+hs-vm) & hm)>=((vn+hs-v) & hm)) // diff(vn,vm)>=diff(vn,v)
  385. #else
  386. unsigned vm = getHashFromElement(et2) % hs;
  387. if (((vn+hs-vm) % hs)>=((vn+hs-v) % hs)) // diff(vn,vm)>=diff(vn,v)
  388. #endif
  389. {
  390. table[v] = et2;
  391. v = vn;
  392. table[v] = NULL;
  393. }
  394. }
  395. tablecount--;
  396. }
  397. unsigned SuperHashTable::getTableLimit(unsigned max)
  398. {
  399. return (max * 3) / 4;
  400. }
  401. bool SuperHashTable::remove(const void *fp)
  402. {
  403. unsigned v = doFind(fp);
  404. void * et = table[v];
  405. if (!et)
  406. return false;
  407. doDeleteElement(v);
  408. onRemove(et);
  409. return true;
  410. }
  411. bool SuperHashTable::removeExact(void *et)
  412. {
  413. if (!et)
  414. return false;
  415. unsigned v = doFindExact(et);
  416. if (table[v]!=et)
  417. return false;
  418. doDeleteElement(v);
  419. onRemove(et);
  420. return true;
  421. }
  422. unsigned SuperHashTable::validIdx(unsigned i) const
  423. {
  424. while (i < tablesize && !table[i])
  425. i++;
  426. return i;
  427. }
  428. void *SuperHashTable::findElement(unsigned hash, const void * findEt) const
  429. {
  430. unsigned vm = doFindElement(hash, findEt);
  431. void *et = table[vm];
  432. return et;
  433. }
  434. void *SuperHashTable::findElement(const void * findEt) const
  435. {
  436. unsigned vm = doFindElement(getHashFromElement(findEt), findEt);
  437. void *et = table[vm];
  438. return et;
  439. }
  440. void *SuperHashTable::findExact(const void * findEt) const
  441. {
  442. unsigned vm = doFindExact(findEt);
  443. void *et = table[vm];
  444. return et;
  445. }
  446. void SuperHashTable::doKill(void)
  447. {
  448. // Check that releaseAll() has been called before doKill()
  449. // (in particular, derived class destructor must do this)
  450. #ifdef _DEBUG
  451. // NOTE - don't use an assertex here as exceptions thrown from inside destructors tend to be problematic
  452. for (unsigned i = 0; i < tablesize; i++)
  453. if (table[i]) assert(!"SuperHashTable::doKill() : table not empty");
  454. #endif
  455. free(table);
  456. }
  457. void SuperHashTable::_releaseAll(void)
  458. {
  459. if (tablecount)
  460. {
  461. unsigned i;
  462. for (i = 0; i < tablesize; i++)
  463. {
  464. void * et = table[i];
  465. table[i] = NULL;
  466. if (et)
  467. onRemove(et);
  468. }
  469. tablecount = 0;
  470. setCache(0);
  471. }
  472. }
  473. void SuperHashTable::releaseAll()
  474. {
  475. _releaseAll();
  476. }
  477. void SuperHashTable::kill(void)
  478. {
  479. _releaseAll();
  480. if (tablesize != InitialTableSize)
  481. {
  482. doKill();
  483. tablesize = InitialTableSize;
  484. table = (void * *)checked_malloc(InitialTableSize*sizeof(void *), -604);
  485. memset(table, 0, InitialTableSize*sizeof(void *));
  486. }
  487. }
  488. void *SuperHashTable::addOrFind(void * donor)
  489. {
  490. unsigned vs = getHashFromElement(donor);
  491. unsigned vm = doFind(vs, getFindParam(donor));
  492. void *et = table[vm];
  493. if(!et)
  494. {
  495. unsigned tablelim = getTableLimit(tablesize);
  496. if (tablecount>=tablelim)
  497. {
  498. expand();
  499. vm = doFind(vs, getFindParam(donor));
  500. }
  501. tablecount++;
  502. table[vm] = donor;
  503. onAdd(donor);
  504. return donor;
  505. }
  506. return et;
  507. }
  508. void *SuperHashTable::addOrFindExact(void * donor)
  509. {
  510. unsigned vm = doFindExact(donor);
  511. void *et = table[vm];
  512. if(!et)
  513. {
  514. unsigned tablelim = getTableLimit(tablesize);
  515. if (tablecount>=tablelim)
  516. {
  517. expand();
  518. vm = doFindExact(donor);
  519. }
  520. tablecount++;
  521. table[vm] = donor;
  522. onAdd(donor);
  523. return donor;
  524. }
  525. return et;
  526. }
  527. void *SuperHashTable::next(const void *et) const
  528. {
  529. unsigned i;
  530. if (!et)
  531. {
  532. if (!tablecount)
  533. return NULL;
  534. i = (unsigned) -1;
  535. }
  536. else
  537. {
  538. i = doFindExact(et);
  539. if (table[i] != et)
  540. {
  541. assertex(!"SuperHashTable::Next : start item not found");
  542. return NULL;
  543. }
  544. }
  545. while (1)
  546. {
  547. i++;
  548. if (i>=tablesize)
  549. return NULL;
  550. if (table[i])
  551. break;
  552. }
  553. setCache(i);
  554. return table[i];
  555. }
  556. bool SuperHashTable::matchesElement(const void *et, const void *searchET) const
  557. {
  558. assertex(!"SuperHashTable::matchesElement needs to be overridden");
  559. return false;
  560. }