/symbols/pdb/Microsoft.Cci.Pdb/IntHashTable.cs

http://github.com/jbevain/cecil · C# · 576 lines · 183 code · 60 blank · 333 comment · 45 complexity · bc6550eb60ec02a618cf65471c392853 MD5 · raw file

  1. // Copyright (c) Microsoft. All rights reserved.
  2. // Licensed under the MIT license. See LICENSE file in the project root for full license information.
  3. using System;
  4. using System.Collections;
  5. namespace Microsoft.Cci.Pdb {
  6. // The IntHashTable class represents a dictionary of associated keys and
  7. // values with constant lookup time.
  8. //
  9. // Objects used as keys in a hashtable must implement the GetHashCode
  10. // and Equals methods (or they can rely on the default implementations
  11. // inherited from Object if key equality is simply reference
  12. // equality). Furthermore, the GetHashCode and Equals methods of
  13. // a key object must produce the same results given the same parameters
  14. // for the entire time the key is present in the hashtable. In practical
  15. // terms, this means that key objects should be immutable, at least for
  16. // the time they are used as keys in a hashtable.
  17. //
  18. // When entries are added to a hashtable, they are placed into
  19. // buckets based on the hashcode of their keys. Subsequent lookups of
  20. // keys will use the hashcode of the keys to only search a particular
  21. // bucket, thus substantially reducing the number of key comparisons
  22. // required to find an entry. A hashtable's maximum load factor, which
  23. // can be specified when the hashtable is instantiated, determines the
  24. // maximum ratio of hashtable entries to hashtable buckets. Smaller load
  25. // factors cause faster average lookup times at the cost of increased
  26. // memory consumption. The default maximum load factor of 1.0 generally
  27. // provides the best balance between speed and size. As entries are added
  28. // to a hashtable, the hashtable's actual load factor increases, and when
  29. // the actual load factor reaches the maximum load factor value, the
  30. // number of buckets in the hashtable is automatically increased by
  31. // approximately a factor of two (to be precise, the number of hashtable
  32. // buckets is increased to the smallest prime number that is larger than
  33. // twice the current number of hashtable buckets).
  34. //
  35. // Each object provides their own hash function, accessed by calling
  36. // GetHashCode(). However, one can write their own object
  37. // implementing IHashCodeProvider and pass it to a constructor on
  38. // the IntHashTable. That hash function would be used for all objects in
  39. // the table.
  40. //
  41. // This IntHashTable is implemented to support multiple concurrent readers
  42. // and one concurrent writer without using any synchronization primitives.
  43. // All read methods essentially must protect themselves from a resize
  44. // occurring while they are running. This was done by enforcing an
  45. // ordering on inserts & removes, as well as removing some member variables
  46. // and special casing the expand code to work in a temporary array instead
  47. // of the live bucket array. All inserts must set a bucket's value and
  48. // key before setting the hash code & collision field.
  49. //
  50. // By Brian Grunkemeyer, algorithm by Patrick Dussud.
  51. // Version 1.30 2/20/2000
  52. //| <include path='docs/doc[@for="IntHashTable"]/*' />
  53. internal class IntHashTable {//: IEnumerable {
  54. /*
  55. Implementation Notes:
  56. This IntHashTable uses double hashing. There are hashsize buckets in
  57. the table, and each bucket can contain 0 or 1 element. We use a bit to
  58. mark whether there's been a collision when we inserted multiple
  59. elements (ie, an inserted item was hashed at least a second time and
  60. we probed this bucket, but it was already in use). Using the
  61. collision bit, we can terminate lookups & removes for elements that
  62. aren't in the hash table more quickly. We steal the most
  63. significant bit from the hash code to store the collision bit.
  64. Our hash function is of the following form:
  65. h(key, n) = h1(key) + n*h2(key)
  66. where n is the number of times we've hit a collided bucket and
  67. rehashed (on this particular lookup). Here are our hash functions:
  68. h1(key) = GetHash(key); // default implementation calls key.GetHashCode();
  69. h2(key) = 1 + (((h1(key) >> 5) + 1) % (hashsize - 1));
  70. The h1 can return any number. h2 must return a number between 1 and
  71. hashsize - 1 that is relatively prime to hashsize (not a problem if
  72. hashsize is prime). (Knuth's Art of Computer Programming, Vol. 3,
  73. p. 528-9)
  74. If this is true, then we are guaranteed to visit every bucket in
  75. exactly hashsize probes, since the least common multiple of hashsize
  76. and h2(key) will be hashsize * h2(key). (This is the first number
  77. where adding h2 to h1 mod hashsize will be 0 and we will search the
  78. same bucket twice).
  79. We previously used a different h2(key, n) that was not constant.
  80. That is a horrifically bad idea, unless you can prove that series
  81. will never produce any identical numbers that overlap when you mod
  82. them by hashsize, for all subranges from i to i+hashsize, for all i.
  83. It's not worth investigating, since there was no clear benefit from
  84. using that hash function, and it was broken.
  85. For efficiency reasons, we've implemented this by storing h1 and h2
  86. in a temporary, and setting a variable called seed equal to h1. We
  87. do a probe, and if we collided, we simply add h2 to seed each time
  88. through the loop.
  89. A good test for h2() is to subclass IntHashTable, provide your own
  90. implementation of GetHash() that returns a constant, then add many
  91. items to the hash table. Make sure Count equals the number of items
  92. you inserted.
  93. -- Brian Grunkemeyer, 10/28/1999
  94. */
  95. // A typical resize algorithm would pick the smallest prime number in this array
  96. // that is larger than twice the previous capacity.
  97. // Suppose our Hashtable currently has capacity x and enough elements are added
  98. // such that a resize needs to occur. Resizing first computes 2x then finds the
  99. // first prime in the table greater than 2x, i.e. if primes are ordered
  100. // p_1, p_2, …, p_i,…, it finds p_n such that p_n-1 < 2x < p_n.
  101. // Doubling is important for preserving the asymptotic complexity of the
  102. // hashtable operations such as add. Having a prime guarantees that double
  103. // hashing does not lead to infinite loops. IE, your hash function will be
  104. // h1(key) + i*h2(key), 0 <= i < size. h2 and the size must be relatively prime.
  // Ascending table of the bucket-array sizes this class will use. GetPrime
  // hands out the first entry that is at least as large as the requested size.
  private static readonly int[] primes = {
    3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
    1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
    17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
    187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
    1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369
  };

  // Returns the first entry of the primes table that is >= minSize.
  //
  // Throws ArgumentException ("Arg_HTCapacityOverflow") when minSize is
  // negative or larger than every entry in the table.
  private static int GetPrime(int minSize) {
    if (minSize >= 0) {
      // The table is ascending, so the first hit is the smallest suitable size.
      foreach (int candidate in primes) {
        if (candidate >= minSize) {
          return candidate;
        }
      }
    }
    // Either a negative request or nothing in the table is large enough.
    throw new ArgumentException("Arg_HTCapacityOverflow");
  }
  123. // Deleted entries have their key set to buckets
  124. // The hash table data.
  125. // This cannot be serialised
  // One slot of the table.
  private struct bucket {
    // Key of the entry stored in this slot.
    internal int key;
    // Low 31 bits: hash code of the stored key. Sign bit: set once an insert
    // probed this slot while it was occupied (collision marker).
    internal int hash_coll;
    // Value of the entry; lookups and inserts treat null as "slot is empty".
    internal object val;
  }

  // The slot array; replaced wholesale by rehash(int).
  private bucket[] buckets;

  // Number of entries currently stored.
  private int count;

  // Number of slots whose collision bit is set.
  private int occupancy;

  // Entry count at which Insert grows the table (count >= loadsize).
  private int loadsize;

  // Effective load factor in percent (100 = 1.0), as stored by the constructor.
  private int loadFactorPerc;

  // Incremented on every insert and rehash; only the commented-out enumerator
  // in this file reads it.
  private int version;
  139. // Constructs a new hashtable. The hashtable is created with an initial
  140. // capacity of zero and a load factor of 1.0.
  141. //| <include path='docs/doc[@for="IntHashTable.IntHashTable"]/*' />
  // Parameterless constructor: forwards to IntHashTable(int, int) with a
  // capacity hint of 0 and a requested load factor of 100 percent.
  internal IntHashTable()
    : this(capacity: 0, loadFactorPerc: 100) {
  }
  145. //// Constructs a new hashtable with the given initial capacity and a load
  146. //// factor of 1.0. The capacity argument serves as an indication of
  147. //// the number of entries the hashtable will contain. When this number (or
  148. //// an approximation) is known, specifying it in the constructor can
  149. //// eliminate a number of resizing operations that would otherwise be
  150. //// performed when elements are added to the hashtable.
  151. ////
  152. ////| <include path='docs/doc[@for="IntHashTable.IntHashTable1"]/*' />
  153. //internal IntHashTable(int capacity)
  154. // : this(capacity, 100) {
  155. //}
  156. // Constructs a new hashtable with the given initial capacity and load
  157. // factor. The capacity argument serves as an indication of the
  158. // number of entries the hashtable will contain. When this number (or an
  159. // approximation) is known, specifying it in the constructor can eliminate
  160. // a number of resizing operations that would otherwise be performed when
  161. // elements are added to the hashtable. The loadFactorPerc argument
  162. // indicates the maximum ratio of hashtable entries to hashtable buckets.
  163. // Smaller load factors cause faster average lookup times at the cost of
  164. // increased memory consumption. A load factor of 1.0 generally provides
  165. // the best balance between speed and size.
  166. //
  167. //| <include path='docs/doc[@for="IntHashTable.IntHashTable3"]/*' />
  // Constructs a table sized to hold `capacity` entries without growing.
  //
  //   capacity       - expected number of entries; must be >= 0.
  //   loadFactorPerc - requested maximum load factor in percent, 10..100
  //                    (100 = 1.0). It is scaled by 0.72 before being stored.
  //
  // Throws ArgumentOutOfRangeException for a negative capacity or a load
  // factor outside 10..100, and ArgumentException ("Arg_HTCapacityOverflow")
  // when the bucket count required for `capacity` is larger than GetPrime
  // can supply.
  internal IntHashTable(int capacity, int loadFactorPerc) {
    if (capacity < 0) {
      throw new ArgumentOutOfRangeException("capacity", "ArgumentOutOfRange_NeedNonNegNum");
    }
    if (!(loadFactorPerc >= 10 && loadFactorPerc <= 100)) {
      throw new ArgumentOutOfRangeException("loadFactorPerc", String.Format("ArgumentOutOfRange_IntHashTableLoadFactor", 10, 100));
    }

    // Based on perf work, .72 is the optimal load factor for this table.
    this.loadFactorPerc = (loadFactorPerc * 72) / 100;

    // loadFactorPerc is a percentage, not a fraction, so the bucket count
    // needed for `capacity` entries is ceil(capacity * 100 / loadFactorPerc).
    // Dividing capacity by the percentage directly would make the initial
    // table roughly 100x too small and defeat the capacity hint.
    // The arithmetic is done in long so capacity * 100 cannot wrap.
    long rawsize = ((long)capacity * 100 + this.loadFactorPerc - 1) / this.loadFactorPerc;
    if (rawsize > Int32.MaxValue) {
      throw new ArgumentException("Arg_HTCapacityOverflow");
    }

    int hashsize = GetPrime((int)rawsize);
    buckets = new bucket[hashsize];

    // Growth threshold; always keep at least one slot free so probing can
    // terminate on an empty slot.
    loadsize = (this.loadFactorPerc * hashsize) / 100;
    if (loadsize >= hashsize) {
      loadsize = hashsize - 1;
    }
  }
  181. // Computes the hash function: H(key, i) = h1(key) + i*h2(key, hashSize).
  182. // The out parameter seed is h1(key), while the out parameter
  183. // incr is h2(key, hashSize). Callers of this function should
  184. // add incr each time through a loop.
  // Prepares the double-hashing probe sequence for `key` in a table of
  // `hashsize` buckets.
  //
  //   seed (out) - starting probe value: the key with its sign bit cleared.
  //   incr (out) - step to add to seed after each collision; always in the
  //                range 1 .. hashsize - 1.
  //   returns    - the same sign-bit-cleared hash code that is stored in seed.
  //
  // If this formula changes, putEntry must be changed to match: it recomputes
  // the step with the same expression.
  private static uint InitHash(int key, int hashsize, out uint seed, out uint incr) {
    // The sign bit is reserved for the collision marker, so strip it.
    uint positiveHash = (uint)key & 0x7FFFFFFF;

    // (x % (hashsize - 1)) lies in 0 .. hashsize - 2; adding one yields the
    // required 1 .. hashsize - 1 step.
    uint modulus = (uint)hashsize - 1;
    uint stepMinusOne = ((positiveHash >> 5) + 1) % modulus;

    seed = positiveHash;
    incr = stepMinusOne + 1;
    return positiveHash;
  }
  198. // Adds an entry with the given key and value to this hashtable. An
  199. // ArgumentException is thrown if the key is negative or if the key is already
  200. // present in the hashtable; an ArgumentNullException is thrown if the value is null.
  201. //
  202. //| <include path='docs/doc[@for="IntHashTable.Add"]/*' />
  // Stores `value` under `key` by delegating to Insert in "add" mode, in
  // which a key that is already present is rejected with an exception.
  internal void Add(int key, Object value) {
    const bool failOnDuplicate = true;
    Insert(key, value, failOnDuplicate);
  }
  206. //// Removes all entries from this hashtable.
  207. ////| <include path='docs/doc[@for="IntHashTable.Clear"]/*' />
  208. //internal void Clear() {
  209. // if (count == 0)
  210. // return;
  211. // for (int i = 0; i < buckets.Length; i++) {
  212. // buckets[i].hash_coll = 0;
  213. // buckets[i].key = -1;
  214. // buckets[i].val = null;
  215. // }
  216. // count = 0;
  217. // occupancy = 0;
  218. //}
  219. // Checks if this hashtable contains an entry with the given key. This is
  220. // an O(1) operation.
  221. //
  222. //| <include path='docs/doc[@for="IntHashTable.Contains"]/*' />
  223. //internal bool Contains(int key) {
  224. // if (key < 0) {
  225. // throw new ArgumentException("Argument_KeyLessThanZero");
  226. // }
  227. // uint seed;
  228. // uint incr;
  229. // // Take a snapshot of buckets, in case another thread resizes table
  230. // bucket[] lbuckets = buckets;
  231. // uint hashcode = InitHash(key, lbuckets.Length, out seed, out incr);
  232. // int ntry = 0;
  233. // bucket b;
  234. // do {
  235. // int bucketNumber = (int)(seed % (uint)lbuckets.Length);
  236. // b = lbuckets[bucketNumber];
  237. // if (b.val == null) {
  238. // return false;
  239. // }
  240. // if (((b.hash_coll & 0x7FFFFFFF) == hashcode) && b.key == key) {
  241. // return true;
  242. // }
  243. // seed += incr;
  244. // } while (b.hash_coll < 0 && ++ntry < lbuckets.Length);
  245. // return false;
  246. //}
  247. // Returns the value associated with the given key. If an entry with the
  248. // given key is not found, the returned value is null.
  249. //
  250. //| <include path='docs/doc[@for="IntHashTable.this"]/*' />
  // Looks up the value stored under `key`.
  //
  // Returns the stored value, or null when no entry with that key is found.
  // Throws ArgumentException ("Argument_KeyLessThanZero") for a negative key.
  internal Object this[int key] {
    get {
      if (key < 0) {
        throw new ArgumentException("Argument_KeyLessThanZero");
      }

      // Work on a local reference so a concurrent rehash that swaps the
      // buckets field cannot change the array mid-lookup.
      bucket[] snapshot = buckets;
      uint tableSize = (uint)snapshot.Length;

      uint probe;
      uint step;
      uint wantedHash = InitHash(key, snapshot.Length, out probe, out step);

      for (int attempts = 0; attempts < snapshot.Length; attempts++) {
        bucket slot = snapshot[(int)(probe % tableSize)];

        // An empty slot ends the probe sequence: the key is absent.
        if (slot.val == null) {
          return null;
        }

        bool sameHash = (slot.hash_coll & 0x7FFFFFFF) == wantedHash;
        if (sameHash && slot.key == key) {
          return slot.val;
        }

        // No collision bit means no insert ever probed past this slot,
        // so there is nothing further along the chain.
        if (slot.hash_coll >= 0) {
          break;
        }

        probe += step;
      }

      return null;
    }
    // The setter is intentionally disabled:
    //set {
    //  Insert(key, value, false);
    //}
  }
  280. // Increases the bucket count of this hashtable. This method is called from
  281. // the Insert method when the actual load factor of the hashtable reaches
  282. // the upper limit specified when the hashtable was constructed. The number
  283. // of buckets in the hashtable is increased to the smallest prime number
  284. // that is larger than twice the current number of buckets, and the entries
  285. // in the hashtable are redistributed into the new buckets using the cached
  286. // hashcodes.
  // Grows the table: asks GetPrime for a size of at least twice the current
  // bucket count plus one, then rehashes every entry into an array of that size.
  private void expand() {
    int minimumSize = buckets.Length * 2 + 1;
    int grownSize = GetPrime(minimumSize);
    rehash(grownSize);
  }
  290. // We occasionally need to rehash the table to clean up the collision bits.
  // Rebuilds the table at its current size. The entries are re-inserted into
  // a fresh array, which recomputes the collision bits from scratch.
  private void rehash() {
    int unchangedSize = buckets.Length;
    rehash(unchangedSize);
  }
  // Re-inserts every entry into a new bucket array of `newsize` slots and then
  // makes that array the live table.
  //
  // No field is modified until the new array has been allocated, and the
  // buckets field is not replaced until the new array is fully populated.
  // This serves two purposes:
  //   1) concurrent readers see valid hashtable contents at all times;
  //   2) an OutOfMemoryException from the allocation leaves the table
  //      exactly as it was.
  private void rehash(int newsize) {
    // Allocate before touching any state. Resetting occupancy first would
    // leave it wrong if this allocation threw.
    bucket[] newBuckets = new bucket[newsize];

    // putEntry re-counts collision bits into occupancy while it fills
    // newBuckets, so the counter has to start from zero.
    occupancy = 0;

    // Rehash the table into the new buckets, reusing the cached hash codes
    // (sign/collision bit stripped).
    for (int nb = 0; nb < buckets.Length; nb++) {
      bucket oldb = buckets[nb];
      if (oldb.val != null) {
        putEntry(newBuckets, oldb.key, oldb.val, oldb.hash_coll & 0x7FFFFFFF);
      }
    }

    // New bucket[] is good to go - replace buckets and other internal state.
    version++;
    buckets = newBuckets;
    loadsize = (loadFactorPerc * newsize) / 100;
    if (loadsize >= newsize) {
      loadsize = newsize - 1;
    }
  }
  321. // Returns an enumerator for this hashtable.
  322. // If modifications made to the hashtable while an enumeration is
  323. // in progress, the MoveNext and Current methods of the
  324. // enumerator will throw an exception.
  325. //
  326. //| <include path='docs/doc[@for="IntHashTable.IEnumerable.GetEnumerator"]/*' />
  327. //IEnumerator IEnumerable.GetEnumerator() {
  328. // return new IntHashTableEnumerator(this);
  329. //}
  330. // Internal method to compare two keys.
  331. //
  332. // Inserts an entry into this hashtable. This method is called from the Set
  333. // and Add methods. If the add parameter is true and the given key already
  334. // exists in the hashtable, an exception is thrown.
  // Inserts or replaces the entry for `key`.
  //
  //   key    - must be >= 0.
  //   nvalue - must not be null (null is the "empty slot" marker).
  //   add    - when true, an already-present key is an error; when false,
  //            the existing entry's value is overwritten.
  //
  // Throws ArgumentException for a negative key or (with add == true) a
  // duplicate key, ArgumentNullException for a null value, and
  // InvalidOperationException if the whole table was probed without finding
  // a free slot.
  //
  // The previous version carried an emptySlotNumber local that was set to -1
  // and never assigned again, so every branch depending on it was dead; those
  // branches have been removed without changing behavior.
  private void Insert(int key, Object nvalue, bool add) {
    if (key < 0) {
      throw new ArgumentException("Argument_KeyLessThanZero");
    }
    if (nvalue == null) {
      throw new ArgumentNullException("nvalue", "ArgumentNull_Value");
    }

    // Grow when the load limit is reached; otherwise, for tables with more
    // than 100 entries, rebuild in place once too many collision bits pile up.
    if (count >= loadsize) {
      expand();
    } else if (occupancy > loadsize && count > 100) {
      rehash();
    }

    uint seed;
    uint incr;
    // Assume we only have one thread writing concurrently. Modify
    // buckets to contain new data, as long as we insert in the right order.
    uint hashcode = InitHash(key, buckets.Length, out seed, out incr);
    int ntry = 0;

    do {
      int bucketNumber = (int)(seed % (uint)buckets.Length);

      // Empty slot: the key is not in the table, so store the entry here.
      if (buckets[bucketNumber].val == null) {
        // We pretty much have to insert in this order. Don't set hash
        // code until the value & key are set appropriately.
        buckets[bucketNumber].val = nvalue;
        buckets[bucketNumber].key = key;
        buckets[bucketNumber].hash_coll |= (int)hashcode;
        count++;
        version++;
        return;
      }

      // Occupied slot holding this very key: duplicate.
      if (((buckets[bucketNumber].hash_coll & 0x7FFFFFFF) == hashcode) &&
          key == buckets[bucketNumber].key) {
        if (add) {
          throw new ArgumentException("Argument_AddingDuplicate__" + buckets[bucketNumber].key);
        }
        buckets[bucketNumber].val = nvalue;
        version++;
        return;
      }

      // Occupied by a different key: we collided. Mark the slot so lookups
      // know to keep probing past it.
      if (buckets[bucketNumber].hash_coll >= 0) {
        buckets[bucketNumber].hash_coll |= unchecked((int)0x80000000);
        occupancy++;
      }

      seed += incr;
    } while (++ntry < buckets.Length);

    // If you see this, make sure load factor & count are reasonable.
    // Then verify that our double hash function (h2, described at top of file)
    // meets the requirements described above. You should never see this.
    throw new InvalidOperationException("InvalidOperation_HashInsertFailed");
  }
  // Places one entry into `newBuckets` during a rehash. Performs no
  // duplicate check; it probes until it finds an empty slot.
  //
  //   newBuckets - the array being populated (not yet the live table).
  //   hashcode   - the entry's cached hash code with the collision bit cleared.
  //
  // Each occupied slot passed over gets its collision bit set, and the
  // occupancy field is incremented the first time a slot is marked. The loop
  // has no iteration limit, so it relies on newBuckets having a free slot.
  private void putEntry(bucket[] newBuckets, int key, Object nvalue, int hashcode) {
    uint tableSize = (uint)newBuckets.Length;
    uint probe = (uint)hashcode;
    // Same step formula as InitHash; the two must stay in sync.
    uint step = 1 + (((probe >> 5) + 1) % (tableSize - 1));

    for (;;) {
      int slot = (int)(probe % tableSize);

      if (newBuckets[slot].val != null) {
        // Occupied: flag the collision (once per slot) and move on.
        if (newBuckets[slot].hash_coll >= 0) {
          newBuckets[slot].hash_coll |= unchecked((int)0x80000000);
          occupancy++;
        }
        probe += step;
        continue;
      }

      newBuckets[slot].val = nvalue;
      newBuckets[slot].key = key;
      newBuckets[slot].hash_coll |= hashcode;
      return;
    }
  }
  436. // Returns the number of associations in this hashtable.
  437. //
  438. //| <include path='docs/doc[@for="IntHashTable.Count"]/*' />
  439. //internal int Count {
  440. // get { return count; }
  441. //}
  442. // Implements an enumerator for a hashtable. The enumerator uses the
  443. // internal version number of the hashtable to ensure that no modifications
  444. // are made to the hashtable while an enumeration is in progress.
  445. //private class IntHashTableEnumerator : IEnumerator {
  446. // private IntHashTable hashtable;
  447. // private int bucket;
  448. // private int version;
  449. // private bool current;
  450. // //private int currentKey;
  451. // private Object currentValue;
  452. // internal IntHashTableEnumerator(IntHashTable hashtable) {
  453. // this.hashtable = hashtable;
  454. // bucket = hashtable.buckets.Length;
  455. // version = hashtable.version;
  456. // }
  457. // public bool MoveNext() {
  458. // if (version != hashtable.version)
  459. // throw new InvalidOperationException("InvalidOperation_EnumFailedVersion");
  460. // while (bucket > 0) {
  461. // bucket--;
  462. // Object val = hashtable.buckets[bucket].val;
  463. // if (val != null) {
  464. // //currentKey = hashtable.buckets[bucket].key;
  465. // currentValue = val;
  466. // current = true;
  467. // return true;
  468. // }
  469. // }
  470. // current = false;
  471. // return false;
  472. // }
  473. // //internal int Key {
  474. // // get {
  475. // // if (current == false)
  476. // // throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen");
  477. // // return currentKey;
  478. // // }
  479. // //}
  480. // public Object Current {
  481. // get {
  482. // if (current == false)
  483. // throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen");
  484. // return currentValue;
  485. // }
  486. // }
  487. // //public Object Value {
  488. // // get {
  489. // // if (version != hashtable.version)
  490. // // throw new InvalidOperationException("InvalidOperation_EnumFailedVersion");
  491. // // if (current == false)
  492. // // throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen");
  493. // // return currentValue;
  494. // // }
  495. // //}
  496. // public void Reset() {
  497. // if (version != hashtable.version) throw new InvalidOperationException("InvalidOperation_EnumFailedVersion");
  498. // current = false;
  499. // bucket = hashtable.buckets.Length;
  500. // //currentKey = -1;
  501. // currentValue = null;
  502. // }
  503. //}
  504. }
  505. }