
/symbols/pdb/Microsoft.Cci.Pdb/IntHashTable.cs

http://github.com/jbevain/cecil
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System;
using System.Collections;

namespace Microsoft.Cci.Pdb {
  // The IntHashTable class represents a dictionary of associated keys and
  // values with constant lookup time.
  //
  // Objects used as keys in a hashtable must implement the GetHashCode
  // and Equals methods (or they can rely on the default implementations
  // inherited from Object if key equality is simply reference
  // equality). Furthermore, the GetHashCode and Equals methods of
  // a key object must produce the same results given the same parameters
  // for the entire time the key is present in the hashtable. In practical
  // terms, this means that key objects should be immutable, at least for
  // the time they are used as keys in a hashtable.
  //
  // When entries are added to a hashtable, they are placed into
  // buckets based on the hashcode of their keys. Subsequent lookups of
  // keys will use the hashcode of the keys to only search a particular
  // bucket, thus substantially reducing the number of key comparisons
  // required to find an entry. A hashtable's maximum load factor, which
  // can be specified when the hashtable is instantiated, determines the
  // maximum ratio of hashtable entries to hashtable buckets. Smaller load
  // factors cause faster average lookup times at the cost of increased
  // memory consumption. The default maximum load factor of 1.0 generally
  // provides the best balance between speed and size. As entries are added
  // to a hashtable, the hashtable's actual load factor increases, and when
  // the actual load factor reaches the maximum load factor value, the
  // number of buckets in the hashtable is automatically increased by
  // approximately a factor of two (to be precise, the number of hashtable
  // buckets is increased to the smallest prime number that is larger than
  // twice the current number of hashtable buckets).
  //
  // Each object provides its own hash function, accessed by calling
  // GetHashCode().  However, one can write one's own object
  // implementing IHashCodeProvider and pass it to a constructor on
  // the IntHashTable.  That hash function would be used for all objects in
  // the table.
  //
  // This IntHashTable is implemented to support multiple concurrent readers
  // and one concurrent writer without using any synchronization primitives.
  // All read methods essentially must protect themselves from a resize
  // occurring while they are running.  This was done by enforcing an
  // ordering on inserts & removes, as well as removing some member variables
  // and special casing the expand code to work in a temporary array instead
  // of the live bucket array.  All inserts must set a bucket's value and
  // key before setting the hash code & collision field.
  //
  // By Brian Grunkemeyer, algorithm by Patrick Dussud.
  // Version 1.30 2/20/2000
  //| <include path='docs/doc[@for="IntHashTable"]/*' />
  internal class IntHashTable {//: IEnumerable {
    /*
      Implementation Notes:

      This IntHashTable uses double hashing.  There are hashsize buckets in
      the table, and each bucket can contain 0 or 1 element.  We use a bit to
      mark whether there's been a collision when we inserted multiple
      elements (i.e., an inserted item was hashed at least a second time and
      we probed this bucket, but it was already in use).  Using the
      collision bit, we can terminate lookups & removes for elements that
      aren't in the hash table more quickly.  We steal the most
      significant bit from the hash code to store the collision bit.

      Our hash function is of the following form:

      h(key, n) = h1(key) + n*h2(key)

      where n is the number of times we've hit a collided bucket and
      rehashed (on this particular lookup).  Here are our hash functions:

      h1(key) = GetHash(key);  // default implementation calls key.GetHashCode();
      h2(key) = 1 + (((h1(key) >> 5) + 1) % (hashsize - 1));

      The h1 can return any number.  h2 must return a number between 1 and
      hashsize - 1 that is relatively prime to hashsize (not a problem if
      hashsize is prime).  (Knuth's Art of Computer Programming, Vol. 3,
      p. 528-9)

      If this is true, then we are guaranteed to visit every bucket in
      exactly hashsize probes, since the least common multiple of hashsize
      and h2(key) will be hashsize * h2(key).  (This is the first number
      where adding h2 to h1 mod hashsize will be 0 and we will search the
      same bucket twice).

      We previously used a different h2(key, n) that was not constant.
      That is a horrifically bad idea, unless you can prove that series
      will never produce any identical numbers that overlap when you mod
      them by hashsize, for all subranges from i to i+hashsize, for all i.
      It's not worth investigating, since there was no clear benefit from
      using that hash function, and it was broken.

      For efficiency reasons, we've implemented this by storing h1 and h2
      in a temporary, and setting a variable called seed equal to h1.  We
      do a probe, and if we collided, we simply add h2 to seed each time
      through the loop.

      A good test for h2() is to subclass IntHashTable, provide your own
      implementation of GetHash() that returns a constant, then add many
      items to the hash table.  Make sure Count equals the number of items
      you inserted.

      -- Brian Grunkemeyer, 10/28/1999
    */
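
    // Worked example (illustrative sketch, not part of the original source):
    // with hashsize = 11 and key = 25,
    //
    //   uint seed = 25;                                  // h1(key)
    //   uint incr = 1 + (((seed >> 5) + 1) % (11 - 1));  // h2(key) == 2
    //
    // The probe sequence (seed + n*incr) % 11 for n = 0..10 visits buckets
    // 3, 5, 7, 9, 0, 2, 4, 6, 8, 10, 1 -- every bucket exactly once, as the
    // relative-primality argument above guarantees.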

    // A typical resize algorithm would pick the smallest prime number in this array
    // that is larger than twice the previous capacity.
    // Suppose our Hashtable currently has capacity x and enough elements are added
    // such that a resize needs to occur. Resizing first computes 2x then finds the
    // first prime in the table greater than 2x, i.e. if primes are ordered
    // p_1, p_2, ..., p_i, ..., it finds p_n such that p_(n-1) < 2x < p_n.
    // Doubling is important for preserving the asymptotic complexity of the
    // hashtable operations such as add.  Having a prime guarantees that double
    // hashing does not lead to infinite loops.  I.e., your hash function will be
    // h1(key) + i*h2(key), 0 <= i < size.  h2 and the size must be relatively prime.
    private static readonly int[] primes = {
            3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
            1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
            17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
            187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
            1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};

    private static int GetPrime(int minSize) {
      if (minSize < 0) {
        throw new ArgumentException("Arg_HTCapacityOverflow");
      }
      for (int i = 0; i < primes.Length; i++) {
        int size = primes[i];
        if (size >= minSize) {
          return size;
        }
      }
      throw new ArgumentException("Arg_HTCapacityOverflow");
    }
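
    // Worked example (illustrative sketch, not from the original source):
    // GetPrime returns the first entry >= minSize, so GetPrime(0) == 3,
    // GetPrime(100) == 107, and GetPrime(395) == 431.  expand() below calls
    // GetPrime(1 + oldSize*2), so a table with 197 buckets grows to 431.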

    // Deleted entries have their key set to buckets

    // The hash table data.
    // This cannot be serialised
    private struct bucket {
      internal int key;
      internal int hash_coll;   // Store hash code; sign bit means there was a collision.
      internal Object val;
    }

    private bucket[] buckets;

    // The total number of entries in the hash table.
    private int count;

    // The total number of collision bits set in the hashtable
    private int occupancy;

    private int loadsize;
    private int loadFactorPerc;    // 100 = 1.0

    private int version;

    // Constructs a new hashtable. The hashtable is created with an initial
    // capacity of zero and a load factor of 1.0.
    //| <include path='docs/doc[@for="IntHashTable.IntHashTable"]/*' />
    internal IntHashTable()
      : this(0, 100) {
    }

    //// Constructs a new hashtable with the given initial capacity and a load
    //// factor of 1.0. The capacity argument serves as an indication of
    //// the number of entries the hashtable will contain. When this number (or
    //// an approximation) is known, specifying it in the constructor can
    //// eliminate a number of resizing operations that would otherwise be
    //// performed when elements are added to the hashtable.
    ////
    ////| <include path='docs/doc[@for="IntHashTable.IntHashTable1"]/*' />
    //internal IntHashTable(int capacity)
    //  : this(capacity, 100) {
    //}

    // Constructs a new hashtable with the given initial capacity and load
    // factor. The capacity argument serves as an indication of the
    // number of entries the hashtable will contain. When this number (or an
    // approximation) is known, specifying it in the constructor can eliminate
    // a number of resizing operations that would otherwise be performed when
    // elements are added to the hashtable. The loadFactorPerc argument
    // indicates the maximum ratio of hashtable entries to hashtable buckets.
    // Smaller load factors cause faster average lookup times at the cost of
    // increased memory consumption. A load factor of 1.0 generally provides
    // the best balance between speed and size.
    //
    //| <include path='docs/doc[@for="IntHashTable.IntHashTable3"]/*' />
    internal IntHashTable(int capacity, int loadFactorPerc) {
      if (capacity < 0)
        throw new ArgumentOutOfRangeException("capacity", "ArgumentOutOfRange_NeedNonNegNum");
      if (!(loadFactorPerc >= 10 && loadFactorPerc <= 100))
        throw new ArgumentOutOfRangeException("loadFactorPerc", String.Format("ArgumentOutOfRange_IntHashTableLoadFactor", 10, 100));

      // Based on perf work, .72 is the optimal load factor for this table.
      this.loadFactorPerc = (loadFactorPerc * 72) / 100;

      // loadFactorPerc is a percentage, so the raw size is capacity * 100 / loadFactorPerc.
      int hashsize = GetPrime((capacity * 100) / this.loadFactorPerc);
      buckets = new bucket[hashsize];

      loadsize = (this.loadFactorPerc * hashsize) / 100;
      if (loadsize >= hashsize)
        loadsize = hashsize-1;
    }
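
    // Worked sizing example (illustrative sketch, not from the original
    // source): new IntHashTable(0, 100) maps loadFactorPerc to 72, asks
    // GetPrime(0) for the bucket count (== 3), and sets
    // loadsize = (72 * 3) / 100 == 2, so the third Add triggers expand().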

    // Computes the hash function:  H(key, i) = h1(key) + i*h2(key, hashSize).
    // The out parameter seed is h1(key), while the out parameter
    // incr is h2(key, hashSize).  Callers of this function should
    // add incr each time through a loop.
    private static uint InitHash(int key, int hashsize, out uint seed, out uint incr) {
      // Hashcode must be positive.  Also, we must not use the sign bit, since
      // that is used for the collision bit.
      uint hashcode = (uint)key & 0x7FFFFFFF;
      seed = (uint)hashcode;
      // Restriction: incr MUST be between 1 and hashsize - 1, inclusive for
      // the modular arithmetic to work correctly.  This guarantees you'll
      // visit every bucket in the table exactly once within hashsize
      // iterations.  Violate this and it'll cause obscure bugs forever.
      // If you change this calculation for h2(key), update putEntry too!
      incr = (uint)(1 + (((seed >> 5) + 1) % ((uint)hashsize - 1)));
      return hashcode;
    }
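
    // Numeric example (illustrative sketch, not from the original source):
    // InitHash(1000, 11, out seed, out incr) yields hashcode = seed = 1000
    // and incr = 1 + (((1000 >> 5) + 1) % 10) = 1 + (32 % 10) = 3, so a
    // caller probes buckets 1000 % 11 = 10, then (1000+3) % 11 = 2, and so on.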

    // Adds an entry with the given key and value to this hashtable. An
    // ArgumentException is thrown if the key is negative or if the key is
    // already present in the hashtable; an ArgumentNullException is thrown
    // if the value is null.
    //
    //| <include path='docs/doc[@for="IntHashTable.Add"]/*' />
    internal void Add(int key, Object value) {
      Insert(key, value, true);
    }
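
    // Usage sketch (illustrative, not part of the original source):
    //
    //   IntHashTable table = new IntHashTable();
    //   table.Add(0x1234, "token");
    //   object hit  = table[0x1234];  // "token"
    //   object miss = table[0x9999];  // null -- the indexer never throws
    //                                 // for absent non-negative keys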

    //// Removes all entries from this hashtable.
    ////| <include path='docs/doc[@for="IntHashTable.Clear"]/*' />
    //internal void Clear() {
    //  if (count == 0)
    //    return;

    //  for (int i = 0; i < buckets.Length; i++) {
    //    buckets[i].hash_coll = 0;
    //    buckets[i].key = -1;
    //    buckets[i].val = null;
    //  }

    //  count = 0;
    //  occupancy = 0;
    //}

    // Checks if this hashtable contains an entry with the given key.  This is
    // an O(1) operation.
    //
    //| <include path='docs/doc[@for="IntHashTable.Contains"]/*' />
    //internal bool Contains(int key) {
    //  if (key < 0) {
    //    throw new ArgumentException("Argument_KeyLessThanZero");
    //  }

    //  uint seed;
    //  uint incr;
    //  // Take a snapshot of buckets, in case another thread resizes table
    //  bucket[] lbuckets = buckets;
    //  uint hashcode = InitHash(key, lbuckets.Length, out seed, out incr);
    //  int ntry = 0;

    //  bucket b;
    //  do {
    //    int bucketNumber = (int)(seed % (uint)lbuckets.Length);
    //    b = lbuckets[bucketNumber];
    //    if (b.val == null) {
    //      return false;
    //    }
    //    if (((b.hash_coll & 0x7FFFFFFF) == hashcode) && b.key == key) {
    //      return true;
    //    }
    //    seed += incr;
    //  } while (b.hash_coll < 0 && ++ntry < lbuckets.Length);
    //  return false;
    //}

    // Returns the value associated with the given key. If an entry with the
    // given key is not found, the returned value is null.
    //
    //| <include path='docs/doc[@for="IntHashTable.this"]/*' />
    internal Object this[int key] {
      get {
        if (key < 0) {
          throw new ArgumentException("Argument_KeyLessThanZero");
        }
        uint seed;
        uint incr;
        // Take a snapshot of buckets, in case another thread does a resize
        bucket[] lbuckets = buckets;
        uint hashcode = InitHash(key, lbuckets.Length, out seed, out incr);
        int ntry = 0;

        bucket b;
        do {
          int bucketNumber = (int)(seed % (uint)lbuckets.Length);
          b = lbuckets[bucketNumber];
          if (b.val == null) {
            return null;
          }
          if (((b.hash_coll & 0x7FFFFFFF) == hashcode) && key == b.key) {
            return b.val;
          }
          seed += incr;
        } while (b.hash_coll < 0 && ++ntry < lbuckets.Length);
        return null;
      }
      //set {
      //  Insert(key, value, false);
      //}
    }
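
    // Early-termination note (illustrative, not from the original source):
    // the loop condition (b.hash_coll < 0) exploits the stolen sign bit.  If
    // a probed bucket holds a different key but its collision bit is clear,
    // no insert has ever probed past that bucket, so the key cannot appear
    // later in the sequence and the lookup stops immediately instead of
    // walking up to hashsize buckets.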

    // Increases the bucket count of this hashtable. This method is called from
    // the Insert method when the actual load factor of the hashtable reaches
    // the upper limit specified when the hashtable was constructed. The number
    // of buckets in the hashtable is increased to the smallest prime number
    // that is larger than twice the current number of buckets, and the entries
    // in the hashtable are redistributed into the new buckets using the cached
    // hashcodes.
    private void expand() {
      rehash(GetPrime(1 + buckets.Length*2));
    }

    // We occasionally need to rehash the table to clean up the collision bits.
    private void rehash() {
      rehash(buckets.Length);
    }

    private void rehash(int newsize) {

      // reset occupancy
      occupancy = 0;

      // Don't replace any internal state until we've finished adding to the
      // new bucket[].  This serves two purposes:
      //   1) Allow concurrent readers to see valid hashtable contents
      //      at all times
      //   2) Protect against an OutOfMemoryException while allocating this
      //      new bucket[].
      bucket[] newBuckets = new bucket[newsize];

      // rehash table into new buckets
      int nb;
      for (nb = 0; nb < buckets.Length; nb++) {
        bucket oldb = buckets[nb];
        if (oldb.val != null) {
          putEntry(newBuckets, oldb.key, oldb.val, oldb.hash_coll & 0x7FFFFFFF);
        }
      }

      // New bucket[] is good to go - replace buckets and other internal state.
      version++;
      buckets = newBuckets;
      loadsize = (loadFactorPerc * newsize) / 100;

      if (loadsize >= newsize) {
        loadsize = newsize-1;
      }

      return;
    }
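
    // Worked resize example (illustrative sketch, not from the original
    // source): a table that starts with 3 buckets and loadsize 2 expands on
    // the third insert via rehash(GetPrime(1 + 3*2)) == rehash(7), giving 7
    // buckets and loadsize (72 * 7) / 100 == 5; the next expansion yields
    // GetPrime(15) == 17, and so on up the primes table.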

    // Returns an enumerator for this hashtable.
    // If modifications are made to the hashtable while an enumeration is
    // in progress, the MoveNext and Current methods of the
    // enumerator will throw an exception.
    //
    //| <include path='docs/doc[@for="IntHashTable.IEnumerable.GetEnumerator"]/*' />
    //IEnumerator IEnumerable.GetEnumerator() {
    //  return new IntHashTableEnumerator(this);
    //}

    // Internal method to compare two keys.
    //
    // Inserts an entry into this hashtable. This method is called from the Set
    // and Add methods. If the add parameter is true and the given key already
    // exists in the hashtable, an exception is thrown.
    private void Insert(int key, Object nvalue, bool add) {
      if (key < 0) {
        throw new ArgumentException("Argument_KeyLessThanZero");
      }
      if (nvalue == null) {
        throw new ArgumentNullException("nvalue", "ArgumentNull_Value");
      }
      if (count >= loadsize) {
        expand();
      } else if (occupancy > loadsize && count > 100) {
        rehash();
      }

      uint seed;
      uint incr;
      // Assume we only have one thread writing concurrently.  Modify
      // buckets to contain new data, as long as we insert in the right order.
      uint hashcode = InitHash(key, buckets.Length, out seed, out incr);
      int ntry = 0;
      int emptySlotNumber = -1; // We use the empty slot number to cache the first empty slot. We chose to reuse slots
      // created by remove that have the collision bit set over using up new slots.

      do {
        int bucketNumber = (int)(seed % (uint)buckets.Length);

        // Set emptySlotNumber to the current bucket if it is the first available bucket that we have seen
        // that once contained an entry and also has had a collision.
        // We need to search this entire collision chain because we have to ensure that there are no
        // duplicate entries in the table.

        // Insert the key/value pair into this bucket if this bucket is empty and has never contained an entry
        // OR
        // this bucket once contained an entry but there has never been a collision
        if (buckets[bucketNumber].val == null) {
          // If we have found an available bucket that has never had a collision, but we've seen an available
          // bucket in the past that has the collision bit set, use the previous bucket instead
          if (emptySlotNumber != -1) { // Reuse slot
            bucketNumber = emptySlotNumber;
          }

          // We pretty much have to insert in this order.  Don't set hash
          // code until the value & key are set appropriately.
          buckets[bucketNumber].val = nvalue;
          buckets[bucketNumber].key = key;
          buckets[bucketNumber].hash_coll |= (int)hashcode;
          count++;
          version++;
          return;
        }

        // The current bucket is in use
        // OR
        // it is available, but has had the collision bit set and we have already found an available bucket
        if (((buckets[bucketNumber].hash_coll & 0x7FFFFFFF) == hashcode) &&
                    key == buckets[bucketNumber].key) {
          if (add) {
            throw new ArgumentException("Argument_AddingDuplicate__" + buckets[bucketNumber].key);
          }
          buckets[bucketNumber].val = nvalue;
          version++;
          return;
        }

        // The current bucket is full, and we have therefore collided.  We need to set the collision bit
        // UNLESS
        // we have remembered an available slot previously.
        if (emptySlotNumber == -1) { // Only set the collision bit if we haven't already remembered an available slot
          if (buckets[bucketNumber].hash_coll >= 0) {
            buckets[bucketNumber].hash_coll |= unchecked((int)0x80000000);
            occupancy++;
          }
        }
        seed += incr;
      } while (++ntry < buckets.Length);

      // This code is here if and only if there were no buckets without a collision bit set in the entire table
      if (emptySlotNumber != -1) {
        // We pretty much have to insert in this order.  Don't set hash
        // code until the value & key are set appropriately.
        buckets[emptySlotNumber].val = nvalue;
        buckets[emptySlotNumber].key  = key;
        buckets[emptySlotNumber].hash_coll |= (int)hashcode;
        count++;
        version++;
        return;
      }

      // If you see this assert, make sure load factor & count are reasonable.
      // Then verify that our double hash function (h2, described at top of file)
      // meets the requirements described above. You should never see this assert.
      throw new InvalidOperationException("InvalidOperation_HashInsertFailed");
    }
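
    // Ordering sketch (illustrative, not from the original source): a lock-free
    // reader only trusts a bucket once val is non-null, so the single writer
    // must publish fields in this order:
    //
    //   buckets[n].val = nvalue;            // 1: reader treats bucket as empty until this lands
    //   buckets[n].key = key;               // 2: key valid before the hash is published
    //   buckets[n].hash_coll |= (int)hash;  // 3: bucket now matchable by readers
    //
    // Publishing the hash before the value and key could let a concurrent
    // reader match the hash against a stale key or read a null value.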

    private void putEntry(bucket[] newBuckets, int key, Object nvalue, int hashcode) {
      uint seed = (uint)hashcode;
      uint incr = (uint)(1 + (((seed >> 5) + 1) % ((uint)newBuckets.Length - 1)));

      do {
        int bucketNumber = (int)(seed % (uint)newBuckets.Length);

        if ((newBuckets[bucketNumber].val == null)) {
          newBuckets[bucketNumber].val = nvalue;
          newBuckets[bucketNumber].key = key;
          newBuckets[bucketNumber].hash_coll |= hashcode;
          return;
        }

        if (newBuckets[bucketNumber].hash_coll >= 0) {
          newBuckets[bucketNumber].hash_coll |= unchecked((int)0x80000000);
          occupancy++;
        }
        seed += incr;
      } while (true);
    }
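
    // Termination note (illustrative, not from the original source): putEntry
    // mirrors InitHash's h2 but loops without a probe bound.  That is safe
    // here because rehash only calls it with hashcode already masked to 31
    // bits and with newBuckets sized by GetPrime above the element count, so
    // a free slot always exists within newBuckets.Length probes.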

    // Returns the number of associations in this hashtable.
    //
    //| <include path='docs/doc[@for="IntHashTable.Count"]/*' />
    //internal int Count {
    //  get { return count; }
    //}

    // Implements an enumerator for a hashtable. The enumerator uses the
    // internal version number of the hashtable to ensure that no modifications
    // are made to the hashtable while an enumeration is in progress.
    //private class IntHashTableEnumerator : IEnumerator {
    //  private IntHashTable hashtable;
    //  private int bucket;
    //  private int version;
    //  private bool current;
    //  //private int currentKey;
    //  private Object currentValue;

    //  internal IntHashTableEnumerator(IntHashTable hashtable) {
    //    this.hashtable = hashtable;
    //    bucket = hashtable.buckets.Length;
    //    version = hashtable.version;
    //  }

    //  public bool MoveNext() {
    //    if (version != hashtable.version)
    //      throw new InvalidOperationException("InvalidOperation_EnumFailedVersion");
    //    while (bucket > 0) {
    //      bucket--;
    //      Object val = hashtable.buckets[bucket].val;
    //      if (val != null) {
    //        //currentKey = hashtable.buckets[bucket].key;
    //        currentValue = val;
    //        current = true;
    //        return true;
    //      }
    //    }
    //    current = false;
    //    return false;
    //  }

    //  //internal int Key {
    //  //  get {
    //  //    if (current == false)
    //  //      throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen");
    //  //    return currentKey;
    //  //  }
    //  //}

    //  public Object Current {
    //    get {
    //      if (current == false)
    //        throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen");
    //      return currentValue;
    //    }
    //  }

    //  //public Object Value {
    //  //  get {
    //  //    if (version != hashtable.version)
    //  //      throw new InvalidOperationException("InvalidOperation_EnumFailedVersion");
    //  //    if (current == false)
    //  //      throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen");
    //  //    return currentValue;
    //  //  }
    //  //}

    //  public void Reset() {
    //    if (version != hashtable.version) throw new InvalidOperationException("InvalidOperation_EnumFailedVersion");
    //    current = false;
    //    bucket = hashtable.buckets.Length;
    //    //currentKey = -1;
    //    currentValue = null;
    //  }
    //}
  }
}