PageRenderTime 56ms CodeModel.GetById 6ms RepoModel.GetById 0ms app.codeStats 0ms

/Languages/IronPython/IronPython/Runtime/SetStorage.cs

http://github.com/IronLanguages/main
C# | 1415 lines | 995 code | 223 blank | 197 comment | 349 complexity | b513889f71b341db15db316154997a13 MD5 | raw file
Possible License(s): CPL-1.0, BSD-3-Clause, ISC, GPL-2.0, MPL-2.0-no-copyleft-exception
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Apache License, Version 2.0. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Apache License, Version 2.0, please send an email to
  8. * dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Apache License, Version 2.0.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Collections;
  17. using System.Collections.Generic;
  18. using System.Diagnostics;
  19. using System.Runtime.CompilerServices;
  20. using System.Runtime.Serialization;
  21. using System.Text;
  22. using System.Threading;
  23. using Microsoft.Scripting;
  24. using Microsoft.Scripting.Utils;
  25. using IronPython.Runtime.Operations;
  26. using IronPython.Runtime.Types;
  27. namespace IronPython.Runtime {
  28. /// <summary>
  29. /// General-purpose storage used for Python sets and frozensets.
  30. ///
  31. /// The set storage is thread-safe for multiple readers or writers.
  32. ///
  33. /// Mutations to the set involve a simple locking strategy of locking on the SetStorage object
  34. /// itself to ensure mutual exclusion.
  35. ///
  36. /// Reads against the set happen lock-free. When the set is mutated, it adds or removes buckets
  37. /// in an atomic manner so that the readers will see a consistent picture as if the read
  38. /// occurred either before or after the mutation.
  39. /// </summary>
  40. [Serializable]
  41. internal sealed class SetStorage : IEnumerable, IEnumerable<object>, ISerializable, IDeserializationCallback {
  42. internal Bucket[] _buckets;
  43. internal int _count;
  44. private int _version;
  45. internal bool _hasNull;
  46. private Func<object, int> _hashFunc;
  47. private Func<object, object, bool> _eqFunc;
  48. private Type _itemType;
  49. // The maximum item count before resizing must occur. This is precomputed upon resizing
  50. // rather than multiplying by the load factor every time items are added.
  51. private int _maxCount;
  52. private const int InitialBuckets = 8;
  53. private const double Load = 0.7;
  54. private const double MinLoad = 0.5; // dictates Clone() behavior
  55. // marker type to indicate we've gone megamorphic (SetStorage happens to be a type we'll
  56. // never see as a set element)
  57. private static readonly Type HeterogeneousType = typeof(SetStorage);
  58. // marker object used to indicate we have a removed value
  59. internal static readonly object Removed = new object();
  /// <summary>
  /// Creates an empty set storage. No bucket array is allocated by this constructor;
  /// <c>_buckets</c> stays null until a later operation initializes it.
  /// </summary>
  public SetStorage() {
  }
  /// <summary>
  /// Creates a new set storage whose bucket array is allocated up front, sized from
  /// <paramref name="count"/>.
  /// </summary>
  /// <param name="count">The item count used to size the initial bucket array.</param>
  public SetStorage(int count) {
      this.Initialize(count);
  }
  70. #if FEATURE_SERIALIZATION
  /// <summary>
  /// Deserialization constructor. Only records <paramref name="info"/>; nothing is
  /// deserialized here.
  /// </summary>
  private SetStorage(SerializationInfo info, StreamingContext context) {
      // Remember the serialization info; the real deserialization happens when we get the
      // callback, which lets special types such as DBNull.Value be deserialized inside the
      // set. The info is parked in a single-element bucket array so that no extra field is
      // needed just for serialization.
      Bucket holder = new Bucket(0, info);
      _buckets = new[] { holder };
  }
  78. #endif
  /// <summary>
  /// Allocates a fresh bucket array of <c>InitialBuckets</c> slots and precomputes the
  /// matching resize threshold.
  /// </summary>
  private void Initialize() {
      const int bucketCount = InitialBuckets;

      _maxCount = (int)(bucketCount * Load);
      _buckets = new Bucket[bucketCount];
  }
  /// <summary>
  /// Allocates a bucket array sized from <paramref name="count"/> and the load factor,
  /// never smaller than <c>InitialBuckets</c>, and precomputes the resize threshold.
  /// </summary>
  private void Initialize(int count) {
      int needed = (int)(count / Load) + 1;
      if (needed < InitialBuckets) {
          needed = InitialBuckets;
      }

      // convert to a power of 2 so that index masking with (Length - 1) works
      int size = 1 << CeilLog2(needed);

      _maxCount = (int)(size * Load);
      _buckets = new Bucket[size];
  }
  /// <summary>
  /// Gets the number of items currently in the set. The null item is tracked separately
  /// from the bucket count, so it contributes one extra when present.
  /// </summary>
  public int Count {
      get {
          int bucketItems = _count;
          return _hasNull ? bucketItems + 1 : bucketItems;
      }
  }
  /// <summary>
  /// Gets the current value of the mutation counter that is incremented whenever a bucket
  /// is written.
  /// </summary>
  public int Version {
      get { return this._version; }
  }
  /// <summary>
  /// Adds an item to the set unless an equivalent item is already present. Takes the lock
  /// on this storage for the duration of the insert.
  /// </summary>
  public void Add(object item) {
      lock (this) {
          this.AddNoLock(item);
      }
  }
  /// <summary>
  /// Adds an item without taking any lock. A null item only sets the null flag; any other
  /// item is hashed and inserted into the buckets, which are allocated on first use.
  /// </summary>
  public void AddNoLock(object item) {
      if (item == null) {
          _hasNull = true;
          return;
      }

      if (_buckets == null) {
          Initialize();
      }

      // A type we have not specialized for (and not already heterogeneous) means the
      // hash/equality helpers must be chosen or widened first.
      Type itemType = item.GetType();
      if (_itemType != HeterogeneousType && itemType != _itemType) {
          UpdateHelperFunctions(itemType, item);
      }

      AddWorker(item, Hash(item));
  }
  /// <summary>
  /// Inserts a non-null item with a precomputed hash code into this storage's buckets,
  /// updating the item count and growing the table once the count passes the threshold.
  /// </summary>
  private void AddWorker(object/*!*/ item, int hashCode) {
      Debug.Assert(_buckets != null && _count < _buckets.Length);

      bool inserted = AddWorker(_buckets, item, hashCode, _eqFunc, ref _version);
      if (!inserted) {
          return;
      }

      _count++;
      if (_count > _maxCount) {
          Grow();
      }
  }
  /// <summary>
  /// Static helper which adds the given non-null item with a precomputed hash code. Returns
  /// true if the item was added, false if it was already present in the set.
  ///
  /// The whole probe chain (up to the first empty bucket) is searched for an equal item
  /// before anything is written. A removed-marker bucket does not end the chain: an equal
  /// item may still live behind it, and inserting at the marker straight away would store
  /// the item twice. The first removed-marker bucket seen is reused for the insert when no
  /// equal item is found.
  /// </summary>
  /// <param name="buckets">The bucket array to insert into; must not be null.</param>
  /// <param name="item">The non-null item to add.</param>
  /// <param name="hashCode">The precomputed hash code of <paramref name="item"/>.</param>
  /// <param name="eqFunc">Equality function used to compare against stored items.</param>
  /// <param name="version">Mutation counter, incremented when the item is inserted.</param>
  /// <exception cref="InvalidOperationException">
  /// The probe sequence returned to its start without finding an empty or removed bucket.
  /// </exception>
  private static bool AddWorker(
      Bucket[]/*!*/ buckets, object/*!*/ item, int hashCode,
      Func<object, object, bool> eqFunc, ref int version
  ) {
      Debug.Assert(buckets != null);
      Debug.Assert(item != null);

      int startIndex = hashCode & (buckets.Length - 1);
      int index = startIndex;
      int freeIndex = -1; // first reusable slot seen (removed marker or the terminating empty slot)

      do {
          Bucket bucket = buckets[index];
          if (bucket.Item == null) {
              // end of the probe chain: the item is definitely not present
              if (freeIndex < 0) {
                  freeIndex = index;
              }
              break;
          }

          if (bucket.Item == Removed) {
              // remember the slot but keep probing; an equal item may follow
              if (freeIndex < 0) {
                  freeIndex = index;
              }
          } else if (bucket.HashCode == hashCode && eqFunc(item, bucket.Item)) {
              return false;
          }

          ProbeNext(buckets, ref index);
      } while (index != startIndex);

      if (freeIndex < 0) {
          // every bucket holds a live, unequal item; callers are expected to grow first
          throw new InvalidOperationException("set storage has no free bucket");
      }

      version++;
      // write the hash code before the item so a bucket with an item always has its hash
      buckets[freeIndex].HashCode = hashCode;
      buckets[freeIndex].Item = item;
      return true;
  }
  /// <summary>
  /// Toggles membership of a non-null item with a precomputed hash code: if an equal item
  /// is found it is replaced by the removed marker, otherwise the item is stored in the
  /// first empty bucket of its probe chain. No lock is taken here.
  /// </summary>
  private void AddOrRemoveWorker(object/*!*/ item, int hashCode) {
      Debug.Assert(_buckets != null);
      Debug.Assert(item != null);

      int index = hashCode & (_buckets.Length - 1);
      while (true) {
          Bucket current = _buckets[index];

          if (current.Item == null) {
              // reached the end of the chain without a match: add the item
              _version++;
              _buckets[index].HashCode = hashCode;
              _buckets[index].Item = item;
              _count++;
              if (_count > _maxCount) {
                  Grow();
              }
              return;
          }

          bool isLive = current.Item != Removed;
          if (isLive && current.HashCode == hashCode && _eqFunc(item, current.Item)) {
              // found an equal item: remove it
              _version++;
              _buckets[index].Item = Removed;
              _count--;
              return;
          }

          ProbeNext(_buckets, ref index);
      }
  }
  /// <summary>
  /// Removes every item from the set while holding the lock on this storage.
  /// </summary>
  public void Clear() {
      lock (this) {
          this.ClearNoLock();
      }
  }
  /// <summary>
  /// Removes every item from the set without taking any lock. If buckets were allocated
  /// they are replaced by a fresh initial-size array; the null flag is always cleared.
  /// </summary>
  public void ClearNoLock() {
      bool hasBuckets = _buckets != null;
      if (hasBuckets) {
          _version++;
          Initialize();
          _count = 0;
      }

      _hasNull = false;
  }
  /// <summary>
  /// Clones the set, returning a new SetStorage object.
  ///
  /// A sparsely populated set (fewer live items than <c>MinLoad</c> of its buckets) is
  /// re-inserted into a right-sized table. Otherwise the bucket array is copied slot for
  /// slot; removed markers are copied too so probe chains stay intact, but only live items
  /// are counted.
  /// </summary>
  /// <returns>A new storage with the same items and hash/equality helpers.</returns>
  public SetStorage Clone() {
      SetStorage res = new SetStorage();
      res._hasNull = _hasNull;
      if (_count == 0) {
          return res;
      }

      // work from one snapshot of the bucket array for the whole copy
      Bucket[] buckets = _buckets;

      res._hashFunc = _hashFunc;
      res._eqFunc = _eqFunc;
      res._itemType = _itemType;

      if (_count < buckets.Length * MinLoad) {
          // If the set is sparsely populated, create a cleaner copy
          res.Initialize(_count);

          for (int i = 0; i < buckets.Length; i++) {
              Bucket bucket = buckets[i];
              if (bucket.Item != null && bucket.Item != Removed) {
                  res.AddWorker(bucket.Item, bucket.HashCode);
              }
          }
      } else {
          // Otherwise, perform a faster copy
          res._maxCount = (int)(buckets.Length * Load);
          res._buckets = new Bucket[buckets.Length];

          for (int i = 0; i < buckets.Length; i++) {
              Bucket bucket = buckets[i];
              if (bucket.Item == null) {
                  continue;
              }

              res._buckets[i].Item = bucket.Item;
              res._buckets[i].HashCode = bucket.HashCode;

              // a removed marker occupies a slot but is not an item
              if (bucket.Item != Removed) {
                  res._count++;
              }
          }
      }

      return res;
  }
  /// <summary>
  /// Checks whether the given item exists in the set. Returns early, without hashing the
  /// item, when the item is null or the buckets hold no items.
  /// </summary>
  public bool Contains(object item) {
      if (item == null) {
          return _hasNull;
      }
      if (_count == 0) {
          return false;
      }

      // The set's own helpers apply only when the item matches the set's item type or the
      // set is heterogeneous; otherwise fall back to the generic helpers.
      bool useSetHelpers = item.GetType() == _itemType || _itemType == HeterogeneousType;

      int hashCode = useSetHelpers ? _hashFunc(item) : _genericHash(item);
      Func<object, object, bool> eqFunc = useSetHelpers ? _eqFunc : _genericEquals;

      return ContainsWorker(_buckets, item, hashCode, eqFunc);
  }
  /// <summary>
  /// Checks whether the given item exists in the set. Unlike <c>Contains</c>, a non-null
  /// item is always hashed, even when the buckets hold no items.
  /// </summary>
  /// <param name="item">The item to look for; may be null.</param>
  /// <returns>true if the item is in the set, otherwise false.</returns>
  public bool ContainsAlwaysHash(object item) {
      if (item == null) {
          return _hasNull;
      }

      bool useSetHelpers = item.GetType() == _itemType || _itemType == HeterogeneousType;

      // hash first, unconditionally; the emptiness check only comes afterwards
      int hashCode = useSetHelpers ? _hashFunc(item) : _genericHash(item);
      Func<object, object, bool> eqFunc = useSetHelpers ? _eqFunc : _genericEquals;

      if (_count <= 0) {
          return false;
      }
      return ContainsWorker(_buckets, item, hashCode, eqFunc);
  }
  /// <summary>
  /// Probes <paramref name="buckets"/> for a non-null item with a precomputed hash code.
  /// The search stops at the first empty bucket or once the probe sequence returns to its
  /// starting index; removed-marker buckets are skipped.
  /// </summary>
  private static bool ContainsWorker(
      Bucket[]/*!*/ buckets, object/*!*/ item, int hashCode,
      Func<object, object, bool> eqFunc
  ) {
      Debug.Assert(item != null);
      Debug.Assert(buckets != null);
      Debug.Assert(eqFunc != null);

      int first = hashCode & (buckets.Length - 1);
      int index = first;

      do {
          Bucket probe = buckets[index];

          if (probe.Item == null) {
              return false;
          }
          if (probe.Item != Removed && probe.HashCode == hashCode && eqFunc(item, probe.Item)) {
              return true;
          }

          ProbeNext(buckets, ref index);
      } while (index != first);

      return false;
  }
  /// <summary>
  /// Adds the items of this set to <paramref name="into"/>, holding the lock on
  /// <paramref name="into"/> while it is updated.
  /// </summary>
  public void CopyTo(SetStorage/*!*/ into) {
      Debug.Assert(into != null);

      SetStorage source = this;
      lock (into) {
          into.UnionUpdate(source);
      }
  }
  /// <summary>
  /// Non-generic enumeration; forwards to the generic enumerator.
  /// </summary>
  IEnumerator IEnumerable.GetEnumerator() {
      IEnumerator<object> typed = GetEnumerator();
      return typed;
  }
  /// <summary>
  /// Enumerates the items of the set: null first if present, then the live items in bucket
  /// order, read from the bucket array that was current when that part of the enumeration
  /// began.
  /// </summary>
  public IEnumerator<object> GetEnumerator() {
      if (_hasNull) {
          yield return null;
      }

      if (_count == 0) {
          yield break;
      }

      Bucket[] snapshot = _buckets;
      for (int slot = 0; slot < snapshot.Length; slot++) {
          object item = snapshot[slot].Item;
          if (item == null || item == Removed) {
              continue;
          }
          yield return item;
      }
  }
  /// <summary>
  /// Builds a new list holding the items of the set: null first if present, then the live
  /// items in bucket order.
  /// </summary>
  public List/*!*/ GetItems() {
      List res = new List(Count);

      if (_hasNull) {
          res.AddNoLock(null);
      }

      if (_count <= 0) {
          return res;
      }

      Bucket[] snapshot = _buckets;
      for (int slot = 0; slot < snapshot.Length; slot++) {
          object item = snapshot[slot].Item;
          if (item == null || item == Removed) {
              continue;
          }
          res.AddNoLock(item);
      }

      return res;
  }
  /// <summary>
  /// Removes the first set element in the iteration order (null first if present, then the
  /// first live bucket).
  ///
  /// The whole operation, including the null-flag check, runs under the lock on this
  /// storage so that two concurrent callers cannot both pop the same element.
  /// </summary>
  /// <param name="item">Receives the removed item; null if the set was empty.</param>
  /// <returns>true if an item was removed, false if the set was empty</returns>
  public bool Pop(out object item) {
      item = null;

      lock (this) {
          if (_hasNull) {
              _hasNull = false;
              return true;
          }

          if (_count == 0) {
              return false;
          }

          for (int i = 0; i < _buckets.Length; i++) {
              object candidate = _buckets[i].Item;
              if (candidate != null && candidate != Removed) {
                  item = candidate;
                  _version++;
                  _buckets[i].Item = Removed;
                  _count--;
                  return true;
              }
          }

          return false;
      }
  }
  /// <summary>
  /// Removes an item from the set while holding the lock on this storage.
  /// </summary>
  /// <returns>true if the item was present and removed, otherwise false.</returns>
  public bool Remove(object item) {
      bool removed;
      lock (this) {
          removed = RemoveNoLock(item);
      }
      return removed;
  }
  /// <summary>
  /// Removes an item from the set without taking any lock. A non-null item is not hashed
  /// when the buckets hold no items.
  /// </summary>
  /// <returns>true if the item was present and removed, otherwise false.</returns>
  public bool RemoveNoLock(object item) {
      if (item == null) {
          return RemoveNull();
      }

      return _count != 0 && RemoveItem(item);
  }
  /// <summary>
  /// Removes an item from the set under the lock and reports whether it was removed. A
  /// non-null item is always hashed, so an unhashable item throws even if the set has no
  /// buckets.
  /// </summary>
  internal bool RemoveAlwaysHash(object item) {
      lock (this) {
          return item == null ? RemoveNull() : RemoveItem(item);
      }
  }
  /// <summary>
  /// Clears the null flag and reports whether null was in the set beforehand.
  /// </summary>
  private bool RemoveNull() {
      if (!_hasNull) {
          return false;
      }

      _hasNull = false;
      return true;
  }
  /// <summary>
  /// Removes a non-null item without taking any lock. Picks the set's own hash/equality
  /// helpers when the item matches the set's item type or the set is heterogeneous, and the
  /// generic helpers otherwise.
  /// </summary>
  private bool RemoveItem(object/*!*/ item) {
      Debug.Assert(item != null);

      bool useSetHelpers = item.GetType() == _itemType || _itemType == HeterogeneousType;

      int hashCode = useSetHelpers ? _hashFunc(item) : _genericHash(item);
      Func<object, object, bool> eqFunc = useSetHelpers ? _eqFunc : _genericEquals;

      return RemoveWorker(item, hashCode, eqFunc);
  }
  /// <summary>
  /// Probes this storage's buckets for a non-null item with a precomputed hash code and, if
  /// found, replaces it with the removed marker.
  /// </summary>
  /// <returns>true if an equal item was found and removed, otherwise false.</returns>
  private bool RemoveWorker(
      object/*!*/ item, int hashCode, Func<object, object, bool> eqFunc
  ) {
      Debug.Assert(item != null);

      if (_count == 0) {
          return false;
      }

      int first = hashCode & (_buckets.Length - 1);
      int index = first;

      do {
          Bucket probe = _buckets[index];

          if (probe.Item == null) {
              // end of the probe chain
              return false;
          }

          if (probe.Item != Removed && probe.HashCode == hashCode && eqFunc(item, probe.Item)) {
              _version++;
              _buckets[index].Item = Removed;
              _count--;
              return true;
          }

          ProbeNext(_buckets, ref index);
      } while (index != first);

      return false;
  }
  455. #region Set Operations
  456. // Each of these set operations mutate the current set lock-free. Synchronization must
  457. // be done by the caller if desired.
  /// <summary>
  /// Determines whether this set and <paramref name="other"/> have no element in common.
  /// </summary>
  public bool IsDisjoint(SetStorage other) {
      return SetStorage.IsDisjoint(this, other);
  }
  /// <summary>
  /// Determines whether the two sets have no element in common. After the two arguments
  /// are ordered by <c>SortBySize</c>, the live items of the first are looked up in the
  /// second.
  /// </summary>
  public static bool IsDisjoint(SetStorage self, SetStorage other) {
      SortBySize(ref self, ref other);

      if (self._hasNull && other._hasNull) {
          // both contain null
          return false;
      }
      if (self._count == 0 || other._count == 0) {
          return true;
      }

      Bucket[] scanned = self._buckets;
      Bucket[] probed = other._buckets;
      var eqFunc = GetEqFunc(self, other);

      for (int slot = 0; slot < scanned.Length; slot++) {
          Bucket bucket = scanned[slot];
          if (bucket.Item == null || bucket.Item == Removed) {
              continue;
          }
          if (ContainsWorker(probed, bucket.Item, bucket.HashCode, eqFunc)) {
              return false;
          }
      }

      return true;
  }
  /// <summary>
  /// Determines whether every element of this set is also in <paramref name="other"/>.
  /// </summary>
  public bool IsSubset(SetStorage other) {
      // quick rejections: too many items, or a null that 'other' lacks
      if (_count > other._count) {
          return false;
      }
      if (_hasNull && !other._hasNull) {
          return false;
      }

      return IsSubsetWorker(other);
  }
  /// <summary>
  /// Determines whether this set is a subset of <paramref name="other"/> that has a
  /// different total item count.
  /// </summary>
  public bool IsStrictSubset(SetStorage other) {
      if (_count > other._count) {
          return false;
      }
      if (_hasNull && !other._hasNull) {
          return false;
      }
      if (Count == other.Count) {
          // equal sizes can never be a strict subset
          return false;
      }

      return IsSubsetWorker(other);
  }
  /// <summary>
  /// Checks that every live bucket item of this set can be found in the buckets of
  /// <paramref name="other"/>. The null item is not examined here.
  /// </summary>
  private bool IsSubsetWorker(SetStorage other) {
      if (_count == 0) {
          return true;
      }
      if (other._count == 0) {
          return false;
      }

      Bucket[] mine = _buckets;
      Bucket[] theirs = other._buckets;
      var eqFunc = GetEqFunc(this, other);

      for (int slot = 0; slot < mine.Length; slot++) {
          Bucket bucket = mine[slot];
          if (bucket.Item == null || bucket.Item == Removed) {
              continue;
          }
          if (!ContainsWorker(theirs, bucket.Item, bucket.HashCode, eqFunc)) {
              return false;
          }
      }

      return true;
  }
  /// <summary>
  /// Mutates this set so that it also contains every element of <paramref name="other"/>.
  /// No lock is taken; the caller must lock this set if synchronization is desired.
  /// </summary>
  public void UnionUpdate(SetStorage other) {
      _hasNull |= other._hasNull;

      if (other._count == 0) {
          return;
      }

      if (_buckets == null) {
          Initialize(other._count);
      }

      Bucket[] source = other._buckets;
      UpdateHelperFunctions(other);

      // stored hash codes are reused, so items are not re-hashed
      for (int slot = 0; slot < source.Length; slot++) {
          Bucket bucket = source[slot];
          if (bucket.Item == null || bucket.Item == Removed) {
              continue;
          }
          AddWorker(bucket.Item, bucket.HashCode);
      }
  }
  /// <summary>
  /// Mutates this set to contain its intersection with 'other'. The caller must lock the
  /// current set if synchronization is desired.
  ///
  /// The null item survives exactly when both sets contain it, including the case where
  /// 'other' holds nothing but null.
  /// </summary>
  public void IntersectionUpdate(SetStorage other) {
      // Decide the fate of null up front: ClearNoLock() resets _hasNull, so it must not be
      // consulted after the clear.
      bool keepNull = _hasNull && other._hasNull;

      if (other._count == 0) {
          ClearNoLock();
          _hasNull = keepNull;
          return;
      }

      _hasNull = keepNull;

      if (_count == 0) {
          return;
      }

      Bucket[] buckets = _buckets;
      Bucket[] otherBuckets = other._buckets;
      var eqFunc = GetEqFunc(this, other);

      for (int i = 0; i < buckets.Length; i++) {
          Bucket bucket = buckets[i];
          if (bucket.Item != null && bucket.Item != Removed &&
              !ContainsWorker(otherBuckets, bucket.Item, bucket.HashCode, eqFunc)) {
              // not in 'other': drop it from this set
              _version++;
              buckets[i].Item = Removed;
              _count--;
          }
      }
  }
  /// <summary>
  /// Mutates this set to contain its symmetric difference with <paramref name="other"/>:
  /// each live item of 'other' is removed from this set if present and added otherwise.
  /// No lock is taken; the caller must lock this set if synchronization is desired.
  /// </summary>
  public void SymmetricDifferenceUpdate(SetStorage other) {
      _hasNull ^= other._hasNull;

      if (other._count == 0) {
          return;
      }

      if (_buckets == null) {
          Initialize();
      }

      Bucket[] source = other._buckets;
      UpdateHelperFunctions(other);

      for (int slot = 0; slot < source.Length; slot++) {
          Bucket bucket = source[slot];
          if (bucket.Item == null || bucket.Item == Removed) {
              continue;
          }
          AddOrRemoveWorker(bucket.Item, bucket.HashCode);
      }
  }
  /// <summary>
  /// Mutates this set to contain its difference with <paramref name="other"/>. Whichever
  /// bucket array is shorter drives the loop. No lock is taken; the caller must lock this
  /// set if synchronization is desired.
  /// </summary>
  public void DifferenceUpdate(SetStorage other) {
      _hasNull &= !other._hasNull;

      if (_count == 0 || other._count == 0) {
          return;
      }

      Bucket[] mine = _buckets;
      Bucket[] theirs = other._buckets;
      var eqFunc = GetEqFunc(this, other);

      bool scanSelf = mine.Length < theirs.Length;
      if (scanSelf) {
          // walk our own buckets, removing whatever 'other' also contains
          for (int slot = 0; slot < mine.Length; slot++) {
              Bucket bucket = mine[slot];
              if (bucket.Item == null || bucket.Item == Removed) {
                  continue;
              }
              if (ContainsWorker(theirs, bucket.Item, bucket.HashCode, eqFunc)) {
                  RemoveWorker(bucket.Item, bucket.HashCode, eqFunc);
              }
          }
          return;
      }

      // walk the other set's buckets, removing each of its items from this set
      for (int slot = 0; slot < theirs.Length; slot++) {
          Bucket bucket = theirs[slot];
          if (bucket.Item == null || bucket.Item == Removed) {
              continue;
          }
          RemoveWorker(bucket.Item, bucket.HashCode, eqFunc);
      }
  }
  /// <summary>
  /// Computes the union of self and other as a new set; only the clone is mutated.
  /// </summary>
  public static SetStorage Union(SetStorage self, SetStorage other) {
      // UnionUpdate iterates through its argument, so clone the set with more items and
      // feed it the one with fewer.
      bool otherIsLarger = self._count < other._count;
      SetStorage larger = otherIsLarger ? other : self;
      SetStorage smaller = otherIsLarger ? self : other;

      SetStorage res = larger.Clone();
      res.UnionUpdate(smaller);
      return res;
  }
  /// <summary>
  /// Computes the intersection of self and other as a new set; only the result storage is
  /// written to.
  /// </summary>
  public static SetStorage Intersection(SetStorage self, SetStorage other) {
      SetStorage result = new SetStorage(Math.Min(self._count, other._count));
      result._hasNull = self._hasNull && other._hasNull;

      if (self._count == 0 || other._count == 0) {
          return result;
      }

      SortBySize(ref self, ref other);

      Bucket[] scanned = self._buckets;
      Bucket[] probed = other._buckets;
      var eqFunc = GetEqFunc(self, other);

      // if either set is homogeneous, then the resulting set must be
      SetStorage helperSource = other._itemType != HeterogeneousType ? other : self;
      result.UpdateHelperFunctions(helperSource);

      for (int slot = 0; slot < scanned.Length; slot++) {
          Bucket bucket = scanned[slot];
          if (bucket.Item == null || bucket.Item == Removed) {
              continue;
          }
          if (ContainsWorker(probed, bucket.Item, bucket.HashCode, eqFunc)) {
              result.AddWorker(bucket.Item, bucket.HashCode);
          }
      }

      return result;
  }
  /// <summary>
  /// Computes the symmetric difference of self and other as a new set; only the clone is
  /// mutated.
  /// </summary>
  public static SetStorage SymmetricDifference(SetStorage self, SetStorage other) {
      // SymmetricDifferenceUpdate iterates through its argument, so after ordering the two
      // sets, clone 'other' and feed it 'self'.
      SortBySize(ref self, ref other);

      SetStorage result = other.Clone();
      result.SymmetricDifferenceUpdate(self);
      return result;
  }
  /// <summary>
  /// Computes the difference of self and other, returning an entirely new set. Neither
  /// self nor other is modified.
  ///
  /// The result contains null exactly when self contains it and other does not.
  /// </summary>
  public static SetStorage Difference(SetStorage self, SetStorage other) {
      SetStorage res;

      if (self._count == 0 || other._count == 0) {
          // nothing to subtract from the buckets; only null needs attention
          res = self.Clone();
          res._hasNull &= !other._hasNull;
          return res;
      }

      if (self._buckets.Length <= other._buckets.Length) {
          res = new SetStorage(self._count);
          // A fresh storage starts without null, so the flag has to be assigned from self
          // rather than and-ed into the (always false) initial value.
          res._hasNull = self._hasNull && !other._hasNull;

          Bucket[] buckets = self._buckets;
          Bucket[] otherBuckets = other._buckets;
          var eqFunc = GetEqFunc(self, other);
          res.UpdateHelperFunctions(self);

          // copy over every live item of self that other does not contain
          for (int i = 0; i < buckets.Length; i++) {
              Bucket bucket = buckets[i];
              if (bucket.Item != null && bucket.Item != Removed &&
                  !ContainsWorker(otherBuckets, bucket.Item, bucket.HashCode, eqFunc)) {
                  res.AddWorker(bucket.Item, bucket.HashCode);
              }
          }
      } else {
          // Clone() carries self's null flag; DifferenceUpdate then applies other's
          res = self.Clone();
          res.DifferenceUpdate(other);
      }

      return res;
  }
  711. #endregion
  712. #region Comparison and Hashing
  /// <summary>
  /// Determines whether two sets contain the same items under <paramref name="comparer"/>.
  /// With a <c>PythonContext.PythonEqualityComparer</c> the stored hash codes are reused;
  /// with any other comparer a temporary storage is built that hashes and compares through
  /// the comparer.
  /// </summary>
  public static bool Equals(SetStorage x, SetStorage y, IEqualityComparer comparer) {
      if (object.ReferenceEquals(x, y)) {
          return true;
      }

      if (x._count != y._count) {
          return false;
      }
      if (x._hasNull != y._hasNull) {
          return false;
      }

      if (x._count == 0) {
          // both bucket counts are zero and the null flags agree
          return true;
      }

      SortBySize(ref x, ref y);

      // optimization when we know the behavior of the comparer
      if (comparer is PythonContext.PythonEqualityComparer) {
          Bucket[] xBuckets = x._buckets;
          Bucket[] yBuckets = y._buckets;
          var eqFunc = GetEqFunc(x, y);

          for (int slot = 0; slot < xBuckets.Length; slot++) {
              Bucket bucket = xBuckets[slot];
              if (bucket.Item == null || bucket.Item == Removed) {
                  continue;
              }
              if (!ContainsWorker(yBuckets, bucket.Item, bucket.HashCode, eqFunc)) {
                  return false;
              }
          }

          return true;
      }

      // General case: build a special SetStorage that uses the comparer's hashing and
      // equality functions, fill it from y, then remove every item of x from it.
      SetStorage ySet = new SetStorage();
      ySet._itemType = HeterogeneousType;
      ySet._eqFunc = comparer.Equals;
      ySet._hashFunc = comparer.GetHashCode;

      foreach (object yItem in y) {
          ySet.AddNoLock(yItem);
      }

      foreach (object xItem in x) {
          bool removed = ySet.RemoveNoLock(xItem);
          if (!removed) {
              return false;
          }
      }

      return ySet._count == 0;
  }
  /// <summary>
  /// Computes a hash code for the set. Three running values are kept; per-item hashes are
  /// folded in with xor, addition and multiplication respectively, and the three are mixed
  /// together at the end. With a <c>PythonContext.PythonEqualityComparer</c> the stored
  /// hash codes are used; otherwise each item is hashed through
  /// <paramref name="comparer"/>.
  /// </summary>
  public static int GetHashCode(SetStorage set, IEqualityComparer/*!*/ comparer) {
      Assert.NotNull(comparer);

      // hash code needs to be stable across collections (even if items are added in
      // different order) and needs to be fairly collision-free.
      int xorHash = 1420601183;
      int sumHash = 674132117;
      int mulHash = 393601577;

      if (set._count > 0) {
          xorHash ^= set._count * 8803;
          xorHash = (xorHash << 10) ^ (xorHash >> 22);
          sumHash += set._count * 5179;
          sumHash = (sumHash << 10) ^ (sumHash >> 22);
          mulHash = mulHash * set._count + 784251623;
          mulHash = (mulHash << 10) ^ (mulHash >> 22);
      }

      // Comparer with known hash behavior lets us use the precomputed hash codes.
      bool usePrecomputed = comparer is PythonContext.PythonEqualityComparer;

      if (set._hasNull) {
          if (usePrecomputed) {
              xorHash = (xorHash << 7) ^ (xorHash >> 25) ^ NoneTypeOps.NoneHashCode;
              sumHash = ((sumHash << 7) ^ (sumHash >> 25)) + NoneTypeOps.NoneHashCode;
              mulHash = ((mulHash << 7) ^ (mulHash >> 25)) * NoneTypeOps.NoneHashCode;
          } else {
              int nullHash = comparer.GetHashCode(null);
              xorHash = (xorHash + ((xorHash << 7) ^ (xorHash >> 25))) ^ nullHash;
              sumHash = ((sumHash << 7) ^ (sumHash >> 25)) + nullHash;
              mulHash = ((mulHash << 7) ^ (mulHash >> 25)) * nullHash;
          }
      }

      if (set._count > 0) {
          Bucket[] buckets = set._buckets;
          for (int slot = 0; slot < buckets.Length; slot++) {
              object item = buckets[slot].Item;
              if (item == null || item == Removed) {
                  continue;
              }

              int itemHash = usePrecomputed ? buckets[slot].HashCode : comparer.GetHashCode(item);
              xorHash ^= itemHash;
              sumHash += itemHash;
              mulHash *= itemHash;
          }
      }

      xorHash = (xorHash << 11) ^ (xorHash >> 21) ^ sumHash;
      xorHash = (xorHash << 27) ^ (xorHash >> 5) ^ mulHash;
      return (xorHash << 9) ^ (xorHash >> 23) ^ 2001081521;
  }
  814. #endregion
  /// <summary>
  /// A single slot of the hash table: an item together with its computed hash code.
  ///
  /// Bucket is not serializable because the stored hash code could change between
  /// serialization and deserialization.
  /// </summary>
  internal struct Bucket {
      public object Item;
      public int HashCode;

      public Bucket(int hashCode, object item) {
          this.HashCode = hashCode;
          this.Item = item;
      }
  }
  829. #region Hash/Equality Delegates
  830. // pre-created delegate instances shared by all homogeneous sets on primitive types
  831. private static readonly Func<object, int>
  832. _primitiveHash = PrimitiveHash,
  833. _intHash = IntHash,
  834. _doubleHash = DoubleHash,
  835. _tupleHash = TupleHash,
  836. _genericHash = GenericHash;
  837. private static readonly Func<object, object, bool>
  838. _stringEquals = StringEquals,
  839. _intEquals = IntEquals,
  840. _doubleEquals = DoubleEquals,
  841. _tupleEquals = TupleEquals,
  842. _genericEquals = GenericEquals,
  843. _objectEquals = object.ReferenceEquals;
  /// <summary>Hashes an object through its own <c>GetHashCode</c>.</summary>
  private static int PrimitiveHash(object o) {
      int hash = o.GetHashCode();
      return hash;
  }
  /// <summary>Hashes a boxed int to its own value.</summary>
  private static int IntHash(object o) {
      int value = (int)o;
      return value;
  }
  /// <summary>Hashes a boxed double through <c>DoubleOps.__hash__</c>.</summary>
  private static int DoubleHash(object o) {
      double value = (double)o;
      return DoubleOps.__hash__(value);
  }
  /// <summary>
  /// Hashes an item structurally, using the default Python context's non-generic equality
  /// comparer for the elements.
  /// </summary>
  private static int TupleHash(object o) {
      IStructuralEquatable structural = (IStructuralEquatable)o;
      return structural.GetHashCode(
          DefaultContext.DefaultPythonContext.EqualityComparerNonGeneric
      );
  }
  /// <summary>Hashes an arbitrary object through <c>PythonOps.Hash</c>.</summary>
  private static int GenericHash(object o) {
      int hash = PythonOps.Hash(DefaultContext.Default, o);
      return hash;
  }
  /// <summary>Compares two objects as strings by value.</summary>
  private static bool StringEquals(object o1, object o2) {
      string left = (string)o1;
      string right = (string)o2;
      return left == right;
  }
  /// <summary>Compares two boxed ints by value.</summary>
  private static bool IntEquals(object o1, object o2) {
      Debug.Assert(o1 is int && o2 is int);

      int left = (int)o1;
      int right = (int)o2;
      return left == right;
  }
  /// <summary>
  /// Compares two boxed doubles with the <c>==</c> operator (so NaN never equals NaN).
  /// </summary>
  private static bool DoubleEquals(object o1, object o2) {
      double left = (double)o1;
      double right = (double)o2;
      return left == right;
  }
  /// <summary>
  /// Compares two items structurally, using the default Python context's non-generic
  /// equality comparer for the elements.
  /// </summary>
  private static bool TupleEquals(object o1, object o2) {
      IStructuralEquatable left = (IStructuralEquatable)o1;
      return left.Equals(
          o2, DefaultContext.DefaultPythonContext.EqualityComparerNonGeneric
      );
  }
  /// <summary>
  /// Compares two arbitrary objects: identical references are equal without further work,
  /// anything else is decided by <c>PythonOps.EqualRetBool</c>.
  /// </summary>
  private static bool GenericEquals(object o1, object o2) {
      if (object.ReferenceEquals(o1, o2)) {
          return true;
      }
      return PythonOps.EqualRetBool(o1, o2);
  }
  /// <summary>
  /// Reconciles this set's hash/equality helpers with those of <paramref name="other"/>
  /// before items from 'other' are merged in. Nothing changes if this set is already
  /// heterogeneous or both share an item type; a set with no item type yet adopts the
  /// helpers of a homogeneous 'other'; every remaining combination makes this set
  /// heterogeneous.
  /// </summary>
  private void UpdateHelperFunctions(SetStorage other) {
      if (_itemType == HeterogeneousType || _itemType == other._itemType) {
          return;
      }

      bool canAdopt = _itemType == null && other._itemType != HeterogeneousType;
      if (canAdopt) {
          _hashFunc = other._hashFunc;
          _eqFunc = other._eqFunc;
          _itemType = other._itemType;
      } else {
          SetHeterogeneousSites();
      }
  }
  /// <summary>
  /// Chooses hash/equality helpers for an item of type <paramref name="t"/>. The first
  /// item type seen selects specialized helpers; seeing a type while a different one is
  /// already recorded switches the set to heterogeneous helpers.
  /// </summary>
  private void UpdateHelperFunctions(Type t, object item) {
      if (_itemType == HeterogeneousType) {
          // this set has already created a new heterogeneous site
          return;
      }

      if (_itemType != null) {
          // 2nd time through, we're adding a new type, so the set is heterogeneous
          SetHeterogeneousSites();
          return;
      }

      // first time through; get the sites for this specific type
      if (t == typeof(int)) {
          _hashFunc = _intHash;
          _eqFunc = _intEquals;
      } else if (t == typeof(string)) {
          _hashFunc = _primitiveHash;
          _eqFunc = _stringEquals;
      } else if (t == typeof(double)) {
          _hashFunc = _doubleHash;
          _eqFunc = _doubleEquals;
      } else if (t == typeof(PythonTuple)) {
          _hashFunc = _tupleHash;
          _eqFunc = _tupleEquals;
      } else if (t == typeof(Type).GetType()) {
          // RuntimeType
          _hashFunc = _primitiveHash;
          _eqFunc = _objectEquals;
      } else {
          // random other type, but still homogeneous; get a shared site
          PythonType pythonType = DynamicHelpers.GetPythonType(item);
          AssignSiteDelegates(
              PythonContext.GetHashSite(pythonType),
              DefaultContext.DefaultPythonContext.GetEqualSite(pythonType)
          );
      }

      _itemType = t;
  }
  /// <summary>
  /// Switches this storage to heterogeneous mode: replaces the bucket array
  /// with a copy, installs freshly made hash/equality call sites, and records
  /// HeterogeneousType as the item type.
  /// </summary>
  private void SetHeterogeneousSites() {
      // Swap in a copy of the bucket array first so that lock-free readers
      // still holding the old array only ever see the homogeneous buckets.
      Bucket[] copiedBuckets = (Bucket[])_buckets.Clone();
      _buckets = copiedBuckets;

      var hashSite = DefaultContext.DefaultPythonContext.MakeHashSite();
      var equalSite = DefaultContext.DefaultPythonContext.MakeEqualSite();
      AssignSiteDelegates(hashSite, equalSite);

      _itemType = HeterogeneousType;
  }
  /// <summary>
  /// Installs hash and equality delegates that forward to the given call
  /// sites by invoking each site's Target with the site itself as the first
  /// argument.
  /// </summary>
  private void AssignSiteDelegates(
      CallSite<Func<CallSite, object, int>> hashSite,
      CallSite<Func<CallSite, object, object, bool>> equalSite
  ) {
      _hashFunc = (item) => {
          return hashSite.Target(hashSite, item);
      };
      _eqFunc = (left, right) => {
          return equalSite.Target(equalSite, left, right);
      };
  }
  /// <summary>
  /// Helper to hash the given item. Strings are hashed directly with
  /// GetHashCode; every other item goes through the storage's current
  /// hash delegate.
  /// </summary>
  private int Hash(object item) {
      string text = item as string;
      if (text != null) {
          return text.GetHashCode();
      }

      return _hashFunc(item);
  }
  /// <summary>
  /// Picks the equality delegate to use when comparing items across two
  /// storages: self's delegate when both share an item type or self is
  /// heterogeneous, other's delegate when only other is heterogeneous, and
  /// the generic equality helper otherwise.
  /// </summary>
  private static Func<object, object, bool> GetEqFunc(SetStorage self, SetStorage other) {
      if (self._itemType == other._itemType) {
          return self._eqFunc;
      }
      if (self._itemType == HeterogeneousType) {
          return self._eqFunc;
      }
      if (other._itemType == HeterogeneousType) {
          return other._eqFunc;
      }

      return _genericEquals;
  }
  963. #endregion
  964. #region Internal Set Helpers
  /// <summary>
  /// Helper which ensures that the first argument x requires the least work to enumerate.
  ///
  /// The two references are swapped when x is non-empty and either y is empty
  /// or x has the longer bucket array.
  /// </summary>
  internal static void SortBySize(ref SetStorage x, ref SetStorage y) {
      if (x._count <= 0) {
          // x is already the cheapest to enumerate
          return;
      }

      bool xHasMoreBuckets = y._count > 0 && x._buckets.Length > y._buckets.Length;
      if (xHasMoreBuckets || y._count == 0) {
          SetStorage swapped = y;
          y = x;
          x = swapped;
      }
  }
  /// <summary>
  /// A factory which produces a SetStorage object from any Python iterable.
  /// For a set or frozenset the underlying storage is handed back as-is,
  /// without copying (copying is left to the caller if necessary); any other
  /// object is enumerated into a new storage.
  /// </summary>
  internal static SetStorage GetItems(object set) {
      SetStorage existing;
      return GetItemsIfSet(set, out existing) ? existing : GetItemsWorker(set);
  }
  /// <summary>
  /// A factory which produces a SetStorage object from any Python iterable.
  /// For a set or frozenset the underlying storage is handed back as-is,
  /// without copying (copying is left to the caller if necessary); any other
  /// object is enumerated into a new storage.
  /// Returns true if the given object was a set or frozen set, false otherwise.
  /// </summary>
  internal static bool GetItems(object set, out SetStorage items) {
      bool wasSet = GetItemsIfSet(set, out items);
      if (!wasSet) {
          items = GetItemsWorker(set);
      }

      return wasSet;
  }
  /// <summary>
  /// A factory which produces a SetStorage object from any Python iterable,
  /// returning storage that is guaranteed not to receive any outside
  /// mutations: a frozenset's storage is returned directly, a set's storage
  /// is cloned, and any other object is enumerated into a new storage.
  /// </summary>
  internal static SetStorage GetFrozenItems(object o) {
      Debug.Assert(!(o is SetStorage));

      FrozenSetCollection frozen = o as FrozenSetCollection;
      if (frozen != null) {
          return frozen._items;
      }

      SetCollection mutable = o as SetCollection;
      return mutable != null ? mutable._items.Clone() : GetItemsWorker(o);
  }
  /// <summary>
  /// Builds a new SetStorage from an arbitrary iterable that is not itself a
  /// set, frozenset or SetStorage (checked by debug assertions), by obtaining
  /// its enumerator through PythonOps.GetEnumerator.
  /// </summary>
  internal static SetStorage GetItemsWorker(object set) {
      Debug.Assert(!(set is SetStorage));
      Debug.Assert(!(set is FrozenSetCollection || set is SetCollection));

      return GetItemsWorker(PythonOps.GetEnumerator(set));
  }
  /// <summary>
  /// Drains the enumerator into a newly created SetStorage, adding each
  /// element with AddNoLock, and returns that storage.
  /// </summary>
  internal static SetStorage GetItemsWorker(IEnumerator en) {
      SetStorage result = new SetStorage();
      while (en.MoveNext()) {
          object current = en.Current;
          result.AddNoLock(current);
      }

      return result;
  }
  /// <summary>
  /// Extracts the SetStorage object from o if it is a set or frozenset and returns true.
  /// Otherwise sets items to null and returns false.
  /// </summary>
  public static bool GetItemsIfSet(object o, out SetStorage items) {
      Debug.Assert(!(o is SetStorage));

      FrozenSetCollection frozen = o as FrozenSetCollection;
      if (frozen != null) {
          items = frozen._items;
          return true;
      }

      SetCollection mutable = o as SetCollection;
      if (mutable == null) {
          items = null;
          return false;
      }

      items = mutable._items;
      return true;
  }
  /// <summary>
  /// Creates a hashable set from the given set, or does nothing if the given object
  /// is not a set. A set that is not hashable is replaced, through the ref
  /// argument, by a new frozenset wrapping a clone of its storage.
  /// </summary>
  /// <returns>True if o is a set or frozenset, false otherwise</returns>
  internal static bool GetHashableSetIfSet(ref object o) {
      SetCollection set = o as SetCollection;
      if (set == null) {
          return o is FrozenSetCollection;
      }

      if (!IsHashable(set)) {
          o = new FrozenSetCollection(set._items.Clone());
      }

      return true;
  }
  /// <summary>
  /// Reports whether the given set can be hashed. An exact SetCollection is
  /// never hashable; for a subclass, the "__hash__" slot must resolve on its
  /// Python type and yield a non-null value for this instance.
  /// </summary>
  private static bool IsHashable(SetCollection set) {
      if (set.GetType() == typeof(SetCollection)) {
          return false;
      }

      // we have a subclass: look for a usable hash function on it
      PythonType pythonType = DynamicHelpers.GetPythonType(set);

      PythonTypeSlot hashSlot;
      if (!pythonType.TryResolveSlot(DefaultContext.Default, "__hash__", out hashSlot)) {
          return false;
      }

      object hashValue;
      if (!hashSlot.TryGetValue(DefaultContext.Default, set, pythonType, out hashValue)) {
          return false;
      }

      return hashValue != null;
  }
  /// <summary>
  /// Builds the three-element tuple (type, (items,), null): the given type,
  /// a one-element tuple wrapping the result of items.GetItems(), and null.
  /// </summary>
  internal static PythonTuple Reduce(SetStorage items, PythonType type) {
      PythonTuple constructorArgs = PythonTuple.MakeTuple(items.GetItems());
      PythonTuple reduced = PythonTuple.MakeTuple(type, constructorArgs, null);
      return reduced;
  }
  /// <summary>
  /// Formats a set as "typename([item, item, ...])". The type name is "set"
  /// or "frozenset" for the exact collection types and the Python type name
  /// for anything else. A stored null is written first, followed by every
  /// occupied bucket in bucket order; each item is rendered with PythonOps.Repr.
  /// </summary>
  internal static string SetToString(CodeContext/*!*/ context, object set, SetStorage items) {
      Type runtimeType = set.GetType();
      string typeName = runtimeType == typeof(SetCollection) ? "set"
          : runtimeType == typeof(FrozenSetCollection) ? "frozenset"
          : PythonTypeOps.GetName(set);

      StringBuilder text = new StringBuilder();
      text.Append(typeName).Append("([");

      // empty until the first item has been written, ", " afterwards
      string separator = "";

      if (items._hasNull) {
          text.Append(separator).Append(PythonOps.Repr(context, null));
          separator = ", ";
      }

      if (items._count > 0) {
          Bucket[] buckets = items._buckets;
          for (int i = 0; i < buckets.Length; i++) {
              object item = buckets[i].Item;
              // skip empty slots and removal markers
              if (item == null || item == Removed) {
                  continue;
              }

              text.Append(separator).Append(PythonOps.Repr(context, item));
              separator = ", ";
          }
      }

      text.Append("])");
      return text.ToString();
  }
  1113. #endregion
  1114. #region Private Helpers
  /// <summary>
  /// Doubles the bucket array: every occupied bucket is re-added to a new
  /// array of twice the length using its stored hash code, the new array
  /// replaces the old one, and the maximum count is recomputed from the new
  /// length and the load factor.
  /// Raises a Python MemoryError if the bucket array already has 0x40000000
  /// or more slots.
  /// </summary>
  private void Grow() {
      Debug.Assert(_buckets != null);

      Bucket[] oldBuckets = _buckets;
      if (oldBuckets.Length >= 0x40000000) {
          throw PythonOps.MemoryError("set has reached its maximum size");
      }

      Bucket[] grown = new Bucket[oldBuckets.Length << 1];
      foreach (Bucket bucket in oldBuckets) {
          // empty slots and removal markers are not carried over
          if (bucket.Item == null || bucket.Item == Removed) {
              continue;
          }

          AddWorker(grown, bucket.Item, bucket.HashCode, _eqFunc, ref _version);
      }

      _buckets = grown;
      _maxCount = (int)(grown.Length * Load);
  }
  /// <summary>
  /// Advances index to the next bucket position, wrapping around to 0 once
  /// the end of the bucket array is reached.
  /// </summary>
  private static void ProbeNext(Bucket[]/*!*/ buckets, ref int index) {
      Debug.Assert(buckets != null);

      index += 1;
      if (index != buckets.Length) {
          return;
      }

      index = 0;
  }
  /// <summary>
  /// Computes the ceiling of the base-2 logarithm of x, i.e. the smallest n
  /// such that (1 &lt;&lt; n) &gt;= x.
  ///
  /// The previous implementation compared (1 &lt;&lt; (floor + 1)) against x,
  /// which can never be equal, so it always returned floor + 1 and was one
  /// too large for exact powers of two.
  /// </summary>
  /// <param name="x">The value to take the logarithm of; assumed to be positive.</param>
  /// <returns>ceil(log2(x)); for example 0 for 1, 2 for 3 and 4, 3 for 5.</returns>
  private static int CeilLog2(int x) {
      // Note: x is assumed to be positive
      int remaining = x;
      int floorLog = 0;

      // binary search for the position of the highest set bit
      if (remaining >= 1 << 16) {
          remaining >>= 16;
          floorLog += 16;
      }
      if (remaining >= 1 << 8) {
          remaining >>= 8;
          floorLog += 8;
      }
      if (remaining >= 1 << 4) {
          remaining >>= 4;
          floorLog += 4;
      }
      if (remaining >= 1 << 2) {
          remaining >>= 2;
          floorLog += 2;
      }
      if (remaining >= 1 << 1) {
          floorLog += 1;
      }

      // floorLog is now floor(log2(x)). For an exact power of two the floor
      // and the ceiling coincide; otherwise the ceiling is one higher.
      bool isPowerOfTwo = (x & (x - 1)) == 0;
      return isPowerOfTwo ? floorLog : floorLog + 1;
  }
  1166. #endregion
  1167. #if FEATURE_SERIALIZATION
  1168. #region ISerializable Members
  /// <summary>
  /// Writes this storage into the serialization info: the result of
  /// GetItems() under "buckets" and the null-membership flag under "hasnull".
  /// </summary>
  public void GetObjectData(SerializationInfo info, StreamingContext context) {
      var storedItems = GetItems();
      info.AddValue("buckets", storedItems);
      info.AddValue("hasnull", _hasNull);
  }
  1173. #endregion
  1174. #region IDeserializationCallback Members
  /// <summary>
  /// Completes deserialization. The SerializationInfo is expected in the
  /// first bucket (presumably stashed there by the deserialization
  /// constructor - confirm); the "buckets" list is read from it and every
  /// item re-added, then the "hasnull" flag is restored.
  /// </summary>
  void IDeserializationCallback.OnDeserialization(object sender) {
      // if we've received multiple OnDeserialization callbacks, only the
      // first one finds the SerializationInfo and does any work
      if (_buckets == null) {
          return;
      }

      SerializationInfo info = _buckets[0].Item as SerializationInfo;
      if (info == null) {
          return;
      }

      // drop the placeholder bucket array before re-adding the real items
      _buckets = null;

      List savedItems = (List)info.GetValue("buckets", typeof(List));
      foreach (object savedItem in savedItems) {
          AddNoLock(savedItem);
      }

      _hasNull = (bool)info.GetValue("hasnull", typeof(bool));
  }
  1189. #endregion
  1190. #endif
  1191. }
  1192. }