PageRenderTime 83ms CodeModel.GetById 59ms app.highlight 19ms RepoModel.GetById 2ms app.codeStats 0ms

/Lib/sets.py

http://unladen-swallow.googlecode.com/
Python | 579 lines | 449 code | 30 blank | 100 comment | 35 complexity | dbf548bdfd04f191f47a2a9ddf8a3ee0 MD5 | raw file
  1"""Classes to represent arbitrary sets (including sets of sets).
  2
  3This module implements sets using dictionaries whose values are
  4ignored.  The usual operations (union, intersection, deletion, etc.)
  5are provided as both methods and operators.
  6
  7Important: sets are not sequences!  While they support 'x in s',
  8'len(s)', and 'for x in s', none of those operations are unique for
  9sequences; for example, mappings support all three as well.  The
 10characteristic operation for sequences is subscripting with small
 11integers: s[i], for i in range(len(s)).  Sets don't support
 12subscripting at all.  Also, sequences allow multiple occurrences and
 13their elements have a definite order; sets on the other hand don't
 14record multiple occurrences and don't remember the order of element
 15insertion (which is why they don't support s[i]).
 16
 17The following classes are provided:
 18
 19BaseSet -- All the operations common to both mutable and immutable
 20    sets. This is an abstract class, not meant to be directly
 21    instantiated.
 22
 23Set -- Mutable sets, subclass of BaseSet; not hashable.
 24
 25ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
 26    An iterable argument is mandatory to create an ImmutableSet.
 27
 28_TemporarilyImmutableSet -- A wrapper around a Set, hashable,
 29    giving the same hash value as the immutable set equivalent
 30    would have.  Do not use this class directly.
 31
 32Only hashable objects can be added to a Set. In particular, you cannot
 33really add a Set as an element to another Set; if you try, what is
 34actually added is an ImmutableSet built from it (it compares equal to
 35the one you tried adding).
 36
 37When you ask if `x in y' where x is a Set and y is a Set or
 38ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
 39what's tested is actually `z in y'.
 40
 41"""
 42
 43# Code history:
 44#
 45# - Greg V. Wilson wrote the first version, using a different approach
 46#   to the mutable/immutable problem, and inheriting from dict.
 47#
 48# - Alex Martelli modified Greg's version to implement the current
 49#   Set/ImmutableSet approach, and make the data an attribute.
 50#
 51# - Guido van Rossum rewrote much of the code, made some API changes,
 52#   and cleaned up the docstrings.
 53#
 54# - Raymond Hettinger added a number of speedups and other
 55#   improvements.
 56
 57from __future__ import generators
 58try:
 59    from itertools import ifilter, ifilterfalse
 60except ImportError:
 61    # Code to make the module run under Py2.2
 62    def ifilter(predicate, iterable):
 63        if predicate is None:
 64            def predicate(x):
 65                return x
 66        for x in iterable:
 67            if predicate(x):
 68                yield x
 69    def ifilterfalse(predicate, iterable):
 70        if predicate is None:
 71            def predicate(x):
 72                return x
 73        for x in iterable:
 74            if not predicate(x):
 75                yield x
 76    try:
 77        True, False
 78    except NameError:
 79        True, False = (0==0, 0!=0)
 80
# Public names exported by a star-import of this module.
__all__ = ['BaseSet', 'Set', 'ImmutableSet']

import warnings
# Issued once at import time.  stacklevel=2 asks the warnings machinery
# to attribute the warning to the frame that triggered the import
# rather than to this module.
warnings.warn("the sets module is deprecated", DeprecationWarning,
                stacklevel=2)
 86
 87class BaseSet(object):
 88    """Common base class for mutable and immutable sets."""
 89
 90    __slots__ = ['_data']
 91
 92    # Constructor
 93
 94    def __init__(self):
 95        """This is an abstract class."""
 96        # Don't call this from a concrete subclass!
 97        if self.__class__ is BaseSet:
 98            raise TypeError, ("BaseSet is an abstract class.  "
 99                              "Use Set or ImmutableSet.")
100
101    # Standard protocols: __len__, __repr__, __str__, __iter__
102
103    def __len__(self):
104        """Return the number of elements of a set."""
105        return len(self._data)
106
107    def __repr__(self):
108        """Return string representation of a set.
109
110        This looks like 'Set([<list of elements>])'.
111        """
112        return self._repr()
113
114    # __str__ is the same as __repr__
115    __str__ = __repr__
116
117    def _repr(self, sorted=False):
118        elements = self._data.keys()
119        if sorted:
120            elements.sort()
121        return '%s(%r)' % (self.__class__.__name__, elements)
122
123    def __iter__(self):
124        """Return an iterator over the elements or a set.
125
126        This is the keys iterator for the underlying dict.
127        """
128        return self._data.iterkeys()
129
130    # Three-way comparison is not supported.  However, because __eq__ is
131    # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
132    # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
133    # case).
134
135    def __cmp__(self, other):
136        raise TypeError, "can't compare sets using cmp()"
137
138    # Equality comparisons using the underlying dicts.  Mixed-type comparisons
139    # are allowed here, where Set == z for non-Set z always returns False,
140    # and Set != z always True.  This allows expressions like "x in y" to
141    # give the expected result when y is a sequence of mixed types, not
142    # raising a pointless TypeError just because y contains a Set, or x is
143    # a Set and y contain's a non-set ("in" invokes only __eq__).
144    # Subtle:  it would be nicer if __eq__ and __ne__ could return
145    # NotImplemented instead of True or False.  Then the other comparand
146    # would get a chance to determine the result, and if the other comparand
147    # also returned NotImplemented then it would fall back to object address
148    # comparison (which would always return False for __eq__ and always
149    # True for __ne__).  However, that doesn't work, because this type
150    # *also* implements __cmp__:  if, e.g., __eq__ returns NotImplemented,
151    # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.
152
153    def __eq__(self, other):
154        if isinstance(other, BaseSet):
155            return self._data == other._data
156        else:
157            return False
158
159    def __ne__(self, other):
160        if isinstance(other, BaseSet):
161            return self._data != other._data
162        else:
163            return True
164
165    # Copying operations
166
167    def copy(self):
168        """Return a shallow copy of a set."""
169        result = self.__class__()
170        result._data.update(self._data)
171        return result
172
173    __copy__ = copy # For the copy module
174
175    def __deepcopy__(self, memo):
176        """Return a deep copy of a set; used by copy module."""
177        # This pre-creates the result and inserts it in the memo
178        # early, in case the deep copy recurses into another reference
179        # to this same set.  A set can't be an element of itself, but
180        # it can certainly contain an object that has a reference to
181        # itself.
182        from copy import deepcopy
183        result = self.__class__()
184        memo[id(self)] = result
185        data = result._data
186        value = True
187        for elt in self:
188            data[deepcopy(elt, memo)] = value
189        return result
190
191    # Standard set operations: union, intersection, both differences.
192    # Each has an operator version (e.g. __or__, invoked with |) and a
193    # method version (e.g. union).
194    # Subtle:  Each pair requires distinct code so that the outcome is
195    # correct when the type of other isn't suitable.  For example, if
196    # we did "union = __or__" instead, then Set().union(3) would return
197    # NotImplemented instead of raising TypeError (albeit that *why* it
198    # raises TypeError as-is is also a bit subtle).
199
200    def __or__(self, other):
201        """Return the union of two sets as a new set.
202
203        (I.e. all elements that are in either set.)
204        """
205        if not isinstance(other, BaseSet):
206            return NotImplemented
207        return self.union(other)
208
209    def union(self, other):
210        """Return the union of two sets as a new set.
211
212        (I.e. all elements that are in either set.)
213        """
214        result = self.__class__(self)
215        result._update(other)
216        return result
217
218    def __and__(self, other):
219        """Return the intersection of two sets as a new set.
220
221        (I.e. all elements that are in both sets.)
222        """
223        if not isinstance(other, BaseSet):
224            return NotImplemented
225        return self.intersection(other)
226
227    def intersection(self, other):
228        """Return the intersection of two sets as a new set.
229
230        (I.e. all elements that are in both sets.)
231        """
232        if not isinstance(other, BaseSet):
233            other = Set(other)
234        if len(self) <= len(other):
235            little, big = self, other
236        else:
237            little, big = other, self
238        common = ifilter(big._data.has_key, little)
239        return self.__class__(common)
240
241    def __xor__(self, other):
242        """Return the symmetric difference of two sets as a new set.
243
244        (I.e. all elements that are in exactly one of the sets.)
245        """
246        if not isinstance(other, BaseSet):
247            return NotImplemented
248        return self.symmetric_difference(other)
249
250    def symmetric_difference(self, other):
251        """Return the symmetric difference of two sets as a new set.
252
253        (I.e. all elements that are in exactly one of the sets.)
254        """
255        result = self.__class__()
256        data = result._data
257        value = True
258        selfdata = self._data
259        try:
260            otherdata = other._data
261        except AttributeError:
262            otherdata = Set(other)._data
263        for elt in ifilterfalse(otherdata.has_key, selfdata):
264            data[elt] = value
265        for elt in ifilterfalse(selfdata.has_key, otherdata):
266            data[elt] = value
267        return result
268
269    def  __sub__(self, other):
270        """Return the difference of two sets as a new Set.
271
272        (I.e. all elements that are in this set and not in the other.)
273        """
274        if not isinstance(other, BaseSet):
275            return NotImplemented
276        return self.difference(other)
277
278    def difference(self, other):
279        """Return the difference of two sets as a new Set.
280
281        (I.e. all elements that are in this set and not in the other.)
282        """
283        result = self.__class__()
284        data = result._data
285        try:
286            otherdata = other._data
287        except AttributeError:
288            otherdata = Set(other)._data
289        value = True
290        for elt in ifilterfalse(otherdata.has_key, self):
291            data[elt] = value
292        return result
293
294    # Membership test
295
296    def __contains__(self, element):
297        """Report whether an element is a member of a set.
298
299        (Called in response to the expression `element in self'.)
300        """
301        try:
302            return element in self._data
303        except TypeError:
304            transform = getattr(element, "__as_temporarily_immutable__", None)
305            if transform is None:
306                raise # re-raise the TypeError exception we caught
307            return transform() in self._data
308
309    # Subset and superset test
310
311    def issubset(self, other):
312        """Report whether another set contains this set."""
313        self._binary_sanity_check(other)
314        if len(self) > len(other):  # Fast check for obvious cases
315            return False
316        for elt in ifilterfalse(other._data.has_key, self):
317            return False
318        return True
319
320    def issuperset(self, other):
321        """Report whether this set contains another set."""
322        self._binary_sanity_check(other)
323        if len(self) < len(other):  # Fast check for obvious cases
324            return False
325        for elt in ifilterfalse(self._data.has_key, other):
326            return False
327        return True
328
329    # Inequality comparisons using the is-subset relation.
330    __le__ = issubset
331    __ge__ = issuperset
332
333    def __lt__(self, other):
334        self._binary_sanity_check(other)
335        return len(self) < len(other) and self.issubset(other)
336
337    def __gt__(self, other):
338        self._binary_sanity_check(other)
339        return len(self) > len(other) and self.issuperset(other)
340
341    # Assorted helpers
342
343    def _binary_sanity_check(self, other):
344        # Check that the other argument to a binary operation is also
345        # a set, raising a TypeError otherwise.
346        if not isinstance(other, BaseSet):
347            raise TypeError, "Binary operation only permitted between sets"
348
349    def _compute_hash(self):
350        # Calculate hash code for a set by xor'ing the hash codes of
351        # the elements.  This ensures that the hash code does not depend
352        # on the order in which elements are added to the set.  This is
353        # not called __hash__ because a BaseSet should not be hashable;
354        # only an ImmutableSet is hashable.
355        result = 0
356        for elt in self:
357            result ^= hash(elt)
358        return result
359
360    def _update(self, iterable):
361        # The main loop for update() and the subclass __init__() methods.
362        data = self._data
363
364        # Use the fast update() method when a dictionary is available.
365        if isinstance(iterable, BaseSet):
366            data.update(iterable._data)
367            return
368
369        value = True
370
371        if type(iterable) in (list, tuple, xrange):
372            # Optimized: we know that __iter__() and next() can't
373            # raise TypeError, so we can move 'try:' out of the loop.
374            it = iter(iterable)
375            while True:
376                try:
377                    for element in it:
378                        data[element] = value
379                    return
380                except TypeError:
381                    transform = getattr(element, "__as_immutable__", None)
382                    if transform is None:
383                        raise # re-raise the TypeError exception we caught
384                    data[transform()] = value
385        else:
386            # Safe: only catch TypeError where intended
387            for element in iterable:
388                try:
389                    data[element] = value
390                except TypeError:
391                    transform = getattr(element, "__as_immutable__", None)
392                    if transform is None:
393                        raise # re-raise the TypeError exception we caught
394                    data[transform()] = value
395
396
class ImmutableSet(BaseSet):
    """Immutable, hashable set."""

    __slots__ = ['_hashcode']

    # Everything from BaseSet, plus hashing.

    def __init__(self, iterable=None):
        """Build the set, filling it from iterable when one is supplied."""
        self._hashcode = None
        self._data = {}
        if iterable is None:
            return
        self._update(iterable)

    def __hash__(self):
        """Return the set's hash, computing and caching it on first use."""
        cached = self._hashcode
        if cached is None:
            cached = self._hashcode = self._compute_hash()
        return cached

    def __getstate__(self):
        """Return the (element dict, cached hash) pair as pickle state."""
        state = (self._data, self._hashcode)
        return state

    def __setstate__(self, state):
        """Restore the set from a pair as produced by __getstate__."""
        contents, hashcode = state
        self._data = contents
        self._hashcode = hashcode
421
class Set(BaseSet):
    """Mutable set; instances are not hashable."""

    __slots__ = []

    # Everything from BaseSet, plus the mutating operations.

    def __init__(self, iterable=None):
        """Create a set, populated from iterable when one is given."""
        self._data = {}
        if iterable is None:
            return
        self._update(iterable)

    def __getstate__(self):
        # The element dict is wrapped in a 1-tuple -- presumably so the
        # state is never a false value (an empty dict), which the pickle
        # machinery would otherwise ignore.  TODO confirm against the
        # pickle documentation.
        return (self._data,)

    def __setstate__(self, data):
        # Inverse of __getstate__: unwrap the single-item state.
        (self._data,) = data

    # object.__hash__ would otherwise be inherited, so switch hashing
    # off explicitly.
    __hash__ = None

    # In-place union, intersection and differences.
    # Subtle:  the xyz_update() methods return None on purpose, like every
    # mutating operation on the built-in containers, whereas the __ixyz__
    # operator hooks are required to return self.

    def __ior__(self, other):
        """In-place union: add every element of the set other."""
        self._binary_sanity_check(other)
        extra = other._data
        self._data.update(extra)
        return self

    def union_update(self, other):
        """In-place union with the elements of other."""
        self._update(other)

    def __iand__(self, other):
        """In-place intersection: keep only elements also in the set other."""
        self._binary_sanity_check(other)
        common = self & other
        self._data = common._data
        return self

    def intersection_update(self, other):
        """In-place intersection with the elements of other."""
        if not isinstance(other, BaseSet):
            common = self.intersection(other)
            self._data = common._data
        else:
            self &= other

    def __ixor__(self, other):
        """In-place symmetric difference with the set other."""
        self._binary_sanity_check(other)
        self.symmetric_difference_update(other)
        return self

    def symmetric_difference_update(self, other):
        """In-place symmetric difference with the elements of other."""
        contents = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if other is self:
            self.clear()
        # Toggle membership of each of other's elements.
        for member in other:
            if member not in contents:
                contents[member] = True
            else:
                del contents[member]

    def __isub__(self, other):
        """In-place difference: drop every element of the set other."""
        self._binary_sanity_check(other)
        self.difference_update(other)
        return self

    def difference_update(self, other):
        """In-place difference: drop every element found in other."""
        contents = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if other is self:
            self.clear()
        for member in other:
            if member in contents:
                del contents[member]

    # Dict-style bulk mutations: update, clear

    def update(self, iterable):
        """Insert every value produced by iterable (e.g. a list or file)."""
        self._update(iterable)

    def clear(self):
        """Empty the set."""
        self._data.clear()

    # One-element mutations: add, remove, discard

    def add(self, element):
        """Insert element into the set.

        Nothing changes when the element is already a member.
        """
        contents = self._data
        try:
            contents[element] = True
        except TypeError:
            # Not storable as-is: fall back to its immutable form if the
            # element can supply one.
            to_immutable = getattr(element, "__as_immutable__", None)
            if to_immutable is None:
                raise # propagate the TypeError just caught
            contents[to_immutable()] = True

    def remove(self, element):
        """Delete element from the set; it has to be a member.

        A KeyError is raised when the element is absent.
        """
        contents = self._data
        try:
            del contents[element]
        except TypeError:
            # Not usable as a key as-is: look it up through its
            # temporarily immutable wrapper if the element has one.
            wrap = getattr(element, "__as_temporarily_immutable__", None)
            if wrap is None:
                raise # propagate the TypeError just caught
            del contents[wrap()]

    def discard(self, element):
        """Delete element from the set when present.

        An absent element is silently ignored.
        """
        try:
            self.remove(element)
        except KeyError:
            pass

    def pop(self):
        """Take an arbitrary element out of the set and return it."""
        item = self._data.popitem()
        return item[0]

    def __as_immutable__(self):
        # An ImmutableSet built from this set's current elements.
        frozen = ImmutableSet(self)
        return frozen

    def __as_temporarily_immutable__(self):
        # A hashable wrapper around this very set (no copy is made here).
        wrapper = _TemporarilyImmutableSet(self)
        return wrapper
568
569
class _TemporarilyImmutableSet(BaseSet):
    # Presents a mutable set as though it were immutable for a while.
    # Only hashing and the inherited equality comparisons are catered for.

    def __init__(self, set):
        self._set = set
        # Share the wrapped set's dict: BaseSet.__eq__ compares _data.
        self._data = set._data

    def __hash__(self):
        # Delegate to the wrapped set's element-based hash computation.
        wrapped = self._set
        return wrapped._compute_hash()