PyXR

c:\python24\lib \ sets.py



0001 """Classes to represent arbitrary sets (including sets of sets).
0002 
0003 This module implements sets using dictionaries whose values are
0004 ignored.  The usual operations (union, intersection, deletion, etc.)
0005 are provided as both methods and operators.
0006 
0007 Important: sets are not sequences!  While they support 'x in s',
0008 'len(s)', and 'for x in s', none of those operations are unique for
0009 sequences; for example, mappings support all three as well.  The
0010 characteristic operation for sequences is subscripting with small
0011 integers: s[i], for i in range(len(s)).  Sets don't support
0012 subscripting at all.  Also, sequences allow multiple occurrences and
0013 their elements have a definite order; sets on the other hand don't
0014 record multiple occurrences and don't remember the order of element
0015 insertion (which is why they don't support s[i]).
0016 
0017 The following classes are provided:
0018 
0019 BaseSet -- All the operations common to both mutable and immutable
0020     sets. This is an abstract class, not meant to be directly
0021     instantiated.
0022 
0023 Set -- Mutable sets, subclass of BaseSet; not hashable.
0024 
0025 ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
0026     An iterable argument is mandatory to create an ImmutableSet.
0027 
0028 _TemporarilyImmutableSet -- A wrapper around a Set, hashable,
0029     giving the same hash value as the immutable set equivalent
0030     would have.  Do not use this class directly.
0031 
0032 Only hashable objects can be added to a Set. In particular, you cannot
0033 really add a Set as an element to another Set; if you try, what is
0034 actually added is an ImmutableSet built from it (it compares equal to
0035 the one you tried adding).
0036 
0037 When you ask if `x in y' where x is a Set and y is a Set or
0038 ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
0039 what's tested is actually `z in y'.
0040 
0041 """
0042 
0043 # Code history:
0044 #
0045 # - Greg V. Wilson wrote the first version, using a different approach
0046 #   to the mutable/immutable problem, and inheriting from dict.
0047 #
0048 # - Alex Martelli modified Greg's version to implement the current
0049 #   Set/ImmutableSet approach, and make the data an attribute.
0050 #
0051 # - Guido van Rossum rewrote much of the code, made some API changes,
0052 #   and cleaned up the docstrings.
0053 #
0054 # - Raymond Hettinger added a number of speedups and other
0055 #   improvements.
0056 
0057 from __future__ import generators
0058 try:
0059     from itertools import ifilter, ifilterfalse
0060 except ImportError:
0061     # Code to make the module run under Py2.2
0062     def ifilter(predicate, iterable):
0063         if predicate is None:
0064             def predicate(x):
0065                 return x
0066         for x in iterable:
0067             if predicate(x):
0068                 yield x
0069     def ifilterfalse(predicate, iterable):
0070         if predicate is None:
0071             def predicate(x):
0072                 return x
0073         for x in iterable:
0074             if not predicate(x):
0075                 yield x
0076     try:
0077         True, False
0078     except NameError:
0079         True, False = (0==0, 0!=0)
0080 
0081 __all__ = ['BaseSet', 'Set', 'ImmutableSet']
0082 
0083 class BaseSet(object):
0084     """Common base class for mutable and immutable sets."""
0085 
0086     __slots__ = ['_data']
0087 
0088     # Constructor
0089 
0090     def __init__(self):
0091         """This is an abstract class."""
0092         # Don't call this from a concrete subclass!
0093         if self.__class__ is BaseSet:
0094             raise TypeError, ("BaseSet is an abstract class.  "
0095                               "Use Set or ImmutableSet.")
0096 
0097     # Standard protocols: __len__, __repr__, __str__, __iter__
0098 
0099     def __len__(self):
0100         """Return the number of elements of a set."""
0101         return len(self._data)
0102 
0103     def __repr__(self):
0104         """Return string representation of a set.
0105 
0106         This looks like 'Set([<list of elements>])'.
0107         """
0108         return self._repr()
0109 
0110     # __str__ is the same as __repr__
0111     __str__ = __repr__
0112 
0113     def _repr(self, sorted=False):
0114         elements = self._data.keys()
0115         if sorted:
0116             elements.sort()
0117         return '%s(%r)' % (self.__class__.__name__, elements)
0118 
0119     def __iter__(self):
0120         """Return an iterator over the elements or a set.
0121 
0122         This is the keys iterator for the underlying dict.
0123         """
0124         return self._data.iterkeys()
0125 
0126     # Three-way comparison is not supported.  However, because __eq__ is
0127     # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
0128     # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
0129     # case).
0130 
0131     def __cmp__(self, other):
0132         raise TypeError, "can't compare sets using cmp()"
0133 
0134     # Equality comparisons using the underlying dicts.  Mixed-type comparisons
0135     # are allowed here, where Set == z for non-Set z always returns False,
0136     # and Set != z always True.  This allows expressions like "x in y" to
0137     # give the expected result when y is a sequence of mixed types, not
0138     # raising a pointless TypeError just because y contains a Set, or x is
0139     # a Set and y contain's a non-set ("in" invokes only __eq__).
0140     # Subtle:  it would be nicer if __eq__ and __ne__ could return
0141     # NotImplemented instead of True or False.  Then the other comparand
0142     # would get a chance to determine the result, and if the other comparand
0143     # also returned NotImplemented then it would fall back to object address
0144     # comparison (which would always return False for __eq__ and always
0145     # True for __ne__).  However, that doesn't work, because this type
0146     # *also* implements __cmp__:  if, e.g., __eq__ returns NotImplemented,
0147     # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.
0148 
0149     def __eq__(self, other):
0150         if isinstance(other, BaseSet):
0151             return self._data == other._data
0152         else:
0153             return False
0154 
0155     def __ne__(self, other):
0156         if isinstance(other, BaseSet):
0157             return self._data != other._data
0158         else:
0159             return True
0160 
0161     # Copying operations
0162 
0163     def copy(self):
0164         """Return a shallow copy of a set."""
0165         result = self.__class__()
0166         result._data.update(self._data)
0167         return result
0168 
0169     __copy__ = copy # For the copy module
0170 
0171     def __deepcopy__(self, memo):
0172         """Return a deep copy of a set; used by copy module."""
0173         # This pre-creates the result and inserts it in the memo
0174         # early, in case the deep copy recurses into another reference
0175         # to this same set.  A set can't be an element of itself, but
0176         # it can certainly contain an object that has a reference to
0177         # itself.
0178         from copy import deepcopy
0179         result = self.__class__()
0180         memo[id(self)] = result
0181         data = result._data
0182         value = True
0183         for elt in self:
0184             data[deepcopy(elt, memo)] = value
0185         return result
0186 
0187     # Standard set operations: union, intersection, both differences.
0188     # Each has an operator version (e.g. __or__, invoked with |) and a
0189     # method version (e.g. union).
0190     # Subtle:  Each pair requires distinct code so that the outcome is
0191     # correct when the type of other isn't suitable.  For example, if
0192     # we did "union = __or__" instead, then Set().union(3) would return
0193     # NotImplemented instead of raising TypeError (albeit that *why* it
0194     # raises TypeError as-is is also a bit subtle).
0195 
0196     def __or__(self, other):
0197         """Return the union of two sets as a new set.
0198 
0199         (I.e. all elements that are in either set.)
0200         """
0201         if not isinstance(other, BaseSet):
0202             return NotImplemented
0203         return self.union(other)
0204 
0205     def union(self, other):
0206         """Return the union of two sets as a new set.
0207 
0208         (I.e. all elements that are in either set.)
0209         """
0210         result = self.__class__(self)
0211         result._update(other)
0212         return result
0213 
0214     def __and__(self, other):
0215         """Return the intersection of two sets as a new set.
0216 
0217         (I.e. all elements that are in both sets.)
0218         """
0219         if not isinstance(other, BaseSet):
0220             return NotImplemented
0221         return self.intersection(other)
0222 
0223     def intersection(self, other):
0224         """Return the intersection of two sets as a new set.
0225 
0226         (I.e. all elements that are in both sets.)
0227         """
0228         if not isinstance(other, BaseSet):
0229             other = Set(other)
0230         if len(self) <= len(other):
0231             little, big = self, other
0232         else:
0233             little, big = other, self
0234         common = ifilter(big._data.has_key, little)
0235         return self.__class__(common)
0236 
0237     def __xor__(self, other):
0238         """Return the symmetric difference of two sets as a new set.
0239 
0240         (I.e. all elements that are in exactly one of the sets.)
0241         """
0242         if not isinstance(other, BaseSet):
0243             return NotImplemented
0244         return self.symmetric_difference(other)
0245 
0246     def symmetric_difference(self, other):
0247         """Return the symmetric difference of two sets as a new set.
0248 
0249         (I.e. all elements that are in exactly one of the sets.)
0250         """
0251         result = self.__class__()
0252         data = result._data
0253         value = True
0254         selfdata = self._data
0255         try:
0256             otherdata = other._data
0257         except AttributeError:
0258             otherdata = Set(other)._data
0259         for elt in ifilterfalse(otherdata.has_key, selfdata):
0260             data[elt] = value
0261         for elt in ifilterfalse(selfdata.has_key, otherdata):
0262             data[elt] = value
0263         return result
0264 
0265     def  __sub__(self, other):
0266         """Return the difference of two sets as a new Set.
0267 
0268         (I.e. all elements that are in this set and not in the other.)
0269         """
0270         if not isinstance(other, BaseSet):
0271             return NotImplemented
0272         return self.difference(other)
0273 
0274     def difference(self, other):
0275         """Return the difference of two sets as a new Set.
0276 
0277         (I.e. all elements that are in this set and not in the other.)
0278         """
0279         result = self.__class__()
0280         data = result._data
0281         try:
0282             otherdata = other._data
0283         except AttributeError:
0284             otherdata = Set(other)._data
0285         value = True
0286         for elt in ifilterfalse(otherdata.has_key, self):
0287             data[elt] = value
0288         return result
0289 
0290     # Membership test
0291 
0292     def __contains__(self, element):
0293         """Report whether an element is a member of a set.
0294 
0295         (Called in response to the expression `element in self'.)
0296         """
0297         try:
0298             return element in self._data
0299         except TypeError:
0300             transform = getattr(element, "__as_temporarily_immutable__", None)
0301             if transform is None:
0302                 raise # re-raise the TypeError exception we caught
0303             return transform() in self._data
0304 
0305     # Subset and superset test
0306 
0307     def issubset(self, other):
0308         """Report whether another set contains this set."""
0309         self._binary_sanity_check(other)
0310         if len(self) > len(other):  # Fast check for obvious cases
0311             return False
0312         for elt in ifilterfalse(other._data.has_key, self):
0313             return False
0314         return True
0315 
0316     def issuperset(self, other):
0317         """Report whether this set contains another set."""
0318         self._binary_sanity_check(other)
0319         if len(self) < len(other):  # Fast check for obvious cases
0320             return False
0321         for elt in ifilterfalse(self._data.has_key, other):
0322             return False
0323         return True
0324 
0325     # Inequality comparisons using the is-subset relation.
0326     __le__ = issubset
0327     __ge__ = issuperset
0328 
0329     def __lt__(self, other):
0330         self._binary_sanity_check(other)
0331         return len(self) < len(other) and self.issubset(other)
0332 
0333     def __gt__(self, other):
0334         self._binary_sanity_check(other)
0335         return len(self) > len(other) and self.issuperset(other)
0336 
0337     # Assorted helpers
0338 
0339     def _binary_sanity_check(self, other):
0340         # Check that the other argument to a binary operation is also
0341         # a set, raising a TypeError otherwise.
0342         if not isinstance(other, BaseSet):
0343             raise TypeError, "Binary operation only permitted between sets"
0344 
0345     def _compute_hash(self):
0346         # Calculate hash code for a set by xor'ing the hash codes of
0347         # the elements.  This ensures that the hash code does not depend
0348         # on the order in which elements are added to the set.  This is
0349         # not called __hash__ because a BaseSet should not be hashable;
0350         # only an ImmutableSet is hashable.
0351         result = 0
0352         for elt in self:
0353             result ^= hash(elt)
0354         return result
0355 
0356     def _update(self, iterable):
0357         # The main loop for update() and the subclass __init__() methods.
0358         data = self._data
0359 
0360         # Use the fast update() method when a dictionary is available.
0361         if isinstance(iterable, BaseSet):
0362             data.update(iterable._data)
0363             return
0364 
0365         value = True
0366 
0367         if type(iterable) in (list, tuple, xrange):
0368             # Optimized: we know that __iter__() and next() can't
0369             # raise TypeError, so we can move 'try:' out of the loop.
0370             it = iter(iterable)
0371             while True:
0372                 try:
0373                     for element in it:
0374                         data[element] = value
0375                     return
0376                 except TypeError:
0377                     transform = getattr(element, "__as_immutable__", None)
0378                     if transform is None:
0379                         raise # re-raise the TypeError exception we caught
0380                     data[transform()] = value
0381         else:
0382             # Safe: only catch TypeError where intended
0383             for element in iterable:
0384                 try:
0385                     data[element] = value
0386                 except TypeError:
0387                     transform = getattr(element, "__as_immutable__", None)
0388                     if transform is None:
0389                         raise # re-raise the TypeError exception we caught
0390                     data[transform()] = value
0391 
0392 
0393 class ImmutableSet(BaseSet):
0394     """Immutable set class."""
0395 
0396     __slots__ = ['_hashcode']
0397 
0398     # BaseSet + hashing
0399 
0400     def __init__(self, iterable=None):
0401         """Construct an immutable set from an optional iterable."""
0402         self._hashcode = None
0403         self._data = {}
0404         if iterable is not None:
0405             self._update(iterable)
0406 
0407     def __hash__(self):
0408         if self._hashcode is None:
0409             self._hashcode = self._compute_hash()
0410         return self._hashcode
0411 
0412     def __getstate__(self):
0413         return self._data, self._hashcode
0414 
0415     def __setstate__(self, state):
0416         self._data, self._hashcode = state
0417 
0418 class Set(BaseSet):
0419     """ Mutable set class."""
0420 
0421     __slots__ = []
0422 
0423     # BaseSet + operations requiring mutability; no hashing
0424 
0425     def __init__(self, iterable=None):
0426         """Construct a set from an optional iterable."""
0427         self._data = {}
0428         if iterable is not None:
0429             self._update(iterable)
0430 
0431     def __getstate__(self):
0432         # getstate's results are ignored if it is not
0433         return self._data,
0434 
0435     def __setstate__(self, data):
0436         self._data, = data
0437 
0438     def __hash__(self):
0439         """A Set cannot be hashed."""
0440         # We inherit object.__hash__, so we must deny this explicitly
0441         raise TypeError, "Can't hash a Set, only an ImmutableSet."
0442 
0443     # In-place union, intersection, differences.
0444     # Subtle:  The xyz_update() functions deliberately return None,
0445     # as do all mutating operations on built-in container types.
0446     # The __xyz__ spellings have to return self, though.
0447 
0448     def __ior__(self, other):
0449         """Update a set with the union of itself and another."""
0450         self._binary_sanity_check(other)
0451         self._data.update(other._data)
0452         return self
0453 
0454     def union_update(self, other):
0455         """Update a set with the union of itself and another."""
0456         self._update(other)
0457 
0458     def __iand__(self, other):
0459         """Update a set with the intersection of itself and another."""
0460         self._binary_sanity_check(other)
0461         self._data = (self & other)._data
0462         return self
0463 
0464     def intersection_update(self, other):
0465         """Update a set with the intersection of itself and another."""
0466         if isinstance(other, BaseSet):
0467             self &= other
0468         else:
0469             self._data = (self.intersection(other))._data
0470 
0471     def __ixor__(self, other):
0472         """Update a set with the symmetric difference of itself and another."""
0473         self._binary_sanity_check(other)
0474         self.symmetric_difference_update(other)
0475         return self
0476 
0477     def symmetric_difference_update(self, other):
0478         """Update a set with the symmetric difference of itself and another."""
0479         data = self._data
0480         value = True
0481         if not isinstance(other, BaseSet):
0482             other = Set(other)
0483         for elt in other:
0484             if elt in data:
0485                 del data[elt]
0486             else:
0487                 data[elt] = value
0488 
0489     def __isub__(self, other):
0490         """Remove all elements of another set from this set."""
0491         self._binary_sanity_check(other)
0492         self.difference_update(other)
0493         return self
0494 
0495     def difference_update(self, other):
0496         """Remove all elements of another set from this set."""
0497         data = self._data
0498         if not isinstance(other, BaseSet):
0499             other = Set(other)
0500         for elt in ifilter(data.has_key, other):
0501             del data[elt]
0502 
0503     # Python dict-like mass mutations: update, clear
0504 
0505     def update(self, iterable):
0506         """Add all values from an iterable (such as a list or file)."""
0507         self._update(iterable)
0508 
0509     def clear(self):
0510         """Remove all elements from this set."""
0511         self._data.clear()
0512 
0513     # Single-element mutations: add, remove, discard
0514 
0515     def add(self, element):
0516         """Add an element to a set.
0517 
0518         This has no effect if the element is already present.
0519         """
0520         try:
0521             self._data[element] = True
0522         except TypeError:
0523             transform = getattr(element, "__as_immutable__", None)
0524             if transform is None:
0525                 raise # re-raise the TypeError exception we caught
0526             self._data[transform()] = True
0527 
0528     def remove(self, element):
0529         """Remove an element from a set; it must be a member.
0530 
0531         If the element is not a member, raise a KeyError.
0532         """
0533         try:
0534             del self._data[element]
0535         except TypeError:
0536             transform = getattr(element, "__as_temporarily_immutable__", None)
0537             if transform is None:
0538                 raise # re-raise the TypeError exception we caught
0539             del self._data[transform()]
0540 
0541     def discard(self, element):
0542         """Remove an element from a set if it is a member.
0543 
0544         If the element is not a member, do nothing.
0545         """
0546         try:
0547             self.remove(element)
0548         except KeyError:
0549             pass
0550 
0551     def pop(self):
0552         """Remove and return an arbitrary set element."""
0553         return self._data.popitem()[0]
0554 
0555     def __as_immutable__(self):
0556         # Return a copy of self as an immutable set
0557         return ImmutableSet(self)
0558 
0559     def __as_temporarily_immutable__(self):
0560         # Return self wrapped in a temporarily immutable set
0561         return _TemporarilyImmutableSet(self)
0562 
0563 
0564 class _TemporarilyImmutableSet(BaseSet):
0565     # Wrap a mutable set as if it was temporarily immutable.
0566     # This only supplies hashing and equality comparisons.
0567 
0568     def __init__(self, set):
0569         self._set = set
0570         self._data = set._data  # Needed by ImmutableSet.__eq__()
0571 
0572     def __hash__(self):
0573         return self._set._compute_hash()
0574 

Generated by PyXR 0.9.4
SourceForge.net Logo