0001 """Classes to represent arbitrary sets (including sets of sets). 0002 0003 This module implements sets using dictionaries whose values are 0004 ignored. The usual operations (union, intersection, deletion, etc.) 0005 are provided as both methods and operators. 0006 0007 Important: sets are not sequences! While they support 'x in s', 0008 'len(s)', and 'for x in s', none of those operations are unique for 0009 sequences; for example, mappings support all three as well. The 0010 characteristic operation for sequences is subscripting with small 0011 integers: s[i], for i in range(len(s)). Sets don't support 0012 subscripting at all. Also, sequences allow multiple occurrences and 0013 their elements have a definite order; sets on the other hand don't 0014 record multiple occurrences and don't remember the order of element 0015 insertion (which is why they don't support s[i]). 0016 0017 The following classes are provided: 0018 0019 BaseSet -- All the operations common to both mutable and immutable 0020 sets. This is an abstract class, not meant to be directly 0021 instantiated. 0022 0023 Set -- Mutable sets, subclass of BaseSet; not hashable. 0024 0025 ImmutableSet -- Immutable sets, subclass of BaseSet; hashable. 0026 An iterable argument is mandatory to create an ImmutableSet. 0027 0028 _TemporarilyImmutableSet -- A wrapper around a Set, hashable, 0029 giving the same hash value as the immutable set equivalent 0030 would have. Do not use this class directly. 0031 0032 Only hashable objects can be added to a Set. In particular, you cannot 0033 really add a Set as an element to another Set; if you try, what is 0034 actually added is an ImmutableSet built from it (it compares equal to 0035 the one you tried adding). 0036 0037 When you ask if `x in y' where x is a Set and y is a Set or 0038 ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and 0039 what's tested is actually `z in y'. 0040 0041 """ 0042 0043 # Code history: 0044 # 0045 # - Greg V. Wilson wrote the first version, using a different approach 0046 # to the mutable/immutable problem, and inheriting from dict. 0047 # 0048 # - Alex Martelli modified Greg's version to implement the current 0049 # Set/ImmutableSet approach, and make the data an attribute. 0050 # 0051 # - Guido van Rossum rewrote much of the code, made some API changes, 0052 # and cleaned up the docstrings. 0053 # 0054 # - Raymond Hettinger added a number of speedups and other 0055 # improvements. 0056 0057 from __future__ import generators 0058 try: 0059 from itertools import ifilter, ifilterfalse 0060 except ImportError: 0061 # Code to make the module run under Py2.2 0062 def ifilter(predicate, iterable): 0063 if predicate is None: 0064 def predicate(x): 0065 return x 0066 for x in iterable: 0067 if predicate(x): 0068 yield x 0069 def ifilterfalse(predicate, iterable): 0070 if predicate is None: 0071 def predicate(x): 0072 return x 0073 for x in iterable: 0074 if not predicate(x): 0075 yield x 0076 try: 0077 True, False 0078 except NameError: 0079 True, False = (0==0, 0!=0) 0080 0081 __all__ = ['BaseSet', 'Set', 'ImmutableSet'] 0082 0083 class BaseSet(object): 0084 """Common base class for mutable and immutable sets.""" 0085 0086 __slots__ = ['_data'] 0087 0088 # Constructor 0089 0090 def __init__(self): 0091 """This is an abstract class.""" 0092 # Don't call this from a concrete subclass! 0093 if self.__class__ is BaseSet: 0094 raise TypeError, ("BaseSet is an abstract class. " 0095 "Use Set or ImmutableSet.") 0096 0097 # Standard protocols: __len__, __repr__, __str__, __iter__ 0098 0099 def __len__(self): 0100 """Return the number of elements of a set.""" 0101 return len(self._data) 0102 0103 def __repr__(self): 0104 """Return string representation of a set. 0105 0106 This looks like 'Set([<list of elements>])'. 0107 """ 0108 return self._repr() 0109 0110 # __str__ is the same as __repr__ 0111 __str__ = __repr__ 0112 0113 def _repr(self, sorted=False): 0114 elements = self._data.keys() 0115 if sorted: 0116 elements.sort() 0117 return '%s(%r)' % (self.__class__.__name__, elements) 0118 0119 def __iter__(self): 0120 """Return an iterator over the elements or a set. 0121 0122 This is the keys iterator for the underlying dict. 0123 """ 0124 return self._data.iterkeys() 0125 0126 # Three-way comparison is not supported. However, because __eq__ is 0127 # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and 0128 # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this 0129 # case). 0130 0131 def __cmp__(self, other): 0132 raise TypeError, "can't compare sets using cmp()" 0133 0134 # Equality comparisons using the underlying dicts. Mixed-type comparisons 0135 # are allowed here, where Set == z for non-Set z always returns False, 0136 # and Set != z always True. This allows expressions like "x in y" to 0137 # give the expected result when y is a sequence of mixed types, not 0138 # raising a pointless TypeError just because y contains a Set, or x is 0139 # a Set and y contain's a non-set ("in" invokes only __eq__). 0140 # Subtle: it would be nicer if __eq__ and __ne__ could return 0141 # NotImplemented instead of True or False. Then the other comparand 0142 # would get a chance to determine the result, and if the other comparand 0143 # also returned NotImplemented then it would fall back to object address 0144 # comparison (which would always return False for __eq__ and always 0145 # True for __ne__). However, that doesn't work, because this type 0146 # *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented, 0147 # Python tries __cmp__ next, and the __cmp__ here then raises TypeError. 0148 0149 def __eq__(self, other): 0150 if isinstance(other, BaseSet): 0151 return self._data == other._data 0152 else: 0153 return False 0154 0155 def __ne__(self, other): 0156 if isinstance(other, BaseSet): 0157 return self._data != other._data 0158 else: 0159 return True 0160 0161 # Copying operations 0162 0163 def copy(self): 0164 """Return a shallow copy of a set.""" 0165 result = self.__class__() 0166 result._data.update(self._data) 0167 return result 0168 0169 __copy__ = copy # For the copy module 0170 0171 def __deepcopy__(self, memo): 0172 """Return a deep copy of a set; used by copy module.""" 0173 # This pre-creates the result and inserts it in the memo 0174 # early, in case the deep copy recurses into another reference 0175 # to this same set. A set can't be an element of itself, but 0176 # it can certainly contain an object that has a reference to 0177 # itself. 0178 from copy import deepcopy 0179 result = self.__class__() 0180 memo[id(self)] = result 0181 data = result._data 0182 value = True 0183 for elt in self: 0184 data[deepcopy(elt, memo)] = value 0185 return result 0186 0187 # Standard set operations: union, intersection, both differences. 0188 # Each has an operator version (e.g. __or__, invoked with |) and a 0189 # method version (e.g. union). 0190 # Subtle: Each pair requires distinct code so that the outcome is 0191 # correct when the type of other isn't suitable. For example, if 0192 # we did "union = __or__" instead, then Set().union(3) would return 0193 # NotImplemented instead of raising TypeError (albeit that *why* it 0194 # raises TypeError as-is is also a bit subtle). 0195 0196 def __or__(self, other): 0197 """Return the union of two sets as a new set. 0198 0199 (I.e. all elements that are in either set.) 0200 """ 0201 if not isinstance(other, BaseSet): 0202 return NotImplemented 0203 return self.union(other) 0204 0205 def union(self, other): 0206 """Return the union of two sets as a new set. 0207 0208 (I.e. all elements that are in either set.) 0209 """ 0210 result = self.__class__(self) 0211 result._update(other) 0212 return result 0213 0214 def __and__(self, other): 0215 """Return the intersection of two sets as a new set. 0216 0217 (I.e. all elements that are in both sets.) 0218 """ 0219 if not isinstance(other, BaseSet): 0220 return NotImplemented 0221 return self.intersection(other) 0222 0223 def intersection(self, other): 0224 """Return the intersection of two sets as a new set. 0225 0226 (I.e. all elements that are in both sets.) 0227 """ 0228 if not isinstance(other, BaseSet): 0229 other = Set(other) 0230 if len(self) <= len(other): 0231 little, big = self, other 0232 else: 0233 little, big = other, self 0234 common = ifilter(big._data.has_key, little) 0235 return self.__class__(common) 0236 0237 def __xor__(self, other): 0238 """Return the symmetric difference of two sets as a new set. 0239 0240 (I.e. all elements that are in exactly one of the sets.) 0241 """ 0242 if not isinstance(other, BaseSet): 0243 return NotImplemented 0244 return self.symmetric_difference(other) 0245 0246 def symmetric_difference(self, other): 0247 """Return the symmetric difference of two sets as a new set. 0248 0249 (I.e. all elements that are in exactly one of the sets.) 0250 """ 0251 result = self.__class__() 0252 data = result._data 0253 value = True 0254 selfdata = self._data 0255 try: 0256 otherdata = other._data 0257 except AttributeError: 0258 otherdata = Set(other)._data 0259 for elt in ifilterfalse(otherdata.has_key, selfdata): 0260 data[elt] = value 0261 for elt in ifilterfalse(selfdata.has_key, otherdata): 0262 data[elt] = value 0263 return result 0264 0265 def __sub__(self, other): 0266 """Return the difference of two sets as a new Set. 0267 0268 (I.e. all elements that are in this set and not in the other.) 0269 """ 0270 if not isinstance(other, BaseSet): 0271 return NotImplemented 0272 return self.difference(other) 0273 0274 def difference(self, other): 0275 """Return the difference of two sets as a new Set. 0276 0277 (I.e. all elements that are in this set and not in the other.) 0278 """ 0279 result = self.__class__() 0280 data = result._data 0281 try: 0282 otherdata = other._data 0283 except AttributeError: 0284 otherdata = Set(other)._data 0285 value = True 0286 for elt in ifilterfalse(otherdata.has_key, self): 0287 data[elt] = value 0288 return result 0289 0290 # Membership test 0291 0292 def __contains__(self, element): 0293 """Report whether an element is a member of a set. 0294 0295 (Called in response to the expression `element in self'.) 0296 """ 0297 try: 0298 return element in self._data 0299 except TypeError: 0300 transform = getattr(element, "__as_temporarily_immutable__", None) 0301 if transform is None: 0302 raise # re-raise the TypeError exception we caught 0303 return transform() in self._data 0304 0305 # Subset and superset test 0306 0307 def issubset(self, other): 0308 """Report whether another set contains this set.""" 0309 self._binary_sanity_check(other) 0310 if len(self) > len(other): # Fast check for obvious cases 0311 return False 0312 for elt in ifilterfalse(other._data.has_key, self): 0313 return False 0314 return True 0315 0316 def issuperset(self, other): 0317 """Report whether this set contains another set.""" 0318 self._binary_sanity_check(other) 0319 if len(self) < len(other): # Fast check for obvious cases 0320 return False 0321 for elt in ifilterfalse(self._data.has_key, other): 0322 return False 0323 return True 0324 0325 # Inequality comparisons using the is-subset relation. 0326 __le__ = issubset 0327 __ge__ = issuperset 0328 0329 def __lt__(self, other): 0330 self._binary_sanity_check(other) 0331 return len(self) < len(other) and self.issubset(other) 0332 0333 def __gt__(self, other): 0334 self._binary_sanity_check(other) 0335 return len(self) > len(other) and self.issuperset(other) 0336 0337 # Assorted helpers 0338 0339 def _binary_sanity_check(self, other): 0340 # Check that the other argument to a binary operation is also 0341 # a set, raising a TypeError otherwise. 0342 if not isinstance(other, BaseSet): 0343 raise TypeError, "Binary operation only permitted between sets" 0344 0345 def _compute_hash(self): 0346 # Calculate hash code for a set by xor'ing the hash codes of 0347 # the elements. This ensures that the hash code does not depend 0348 # on the order in which elements are added to the set. This is 0349 # not called __hash__ because a BaseSet should not be hashable; 0350 # only an ImmutableSet is hashable. 0351 result = 0 0352 for elt in self: 0353 result ^= hash(elt) 0354 return result 0355 0356 def _update(self, iterable): 0357 # The main loop for update() and the subclass __init__() methods. 0358 data = self._data 0359 0360 # Use the fast update() method when a dictionary is available. 0361 if isinstance(iterable, BaseSet): 0362 data.update(iterable._data) 0363 return 0364 0365 value = True 0366 0367 if type(iterable) in (list, tuple, xrange): 0368 # Optimized: we know that __iter__() and next() can't 0369 # raise TypeError, so we can move 'try:' out of the loop. 0370 it = iter(iterable) 0371 while True: 0372 try: 0373 for element in it: 0374 data[element] = value 0375 return 0376 except TypeError: 0377 transform = getattr(element, "__as_immutable__", None) 0378 if transform is None: 0379 raise # re-raise the TypeError exception we caught 0380 data[transform()] = value 0381 else: 0382 # Safe: only catch TypeError where intended 0383 for element in iterable: 0384 try: 0385 data[element] = value 0386 except TypeError: 0387 transform = getattr(element, "__as_immutable__", None) 0388 if transform is None: 0389 raise # re-raise the TypeError exception we caught 0390 data[transform()] = value 0391 0392 0393 class ImmutableSet(BaseSet): 0394 """Immutable set class.""" 0395 0396 __slots__ = ['_hashcode'] 0397 0398 # BaseSet + hashing 0399 0400 def __init__(self, iterable=None): 0401 """Construct an immutable set from an optional iterable.""" 0402 self._hashcode = None 0403 self._data = {} 0404 if iterable is not None: 0405 self._update(iterable) 0406 0407 def __hash__(self): 0408 if self._hashcode is None: 0409 self._hashcode = self._compute_hash() 0410 return self._hashcode 0411 0412 def __getstate__(self): 0413 return self._data, self._hashcode 0414 0415 def __setstate__(self, state): 0416 self._data, self._hashcode = state 0417 0418 class Set(BaseSet): 0419 """ Mutable set class.""" 0420 0421 __slots__ = [] 0422 0423 # BaseSet + operations requiring mutability; no hashing 0424 0425 def __init__(self, iterable=None): 0426 """Construct a set from an optional iterable.""" 0427 self._data = {} 0428 if iterable is not None: 0429 self._update(iterable) 0430 0431 def __getstate__(self): 0432 # getstate's results are ignored if it is not 0433 return self._data, 0434 0435 def __setstate__(self, data): 0436 self._data, = data 0437 0438 def __hash__(self): 0439 """A Set cannot be hashed.""" 0440 # We inherit object.__hash__, so we must deny this explicitly 0441 raise TypeError, "Can't hash a Set, only an ImmutableSet." 0442 0443 # In-place union, intersection, differences. 0444 # Subtle: The xyz_update() functions deliberately return None, 0445 # as do all mutating operations on built-in container types. 0446 # The __xyz__ spellings have to return self, though. 0447 0448 def __ior__(self, other): 0449 """Update a set with the union of itself and another.""" 0450 self._binary_sanity_check(other) 0451 self._data.update(other._data) 0452 return self 0453 0454 def union_update(self, other): 0455 """Update a set with the union of itself and another.""" 0456 self._update(other) 0457 0458 def __iand__(self, other): 0459 """Update a set with the intersection of itself and another.""" 0460 self._binary_sanity_check(other) 0461 self._data = (self & other)._data 0462 return self 0463 0464 def intersection_update(self, other): 0465 """Update a set with the intersection of itself and another.""" 0466 if isinstance(other, BaseSet): 0467 self &= other 0468 else: 0469 self._data = (self.intersection(other))._data 0470 0471 def __ixor__(self, other): 0472 """Update a set with the symmetric difference of itself and another.""" 0473 self._binary_sanity_check(other) 0474 self.symmetric_difference_update(other) 0475 return self 0476 0477 def symmetric_difference_update(self, other): 0478 """Update a set with the symmetric difference of itself and another.""" 0479 data = self._data 0480 value = True 0481 if not isinstance(other, BaseSet): 0482 other = Set(other) 0483 for elt in other: 0484 if elt in data: 0485 del data[elt] 0486 else: 0487 data[elt] = value 0488 0489 def __isub__(self, other): 0490 """Remove all elements of another set from this set.""" 0491 self._binary_sanity_check(other) 0492 self.difference_update(other) 0493 return self 0494 0495 def difference_update(self, other): 0496 """Remove all elements of another set from this set.""" 0497 data = self._data 0498 if not isinstance(other, BaseSet): 0499 other = Set(other) 0500 for elt in ifilter(data.has_key, other): 0501 del data[elt] 0502 0503 # Python dict-like mass mutations: update, clear 0504 0505 def update(self, iterable): 0506 """Add all values from an iterable (such as a list or file).""" 0507 self._update(iterable) 0508 0509 def clear(self): 0510 """Remove all elements from this set.""" 0511 self._data.clear() 0512 0513 # Single-element mutations: add, remove, discard 0514 0515 def add(self, element): 0516 """Add an element to a set. 0517 0518 This has no effect if the element is already present. 0519 """ 0520 try: 0521 self._data[element] = True 0522 except TypeError: 0523 transform = getattr(element, "__as_immutable__", None) 0524 if transform is None: 0525 raise # re-raise the TypeError exception we caught 0526 self._data[transform()] = True 0527 0528 def remove(self, element): 0529 """Remove an element from a set; it must be a member. 0530 0531 If the element is not a member, raise a KeyError. 0532 """ 0533 try: 0534 del self._data[element] 0535 except TypeError: 0536 transform = getattr(element, "__as_temporarily_immutable__", None) 0537 if transform is None: 0538 raise # re-raise the TypeError exception we caught 0539 del self._data[transform()] 0540 0541 def discard(self, element): 0542 """Remove an element from a set if it is a member. 0543 0544 If the element is not a member, do nothing. 0545 """ 0546 try: 0547 self.remove(element) 0548 except KeyError: 0549 pass 0550 0551 def pop(self): 0552 """Remove and return an arbitrary set element.""" 0553 return self._data.popitem()[0] 0554 0555 def __as_immutable__(self): 0556 # Return a copy of self as an immutable set 0557 return ImmutableSet(self) 0558 0559 def __as_temporarily_immutable__(self): 0560 # Return self wrapped in a temporarily immutable set 0561 return _TemporarilyImmutableSet(self) 0562 0563 0564 class _TemporarilyImmutableSet(BaseSet): 0565 # Wrap a mutable set as if it was temporarily immutable. 0566 # This only supplies hashing and equality comparisons. 0567 0568 def __init__(self, set): 0569 self._set = set 0570 self._data = set._data # Needed by ImmutableSet.__eq__() 0571 0572 def __hash__(self): 0573 return self._set._compute_hash() 0574
Generated by PyXR 0.9.4