blob: fee06d76be0dc717af24cf33a6fba0d2aa616314 [file] [log] [blame]
"""Classes to represent arbitrary sets (including sets of sets).

This module implements sets using dictionaries whose values are
ignored. The usual operations (union, intersection, deletion, etc.)
are provided as both methods and operators.

Important: sets are not sequences! While they support 'x in s',
'len(s)', and 'for x in s', none of those operations are unique for
sequences; for example, mappings support all three as well. The
characteristic operation for sequences is subscripting with small
integers: s[i], for i in range(len(s)). Sets don't support
subscripting at all. Also, sequences allow multiple occurrences and
their elements have a definite order; sets on the other hand don't
record multiple occurrences and don't remember the order of element
insertion (which is why they don't support s[i]).

The following classes are provided:

BaseSet -- All the operations common to both mutable and immutable
    sets. This is an abstract class, not meant to be directly
    instantiated.

Set -- Mutable sets, subclass of BaseSet; not hashable.

ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
    An iterable argument is optional; without one the ImmutableSet
    is empty.

_TemporarilyImmutableSet -- Not a subclass of BaseSet: just a wrapper
    around a Set, hashable, giving the same hash value as the
    immutable set equivalent would have. Do not use this class
    directly.

Only hashable objects can be added to a Set. In particular, you cannot
really add a Set as an element to another Set; if you try, what is
actually added is an ImmutableSet built from it (it compares equal to
the one you tried adding).

When you ask if `x in y' where x is a Set and y is a Set or
ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
what's tested is actually `z in y'.

"""
43
# Code history:
#
# - Greg V. Wilson wrote the first version, using a different approach
#   to the mutable/immutable problem, and inheriting from dict.
#
# - Alex Martelli modified Greg's version to implement the current
#   Set/ImmutableSet approach, and make the data an attribute.
#
# - Guido van Rossum rewrote much of the code, made some API changes,
#   and cleaned up the docstrings.
#
# - Raymond Hettinger added a number of speedups and other
#   improvements.
Guido van Rossumd6cf3af2002-08-19 16:19:15 +000057
58
# Names exported by a star-import of this module; the private helper
# class _TemporarilyImmutableSet is not listed.
__all__ = ['BaseSet', 'Set', 'ImmutableSet']
60
61
class BaseSet(object):
    """Common base class for mutable and immutable sets.

    The elements are the keys of the dict stored in ``_data``; the dict
    values are ignored. This class never creates ``_data`` itself, and
    several methods build their result with ``self.__class__([])``, so a
    concrete subclass must provide an ``__init__`` that accepts an
    iterable and initializes ``_data``.
    """

    __slots__ = ['_data']

    # Constructor

    def __init__(self):
        """This is an abstract class.

        Raises TypeError when BaseSet itself (not a subclass) is
        instantiated.
        """
        # Don't call this from a concrete subclass!
        if self.__class__ is BaseSet:
            raise TypeError("BaseSet is an abstract class. "
                            "Use Set or ImmutableSet.")

    # Standard protocols: __len__, __repr__, __str__, __iter__

    def __len__(self):
        """Return the number of elements of a set."""
        return len(self._data)

    def __repr__(self):
        """Return string representation of a set.

        This looks like 'Set([<list of elements>])'.
        """
        return self._repr()

    # __str__ is the same as __repr__
    __str__ = __repr__

    def _repr(self, sorted=False):
        """Return '<ClassName>([<elements>])'.

        When `sorted` is true the element list is sorted first, which
        gives a reproducible string (the self-tests rely on this).
        """
        # list(dict) yields the keys on every Python version, and unlike
        # a keys() view it can always be sorted in place.
        elements = list(self._data)
        if sorted:
            elements.sort()
        return '%s(%r)' % (self.__class__.__name__, elements)

    def __iter__(self):
        """Return an iterator over the elements of a set.

        This is the keys iterator for the underlying dict.
        """
        return iter(self._data)

    # Comparisons. Ordering is determined by the ordering of the
    # underlying dicts (which is consistent though unpredictable).
    #
    # NOTE(review): ordering comparisons between dicts exist only on
    # Python 2; on Python 3 the four ordering methods below raise
    # TypeError from the dict comparison itself.

    def __lt__(self, other):
        """Order by the underlying dicts; TypeError if other is no set."""
        self._binary_sanity_check(other)
        return self._data < other._data

    def __le__(self, other):
        """Order by the underlying dicts; TypeError if other is no set."""
        self._binary_sanity_check(other)
        return self._data <= other._data

    def __eq__(self, other):
        """Report whether both sets hold exactly the same elements.

        For an operand that is not a BaseSet this returns NotImplemented
        instead of raising, so `some_set == 5` is simply false and the
        other operand gets a chance to run its own (reflected) __eq__.
        Wrapper objects that compare themselves against sets during a
        dict lookup depend on that second chance.
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self._data == other._data

    def __ne__(self, other):
        """Inverse of __eq__; NotImplemented for non-set operands."""
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self._data != other._data

    def __gt__(self, other):
        """Order by the underlying dicts; TypeError if other is no set."""
        self._binary_sanity_check(other)
        return self._data > other._data

    def __ge__(self, other):
        """Order by the underlying dicts; TypeError if other is no set."""
        self._binary_sanity_check(other)
        return self._data >= other._data

    # Copying operations

    def copy(self):
        """Return a shallow copy of a set."""
        result = self.__class__([])
        result._data.update(self._data)
        return result

    __copy__ = copy  # For the copy module

    def __deepcopy__(self, memo):
        """Return a deep copy of a set; used by copy module."""
        # This pre-creates the result and inserts it in the memo
        # early, in case the deep copy recurses into another reference
        # to this same set. A set can't be an element of itself, but
        # it can certainly contain an object that has a reference to
        # itself.
        from copy import deepcopy
        result = self.__class__([])
        memo[id(self)] = result
        data = result._data
        value = True
        for elt in self:
            data[deepcopy(elt, memo)] = value
        return result

    # Standard set operations: union, intersection, both differences.
    # Each has an operator version (e.g. __or__, invoked with |) and a
    # method version (e.g. union).

    def __or__(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        # Iterating a dict yields its keys, so this copies our elements.
        result = self.__class__(self._data)
        result._data.update(other._data)
        return result

    def union(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        return self | other

    def __and__(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        # Walk the smaller set and probe the bigger one.
        if len(self) <= len(other):
            little, big = self, other
        else:
            little, big = other, self
        result = self.__class__([])
        data = result._data
        value = True
        for elt in little:
            if elt in big:
                data[elt] = value
        return result

    def intersection(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        return self & other

    def __xor__(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        result = self.__class__([])
        data = result._data
        value = True
        for elt in self:
            if elt not in other:
                data[elt] = value
        for elt in other:
            if elt not in self:
                data[elt] = value
        return result

    def symmetric_difference(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        return self ^ other

    def __sub__(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        result = self.__class__([])
        data = result._data
        value = True
        for elt in self:
            if elt not in other:
                data[elt] = value
        return result

    def difference(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        return self - other

    # Membership test

    def __contains__(self, element):
        """Report whether an element is a member of a set.

        (Called in response to the expression `element in self'.)

        If looking the element up raises TypeError and the element has an
        `_as_temporarily_immutable` method, the lookup is retried with
        the object that method returns; otherwise the TypeError is
        re-raised.
        """
        try:
            return element in self._data
        except TypeError:
            transform = getattr(element, "_as_temporarily_immutable", None)
            if transform is None:
                raise  # re-raise the TypeError exception we caught
            return transform() in self._data

    # Subset and superset test

    def issubset(self, other):
        """Report whether another set contains this set."""
        self._binary_sanity_check(other)
        if len(self) > len(other):  # Fast check for obvious cases
            return False
        for elt in self:
            if elt not in other:
                return False
        return True

    def issuperset(self, other):
        """Report whether this set contains another set."""
        self._binary_sanity_check(other)
        if len(self) < len(other):  # Fast check for obvious cases
            return False
        for elt in other:
            if elt not in self:
                return False
        return True

    # Assorted helpers

    def _binary_sanity_check(self, other):
        """Raise TypeError unless `other` is a BaseSet instance."""
        # Check that the other argument to a binary operation is also
        # a set, raising a TypeError otherwise.
        if not isinstance(other, BaseSet):
            raise TypeError("Binary operation only permitted between sets")

    def _compute_hash(self):
        """Return the xor of the hashes of all elements (0 if empty)."""
        # Calculate hash code for a set by xor'ing the hash codes of
        # the elements. This algorithm ensures that the hash code
        # does not depend on the order in which elements are added to
        # the code. This is not called __hash__ because a BaseSet
        # should not be hashable; only an ImmutableSet is hashable.
        result = 0
        for elt in self:
            result ^= hash(elt)
        return result

    def _update(self, iterable):
        """Add every element produced by `iterable` to this set in place.

        An element whose insertion raises TypeError is replaced by the
        result of its `_as_immutable()` method when it has one; without
        that method the TypeError is re-raised.
        """
        # The main loop for update() and the subclass __init__() methods.
        data = self._data
        value = True
        for element in iterable:
            # Only the insertion is guarded: a TypeError raised by the
            # iterator itself must propagate untouched rather than be
            # mistaken for an unhashable element.
            try:
                data[element] = value
            except TypeError:
                transform = getattr(element, "_as_immutable", None)
                if transform is None:
                    raise  # re-raise the TypeError exception we caught
                data[transform()] = value
324
Guido van Rossumd6cf3af2002-08-19 16:19:15 +0000325
class ImmutableSet(BaseSet):
    """Hashable set; offers no mutating methods of its own."""

    __slots__ = ['_hashcode']

    # BaseSet + hashing

    def __init__(self, iterable=None):
        """Build the set from `iterable`, or empty when it is None."""
        # The hash is computed on first request and then cached.
        self._hashcode = None
        self._data = {}
        if iterable is not None:
            self._update(iterable)

    def __hash__(self):
        """Return the hash of the set, computing it on first use."""
        cached = self._hashcode
        if cached is None:
            cached = self._hashcode = self._compute_hash()
        return cached
344
345
class Set(BaseSet):
    """Mutable set class."""

    __slots__ = []

    # BaseSet + operations requiring mutability; no hashing

    def __init__(self, iterable=None):
        """Construct a set from an optional iterable."""
        self._data = {}
        if iterable is not None:
            self._update(iterable)

    def __hash__(self):
        """A Set cannot be hashed; always raises TypeError."""
        # We inherit object.__hash__, so we must deny this explicitly
        raise TypeError("Can't hash a Set, only an ImmutableSet.")

    # In-place union, intersection, differences.  Each returns self so
    # that it can double as the augmented-assignment operator.

    def union_update(self, other):
        """Update a set with the union of itself and another."""
        self._binary_sanity_check(other)
        self._data.update(other._data)
        return self

    __ior__ = union_update

    def intersection_update(self, other):
        """Update a set with the intersection of itself and another."""
        self._binary_sanity_check(other)
        # Iterate over a snapshot of the keys: entries are deleted from
        # the dict inside the loop.
        for elt in list(self._data):
            if elt not in other:
                del self._data[elt]
        return self

    __iand__ = intersection_update

    def symmetric_difference_update(self, other):
        """Update a set with the symmetric difference of itself and another."""
        self._binary_sanity_check(other)
        data = self._data
        if other is self:
            # s ^= s is empty; looping would modify the dict that is
            # being iterated.
            data.clear()
            return self
        value = True
        for elt in other:
            if elt in data:
                del data[elt]
            else:
                data[elt] = value
        return self

    __ixor__ = symmetric_difference_update

    def difference_update(self, other):
        """Remove all elements of another set from this set."""
        self._binary_sanity_check(other)
        data = self._data
        if other is self:
            # s -= s is empty; looping would modify the dict that is
            # being iterated.
            data.clear()
            return self
        for elt in other:
            if elt in data:
                del data[elt]
        return self

    __isub__ = difference_update

    # Python dict-like mass mutations: update, clear

    def update(self, iterable):
        """Add all values from an iterable (such as a list or file)."""
        self._update(iterable)

    def clear(self):
        """Remove all elements from this set."""
        self._data.clear()

    # Single-element mutations: add, remove, discard

    def add(self, element):
        """Add an element to a set.

        This has no effect if the element is already present.

        If storing the element raises TypeError and it has an
        `_as_immutable` method, the object returned by that method is
        stored instead; otherwise the TypeError is re-raised.
        """
        try:
            self._data[element] = True
        except TypeError:
            transform = getattr(element, "_as_immutable", None)
            if transform is None:
                raise  # re-raise the TypeError exception we caught
            self._data[transform()] = True

    def remove(self, element):
        """Remove an element from a set; it must be a member.

        If the element is not a member, raise a KeyError.

        If the lookup raises TypeError and the element has an
        `_as_temporarily_immutable` method, the object returned by that
        method is used as the key instead.
        """
        try:
            del self._data[element]
        except TypeError:
            transform = getattr(element, "_as_temporarily_immutable", None)
            if transform is None:
                raise  # re-raise the TypeError exception we caught
            del self._data[transform()]

    def discard(self, element):
        """Remove an element from a set if it is a member.

        If the element is not a member, do nothing.
        """
        # Delegate to remove() so that elements needing the
        # _as_temporarily_immutable fallback are handled the same way.
        try:
            self.remove(element)
        except KeyError:
            pass

    def pop(self):
        """Remove and return an arbitrary set element.

        Raises KeyError (from dict.popitem) when the set is empty.
        """
        return self._data.popitem()[0]

    def _as_immutable(self):
        # Return a copy of self as an immutable set
        return ImmutableSet(self)

    def _as_temporarily_immutable(self):
        # Return self wrapped in a temporarily immutable set
        return _TemporarilyImmutableSet(self)
468
469
470class _TemporarilyImmutableSet(object):
471 # Wrap a mutable set as if it was temporarily immutable.
472 # This only supplies hashing and equality comparisons.
473
474 _hashcode = None
475
476 def __init__(self, set):
477 self._set = set
478
479 def __hash__(self):
480 if self._hashcode is None:
481 self._hashcode = self._set._compute_hash()
482 return self._hashcode
483
484 def __eq__(self, other):
485 return self._set == other
486
487 def __ne__(self, other):
488 return self._set != other
489
490
491# Rudimentary self-tests
492
def _test():
    """Run the rudimentary self-tests.

    Raises AssertionError on the first failing check; prints
    "All tests passed" when every check succeeds.
    """

    # Empty set
    red = Set()
    assert repr(red) == "Set([])", "Empty set: %s" % repr(red)

    # Unit set
    green = Set((0,))
    assert repr(green) == "Set([0])", "Unit set: %s" % repr(green)

    # 3-element set
    blue = Set([0, 1, 2])
    assert blue._repr(True) == "Set([0, 1, 2])", "3-element set: %s" % repr(blue)

    # 2-element set with other values
    black = Set([0, 5])
    assert black._repr(True) == "Set([0, 5])", "2-element set: %s" % repr(black)

    # All elements from all sets
    white = Set([0, 1, 2, 5])
    assert white._repr(True) == "Set([0, 1, 2, 5])", "4-element set: %s" % repr(white)

    # Add element to empty set
    red.add(9)
    assert repr(red) == "Set([9])", "Add to empty set: %s" % repr(red)

    # Remove element from unit set
    red.remove(9)
    assert repr(red) == "Set([])", "Remove from unit set: %s" % repr(red)

    # Remove element from empty set
    try:
        red.remove(0)
        # Only reached if remove() wrongly succeeded.
        assert 0, "Remove element from empty set: %s" % repr(red)
    except LookupError:
        pass

    # Length
    assert len(red) == 0, "Length of empty set"
    assert len(green) == 1, "Length of unit set"
    assert len(blue) == 3, "Length of 3-element set"

    # Compare
    assert green == Set([0]), "Equality failed"
    assert green != Set([1]), "Inequality failed"

    # Union
    assert blue | red == blue, "Union non-empty with empty"
    assert red | blue == blue, "Union empty with non-empty"
    assert green | blue == blue, "Union non-empty with non-empty"
    assert blue | black == white, "Enclosing union"

    # Intersection
    assert blue & red == red, "Intersect non-empty with empty"
    assert red & blue == red, "Intersect empty with non-empty"
    assert green & blue == green, "Intersect non-empty with non-empty"
    assert blue & black == green, "Enclosing intersection"

    # Symmetric difference
    assert red ^ green == green, "Empty symdiff non-empty"
    assert green ^ blue == Set([1, 2]), "Non-empty symdiff"
    assert white ^ white == red, "Self symdiff"

    # Difference
    assert red - green == red, "Empty - non-empty"
    assert blue - red == blue, "Non-empty - empty"
    assert white - black == Set([1, 2]), "Non-empty - non-empty"

    # In-place union
    orange = Set([])
    orange |= Set([1])
    assert orange == Set([1]), "In-place union"

    # In-place intersection
    orange = Set([1, 2])
    orange &= Set([2])
    assert orange == Set([2]), "In-place intersection"

    # In-place difference
    orange = Set([1, 2, 3])
    orange -= Set([2, 4])
    assert orange == Set([1, 3]), "In-place difference"

    # In-place symmetric difference
    orange = Set([1, 2, 3])
    orange ^= Set([3, 4])
    assert orange == Set([1, 2, 4]), "In-place symmetric difference"

    # A parenthesized single string prints the same text whether print
    # is a statement or a function.
    print("All tests passed")
582
583
# Run the self-tests when this file is executed as a script.
if __name__ == "__main__":
    _test()