blob: 5f0f0a2d38c18772d6e4e56a44dc162142112bd1 [file] [log] [blame]
"""Classes to represent arbitrary sets (including sets of sets).

This module implements sets using dictionaries whose values are
ignored.  The usual operations (union, intersection, deletion, etc.)
are provided as both methods and operators.

Important: sets are not sequences!  While they support 'x in s',
'len(s)', and 'for x in s', none of those operations are unique to
sequences; for example, mappings support all three as well.  The
characteristic operation for sequences is subscripting with small
integers: s[i], for i in range(len(s)).  Sets don't support
subscripting at all.  Also, sequences allow multiple occurrences and
their elements have a definite order; sets on the other hand don't
record multiple occurrences and don't remember the order of element
insertion (which is why they don't support s[i]).

The following classes are provided:

BaseSet -- All the operations common to both mutable and immutable
    sets.  This is an abstract class, not meant to be directly
    instantiated.

Set -- Mutable sets, subclass of BaseSet; not hashable.

ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
    The iterable argument is optional; omitting it creates an empty
    ImmutableSet.

_TemporarilyImmutableSet -- A subclass of BaseSet used only as a
    wrapper around a Set; hashable, giving the same hash value as the
    immutable set equivalent would have.  Do not use this class
    directly.

Only hashable objects can be added to a Set.  In particular, you cannot
really add a Set as an element to another Set; if you try, what is
actually added is an ImmutableSet built from it (it compares equal to
the one you tried adding).

When you ask if `x in y' where x is a Set and y is a Set or
ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
what's tested is actually `z in y'.

"""
43
# Code history:
#
# - Greg V. Wilson wrote the first version, using a different approach
#   to the mutable/immutable problem, and inheriting from dict.
#
# - Alex Martelli modified Greg's version to implement the current
#   Set/ImmutableSet approach, and make the data an attribute.
#
# - Guido van Rossum rewrote much of the code, made some API changes,
#   and cleaned up the docstrings.
#
# - Raymond Hettinger added a number of speedups and other
#   improvements.
Guido van Rossumd6cf3af2002-08-19 16:19:15 +000057
58
59__all__ = ['BaseSet', 'Set', 'ImmutableSet']
60
61
class BaseSet(object):
    """Common base class for mutable and immutable sets.

    The elements are the keys of the dict stored in self._data.  Every
    method of this class stores True as the value, so comparing two
    underlying dicts compares exactly the element sets.  Concrete
    subclasses must create self._data in their own __init__().
    """

    __slots__ = ['_data']

    # Constructor

    def __init__(self):
        """This is an abstract class."""
        # Don't call this from a concrete subclass!
        if self.__class__ is BaseSet:
            raise TypeError("BaseSet is an abstract class. "
                            "Use Set or ImmutableSet.")

    # Standard protocols: __len__, __repr__, __str__, __iter__

    def __len__(self):
        """Return the number of elements of a set."""
        return len(self._data)

    def __repr__(self):
        """Return string representation of a set.

        This looks like 'Set([<list of elements>])'.
        """
        return self._repr()

    # __str__ is the same as __repr__
    __str__ = __repr__

    def _repr(self, sorted=False):
        # Build the repr string; when 'sorted' is true the elements are
        # listed in sorted order, which gives a reproducible result.
        elements = list(self._data)
        if sorted:
            elements.sort()
        return '%s(%r)' % (self.__class__.__name__, elements)

    def __iter__(self):
        """Return an iterator over the elements of a set.

        This is the keys iterator for the underlying dict.
        """
        return iter(self._data)

    # Equality comparisons using the underlying dicts.
    # Comparing with something that is not a set must not raise:
    # dict lookups compare keys with == whenever two hashes collide,
    # so an exception here would break unrelated mappings.

    def __eq__(self, other):
        """Return True if other is a set with the same elements."""
        if isinstance(other, BaseSet):
            return self._data == other._data
        return False

    def __ne__(self, other):
        """Return True unless other is a set with the same elements."""
        if isinstance(other, BaseSet):
            return self._data != other._data
        return True

    # Copying operations

    def copy(self):
        """Return a shallow copy of a set."""
        result = self.__class__()
        result._data.update(self._data)
        return result

    __copy__ = copy # For the copy module

    def __deepcopy__(self, memo):
        """Return a deep copy of a set; used by copy module."""
        # This pre-creates the result and inserts it in the memo
        # early, in case the deep copy recurses into another reference
        # to this same set.  A set can't be an element of itself, but
        # it can certainly contain an object that has a reference to
        # itself.
        from copy import deepcopy
        result = self.__class__()
        memo[id(self)] = result
        data = result._data
        value = True
        for elt in self:
            data[deepcopy(elt, memo)] = value
        return result

    # Standard set operations: union, intersection, both differences.
    # Each has an operator version (e.g. __or__, invoked with |) and a
    # method version (e.g. union).
    # Subtle:  Each pair requires distinct code so that the outcome is
    # correct when the type of other isn't suitable.  For example, if
    # we did "union = __or__" instead, then Set().union(3) would return
    # NotImplemented instead of raising TypeError (albeit that *why* it
    # raises TypeError as-is is also a bit subtle).

    def __or__(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        result = self.__class__()
        result._data = self._data.copy()
        result._data.update(other._data)
        return result

    def union(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        return self | other

    def __and__(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        # Walk the smaller set and probe the larger one.
        if len(self) <= len(other):
            little, big = self, other
        else:
            little, big = other, self
        bigdata = big._data
        common = [elt for elt in little._data if elt in bigdata]
        return self.__class__(common)

    def intersection(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        return self & other

    def __xor__(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        result = self.__class__()
        data = result._data
        value = True
        selfdata = self._data
        otherdata = other._data
        for elt in selfdata:
            if elt not in otherdata:
                data[elt] = value
        for elt in otherdata:
            if elt not in selfdata:
                data[elt] = value
        return result

    def symmetric_difference(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        return self ^ other

    def __sub__(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        result = self.__class__()
        data = result._data
        otherdata = other._data
        value = True
        for elt in self:
            if elt not in otherdata:
                data[elt] = value
        return result

    def difference(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        return self - other

    # Membership test

    def __contains__(self, element):
        """Report whether an element is a member of a set.

        (Called in response to the expression `element in self'.)
        """
        try:
            return element in self._data
        except TypeError:
            # Unhashable element: if it knows how to present itself as
            # a hashable stand-in, test that instead.
            transform = getattr(element, "_as_temporarily_immutable", None)
            if transform is None:
                raise # re-raise the TypeError exception we caught
            return transform() in self._data

    # Subset and superset test

    def issubset(self, other):
        """Report whether another set contains this set.

        Raises TypeError if other is not a set.
        """
        self._binary_sanity_check(other)
        if len(self) > len(other):  # Fast check for obvious cases
            return False
        otherdata = other._data
        for elt in self:
            if elt not in otherdata:
                return False
        return True

    def issuperset(self, other):
        """Report whether this set contains another set.

        Raises TypeError if other is not a set.
        """
        self._binary_sanity_check(other)
        if len(self) < len(other):  # Fast check for obvious cases
            return False
        selfdata = self._data
        for elt in other:
            if elt not in selfdata:
                return False
        return True

    # Inequality comparisons using the is-subset relation.
    __le__ = issubset
    __ge__ = issuperset

    def __lt__(self, other):
        """Report whether this set is a proper subset of other."""
        self._binary_sanity_check(other)
        return len(self) < len(other) and self.issubset(other)

    def __gt__(self, other):
        """Report whether this set is a proper superset of other."""
        self._binary_sanity_check(other)
        return len(self) > len(other) and self.issuperset(other)

    # Assorted helpers

    def _binary_sanity_check(self, other):
        # Check that the other argument to a binary operation is also
        # a set, raising a TypeError otherwise.
        if not isinstance(other, BaseSet):
            raise TypeError("Binary operation only permitted between sets")

    def _compute_hash(self):
        # Calculate hash code for a set by xor'ing the hash codes of
        # the elements.  This ensures that the hash code does not depend
        # on the order in which elements are added to the set.  This is
        # not called __hash__ because a BaseSet should not be hashable;
        # only an ImmutableSet is hashable.
        result = 0
        for elt in self:
            result ^= hash(elt)
        return result

    def _update(self, iterable):
        # The main loop for update() and the subclass __init__() methods.
        data = self._data

        # Use the fast update() method when the argument is a set.
        if isinstance(iterable, BaseSet):
            data.update(iterable._data)
            return

        # A plain dict deliberately takes the generic path below: its
        # keys become elements, but its values must not be copied into
        # _data because equality compares the underlying dicts.
        value = True
        for element in iterable:
            # Guard only the insertion, so that a TypeError raised by
            # the iterator itself propagates untouched.
            try:
                data[element] = value
            except TypeError:
                transform = getattr(element, "_as_immutable", None)
                if transform is None:
                    raise # re-raise the TypeError exception we caught
                data[transform()] = value
334
Guido van Rossumd6cf3af2002-08-19 16:19:15 +0000335
class ImmutableSet(BaseSet):
    """Immutable set class; hashable, with the hash cached after first use."""

    __slots__ = ['_hashcode']

    # BaseSet + hashing

    def __init__(self, iterable=None):
        """Build an immutable set, optionally filled from an iterable."""
        self._data = {}
        self._hashcode = None  # computed lazily by __hash__()
        if iterable is None:
            return
        self._update(iterable)

    def __hash__(self):
        cached = self._hashcode
        if cached is None:
            # First request: compute once and remember the result.
            cached = self._hashcode = self._compute_hash()
        return cached
354
355
class Set(BaseSet):
    """Mutable set class.

    Adds in-place operators, bulk updates and single-element mutation
    to BaseSet.  Instances cannot be hashed.
    """

    __slots__ = []

    # BaseSet + operations requiring mutability; no hashing

    def __init__(self, iterable=None):
        """Construct a set from an optional iterable."""
        self._data = {}
        if iterable is not None:
            self._update(iterable)

    def __hash__(self):
        """A Set cannot be hashed."""
        # We inherit object.__hash__, so we must deny this explicitly
        raise TypeError("Can't hash a Set, only an ImmutableSet.")

    # In-place union, intersection, differences.
    # Subtle:  The xyz_update() functions deliberately return None,
    # as do all mutating operations on built-in container types.
    # The __xyz__ spellings have to return self, though.

    def __ior__(self, other):
        """Update a set with the union of itself and another."""
        self._binary_sanity_check(other)
        self._data.update(other._data)
        return self

    def union_update(self, other):
        """Update a set with the union of itself and another."""
        self |= other

    def __iand__(self, other):
        """Update a set with the intersection of itself and another."""
        self._binary_sanity_check(other)
        self._data = (self & other)._data
        return self

    def intersection_update(self, other):
        """Update a set with the intersection of itself and another."""
        self &= other

    def __ixor__(self, other):
        """Update a set with the symmetric difference of itself and another."""
        self._binary_sanity_check(other)
        data = self._data
        if other._data is data:
            # The operand shares our storage (e.g. "s ^= s"): every
            # element is in both, so the result is empty.  Looping
            # would change the dict while it is being iterated.
            data.clear()
            return self
        value = True
        for elt in other:
            if elt in data:
                del data[elt]
            else:
                data[elt] = value
        return self

    def symmetric_difference_update(self, other):
        """Update a set with the symmetric difference of itself and another."""
        self ^= other

    def __isub__(self, other):
        """Remove all elements of another set from this set."""
        self._binary_sanity_check(other)
        data = self._data
        if other._data is data:
            # The operand shares our storage (e.g. "s -= s"): removing
            # all of its elements empties the set.  Looping would
            # delete from the dict while it is being iterated.
            data.clear()
            return self
        for elt in other:
            if elt in data:
                del data[elt]
        return self

    def difference_update(self, other):
        """Remove all elements of another set from this set."""
        self -= other

    # Python dict-like mass mutations: update, clear

    def update(self, iterable):
        """Add all values from an iterable (such as a list or file)."""
        self._update(iterable)

    def clear(self):
        """Remove all elements from this set."""
        self._data.clear()

    # Single-element mutations: add, remove, discard

    def add(self, element):
        """Add an element to a set.

        This has no effect if the element is already present.
        """
        try:
            self._data[element] = True
        except TypeError:
            # Unhashable element: store its immutable form if it
            # offers one.
            transform = getattr(element, "_as_immutable", None)
            if transform is None:
                raise # re-raise the TypeError exception we caught
            self._data[transform()] = True

    def remove(self, element):
        """Remove an element from a set; it must be a member.

        If the element is not a member, raise a KeyError.
        """
        try:
            del self._data[element]
        except TypeError:
            # Unhashable element: look it up through a hashable
            # stand-in if it offers one.
            transform = getattr(element, "_as_temporarily_immutable", None)
            if transform is None:
                raise # re-raise the TypeError exception we caught
            del self._data[transform()]

    def discard(self, element):
        """Remove an element from a set if it is a member.

        If the element is not a member, do nothing.
        """
        try:
            self.remove(element)
        except KeyError:
            pass

    def pop(self):
        """Remove and return an arbitrary set element."""
        return self._data.popitem()[0]

    def _as_immutable(self):
        # Return a copy of self as an immutable set
        return ImmutableSet(self)

    def _as_temporarily_immutable(self):
        # Return self wrapped in a temporarily immutable set
        return _TemporarilyImmutableSet(self)
487
488
class _TemporarilyImmutableSet(BaseSet):
    # Hashable stand-in for a mutable set, for the duration of a lookup.
    # Only hashing and equality comparisons are supplied here.

    def __init__(self, set):
        # Share the wrapped set's dict (no copy) so that the inherited
        # equality comparison sees the same elements.
        self._data = set._data
        self._set = set

    def __hash__(self):
        # Recomputed on every call from the wrapped set; never cached.
        wrapped = self._set
        return wrapped._compute_hash()