blob: 53e78f94d34773f67c5d63ee4561682037e079af [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1736542009-03-23 05:19:21 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger2d32f632009-03-02 21:24:57 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000046 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000047 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000048 except AttributeError:
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000049 self.__root = root = _Link() # sentinel node for the doubly linked list
50 root.prev = root.next = root
51 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000052 self.update(*args, **kwds)
53
54 def clear(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000055 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerf1736542009-03-23 05:19:21 +000056 root = self.__root
57 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000058 self.__map.clear()
Raymond Hettinger2d32f632009-03-02 21:24:57 +000059 dict.clear(self)
60
61 def __setitem__(self, key, value):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000062 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000063 # Setting a new item creates a new link which goes at the end of the linked
64 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000065 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000066 self.__map[key] = link = _Link()
67 root = self.__root
68 last = root.prev
69 link.prev, link.next, link.key = last, root, key
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000070 last.next = root.prev = _proxy(link)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000071 dict.__setitem__(self, key, value)
72
73 def __delitem__(self, key):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000074 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000075 # Deleting an existing item uses self.__map to find the link which is
76 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000077 dict.__delitem__(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000078 link = self.__map.pop(key)
79 link.prev.next = link.next
80 link.next.prev = link.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000081
82 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000083 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 # Traverse the linked list in order.
85 root = self.__root
86 curr = root.next
87 while curr is not root:
88 yield curr.key
89 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000090
91 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000092 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 # Traverse the linked list in reverse order.
94 root = self.__root
95 curr = root.prev
96 while curr is not root:
97 yield curr.key
98 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
100 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000101 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000102 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +0000103 tmp = self.__map, self.__root
104 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000105 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +0000106 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000107 if inst_dict:
108 return (self.__class__, (items,), inst_dict)
109 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000110
111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
117
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000118 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000119 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
120 Pairs are returned in LIFO order if last is true or FIFO order if false.
121
122 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000123 if not self:
124 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000125 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000126 value = self.pop(key)
127 return key, value
128
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000129 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000130 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000131 if not self:
132 return '%s()' % (self.__class__.__name__,)
133 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
134
135 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000136 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000137 return self.__class__(self)
138
139 @classmethod
140 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000141 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
142 and values equal to v (which defaults to None).
143
144 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000145 d = cls()
146 for key in iterable:
147 d[key] = value
148 return d
149
150 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000151 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
152 while comparison to a regular mapping is order-insensitive.
153
154 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000155 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000156 return len(self)==len(other) and \
157 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000158 return dict.__eq__(self, other)
159
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000160 def __ne__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000161 '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
162 while comparison to a regular mapping is order-insensitive.
163
164 '''
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000165 return not self == other
166
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000167
Christian Heimes99170a52007-12-19 02:07:34 +0000168
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000169################################################################################
170### namedtuple
171################################################################################
172
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000173def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000174 """Returns a new subclass of tuple with named fields.
175
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000176 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000177 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000178 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000179 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000180 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000181 33
Christian Heimes99170a52007-12-19 02:07:34 +0000182 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000183 >>> x, y
184 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000185 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000186 33
Christian Heimes0449f632007-12-15 01:27:15 +0000187 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000188 >>> d['x']
189 11
190 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000191 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000192 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000193 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000194
195 """
196
Christian Heimes2380ac72008-01-09 00:17:24 +0000197 # Parse and validate the field names. Validation serves two purposes,
198 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000199 if isinstance(field_names, str):
200 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000201 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000202 if rename:
203 names = list(field_names)
204 seen = set()
205 for i, name in enumerate(names):
206 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
207 or not name or name[0].isdigit() or name.startswith('_')
208 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000209 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000210 seen.add(name)
211 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000212 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000213 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000214 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
215 if _iskeyword(name):
216 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
217 if name[0].isdigit():
218 raise ValueError('Type names and field names cannot start with a number: %r' % name)
219 seen_names = set()
220 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000221 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000222 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000223 if name in seen_names:
224 raise ValueError('Encountered duplicate field name: %r' % name)
225 seen_names.add(name)
226
227 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000228 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000229 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000230 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
231 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000232 '%(typename)s(%(argtxt)s)' \n
233 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000234 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000235 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000236 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000237 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000238 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000239 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000240 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000241 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000242 if len(result) != %(numfields)d:
243 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
244 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000245 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000246 'Return a nicely formatted representation string'
Christian Heimes0449f632007-12-15 01:27:15 +0000247 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000248 def _asdict(self):
249 'Return a new OrderedDict which maps field names to their values'
250 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000251 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000252 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000253 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000254 if kwds:
255 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000256 return result \n
257 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000258 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000259 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000260 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000261 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000262 if verbose:
263 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000264
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000265 # Execute the template string in a temporary namespace and
266 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000267 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
268 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000269 try:
270 exec(template, namespace)
271 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000272 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000273 result = namespace[typename]
274
275 # For pickling to work, the __module__ variable needs to be set to the frame
276 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000277 # sys._getframe is not defined (Jython for example) or sys._getframe is not
278 # defined for arguments greater than 0 (IronPython).
279 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000280 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000281 except (AttributeError, ValueError):
282 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000283
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000284 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000285
Guido van Rossumd8faa362007-04-27 19:54:29 +0000286
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000287########################################################################
288### Counter
289########################################################################
290
291class Counter(dict):
292 '''Dict subclass for counting hashable items. Sometimes called a bag
293 or multiset. Elements are stored as dictionary keys and their counts
294 are stored as dictionary values.
295
296 >>> c = Counter('abracadabra') # count elements from a string
297
298 >>> c.most_common(3) # three most common elements
299 [('a', 5), ('r', 2), ('b', 2)]
300 >>> sorted(c) # list all unique elements
301 ['a', 'b', 'c', 'd', 'r']
302 >>> ''.join(sorted(c.elements())) # list elements with repetitions
303 'aaaaabbcdrr'
304 >>> sum(c.values()) # total of all counts
305 11
306
307 >>> c['a'] # count of letter 'a'
308 5
309 >>> for elem in 'shazam': # update counts from an iterable
310 ... c[elem] += 1 # by adding 1 to each element's count
311 >>> c['a'] # now there are seven 'a'
312 7
313 >>> del c['r'] # remove all 'r'
314 >>> c['r'] # now there are zero 'r'
315 0
316
317 >>> d = Counter('simsalabim') # make another counter
318 >>> c.update(d) # add in the second counter
319 >>> c['a'] # now there are nine 'a'
320 9
321
322 >>> c.clear() # empty the counter
323 >>> c
324 Counter()
325
326 Note: If a count is set to zero or reduced to zero, it will remain
327 in the counter until the entry is deleted or the counter is cleared:
328
329 >>> c = Counter('aaabbc')
330 >>> c['b'] -= 2 # reduce the count of 'b' by two
331 >>> c.most_common() # 'b' is still in, but its count is zero
332 [('a', 3), ('c', 1), ('b', 0)]
333
334 '''
335 # References:
336 # http://en.wikipedia.org/wiki/Multiset
337 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
338 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
339 # http://code.activestate.com/recipes/259174/
340 # Knuth, TAOCP Vol. II section 4.6.3
341
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000342 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000343 '''Create a new, empty Counter object. And if given, count elements
344 from an input iterable. Or, initialize the count from another mapping
345 of elements to their counts.
346
347 >>> c = Counter() # a new, empty counter
348 >>> c = Counter('gallahad') # a new counter from an iterable
349 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000350 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000351
352 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000353 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000354
355 def __missing__(self, key):
356 'The count of elements not in the Counter is zero.'
357 # Needed so that self[missing_item] does not raise KeyError
358 return 0
359
360 def most_common(self, n=None):
361 '''List the n most common elements and their counts from the most
362 common to the least. If n is None, then list all element counts.
363
364 >>> Counter('abracadabra').most_common(3)
365 [('a', 5), ('r', 2), ('b', 2)]
366
367 '''
368 # Emulate Bag.sortedByCount from Smalltalk
369 if n is None:
370 return sorted(self.items(), key=_itemgetter(1), reverse=True)
371 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
372
373 def elements(self):
374 '''Iterator over elements repeating each as many times as its count.
375
376 >>> c = Counter('ABCABC')
377 >>> sorted(c.elements())
378 ['A', 'A', 'B', 'B', 'C', 'C']
379
380 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
381 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
382 >>> product = 1
383 >>> for factor in prime_factors.elements(): # loop over factors
384 ... product *= factor # and multiply them
385 >>> product
386 1836
387
388 Note, if an element's count has been set to zero or is a negative
389 number, elements() will ignore it.
390
391 '''
392 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
393 return _chain.from_iterable(_starmap(_repeat, self.items()))
394
395 # Override dict methods where necessary
396
397 @classmethod
398 def fromkeys(cls, iterable, v=None):
399 # There is no equivalent method for counters because setting v=1
400 # means that no element can have a count greater than one.
401 raise NotImplementedError(
402 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
403
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000404 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000405 '''Like dict.update() but add counts instead of replacing them.
406
407 Source can be an iterable, a dictionary, or another Counter instance.
408
409 >>> c = Counter('which')
410 >>> c.update('witch') # add elements from another iterable
411 >>> d = Counter('watch')
412 >>> c.update(d) # add elements from another counter
413 >>> c['h'] # four 'h' in which, witch, and watch
414 4
415
416 '''
417 # The regular dict.update() operation makes no sense here because the
418 # replace behavior results in the some of original untouched counts
419 # being mixed-in with all of the other counts for a mismash that
420 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000421 # contexts. Instead, we implement straight-addition. Both the inputs
422 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000423
424 if iterable is not None:
425 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000426 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000427 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000428 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000429 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000430 else:
431 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000432 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000433 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000434 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000435 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000436 if kwds:
437 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000438
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000439 def subtract(self, iterable=None, **kwds):
440 '''Like dict.update() but subtracts counts instead of replacing them.
441 Counts can be reduced below zero. Both the inputs and outputs are
442 allowed to contain zero and negative counts.
443
444 Source can be an iterable, a dictionary, or another Counter instance.
445
446 >>> c = Counter('which')
447 >>> c.subtract('witch') # subtract elements from another iterable
448 >>> c.subtract(Counter('watch')) # subtract elements from another counter
449 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
450 0
451 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
452 -1
453
454 '''
455 if iterable is not None:
456 if isinstance(iterable, Mapping):
457 self_get = self.get
458 for elem, count in iterable.items():
459 self[elem] = self_get(elem, 0) - count
460 else:
461 self_get = self.get
462 for elem in iterable:
463 self[elem] = self_get(elem, 0) - 1
464 if kwds:
465 self.subtract(kwds)
466
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000467 def copy(self):
468 'Like dict.copy() but returns a Counter instance instead of a dict.'
469 return Counter(self)
470
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000471 def __delitem__(self, elem):
472 'Like dict.__delitem__() but does not raise KeyError for missing values.'
473 if elem in self:
474 dict.__delitem__(self, elem)
475
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000476 def __repr__(self):
477 if not self:
478 return '%s()' % self.__class__.__name__
479 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
480 return '%s({%s})' % (self.__class__.__name__, items)
481
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000482 # Multiset-style mathematical operations discussed in:
483 # Knuth TAOCP Volume II section 4.6.3 exercise 19
484 # and at http://en.wikipedia.org/wiki/Multiset
485 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000486 # Outputs guaranteed to only include positive counts.
487 #
488 # To strip negative and zero counts, add-in an empty counter:
489 # c += Counter()
490
491 def __add__(self, other):
492 '''Add counts from two counters.
493
494 >>> Counter('abbb') + Counter('bcc')
495 Counter({'b': 4, 'c': 2, 'a': 1})
496
497 '''
498 if not isinstance(other, Counter):
499 return NotImplemented
500 result = Counter()
501 for elem in set(self) | set(other):
502 newcount = self[elem] + other[elem]
503 if newcount > 0:
504 result[elem] = newcount
505 return result
506
507 def __sub__(self, other):
508 ''' Subtract count, but keep only results with positive counts.
509
510 >>> Counter('abbbc') - Counter('bccd')
511 Counter({'b': 2, 'a': 1})
512
513 '''
514 if not isinstance(other, Counter):
515 return NotImplemented
516 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000517 for elem in set(self) | set(other):
518 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000519 if newcount > 0:
520 result[elem] = newcount
521 return result
522
523 def __or__(self, other):
524 '''Union is the maximum of value in either of the input counters.
525
526 >>> Counter('abbb') | Counter('bcc')
527 Counter({'b': 3, 'c': 2, 'a': 1})
528
529 '''
530 if not isinstance(other, Counter):
531 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000532 result = Counter()
533 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000534 p, q = self[elem], other[elem]
535 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000536 if newcount > 0:
537 result[elem] = newcount
538 return result
539
540 def __and__(self, other):
541 ''' Intersection is the minimum of corresponding counts.
542
543 >>> Counter('abbb') & Counter('bcc')
544 Counter({'b': 1})
545
546 '''
547 if not isinstance(other, Counter):
548 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000549 result = Counter()
550 if len(self) < len(other):
551 self, other = other, self
552 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000553 p, q = self[elem], other[elem]
554 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000555 if newcount > 0:
556 result[elem] = newcount
557 return result
558
Guido van Rossumd8faa362007-04-27 19:54:29 +0000559
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000560################################################################################
561### UserDict
562################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000563
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000564class UserDict(MutableMapping):
565
566 # Start by filling-out the abstract methods
567 def __init__(self, dict=None, **kwargs):
568 self.data = {}
569 if dict is not None:
570 self.update(dict)
571 if len(kwargs):
572 self.update(kwargs)
573 def __len__(self): return len(self.data)
574 def __getitem__(self, key):
575 if key in self.data:
576 return self.data[key]
577 if hasattr(self.__class__, "__missing__"):
578 return self.__class__.__missing__(self, key)
579 raise KeyError(key)
580 def __setitem__(self, key, item): self.data[key] = item
581 def __delitem__(self, key): del self.data[key]
582 def __iter__(self):
583 return iter(self.data)
584
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000585 # Modify __contains__ to work correctly when __missing__ is present
586 def __contains__(self, key):
587 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000588
589 # Now, add the methods in dicts but not in MutableMapping
590 def __repr__(self): return repr(self.data)
591 def copy(self):
592 if self.__class__ is UserDict:
593 return UserDict(self.data.copy())
594 import copy
595 data = self.data
596 try:
597 self.data = {}
598 c = copy.copy(self)
599 finally:
600 self.data = data
601 c.update(self)
602 return c
603 @classmethod
604 def fromkeys(cls, iterable, value=None):
605 d = cls()
606 for key in iterable:
607 d[key] = value
608 return d
609
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000610
611
612################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000613### UserList
614################################################################################
615
616class UserList(MutableSequence):
617 """A more or less complete user-defined wrapper around list objects."""
618 def __init__(self, initlist=None):
619 self.data = []
620 if initlist is not None:
621 # XXX should this accept an arbitrary sequence?
622 if type(initlist) == type(self.data):
623 self.data[:] = initlist
624 elif isinstance(initlist, UserList):
625 self.data[:] = initlist.data[:]
626 else:
627 self.data = list(initlist)
628 def __repr__(self): return repr(self.data)
629 def __lt__(self, other): return self.data < self.__cast(other)
630 def __le__(self, other): return self.data <= self.__cast(other)
631 def __eq__(self, other): return self.data == self.__cast(other)
632 def __ne__(self, other): return self.data != self.__cast(other)
633 def __gt__(self, other): return self.data > self.__cast(other)
634 def __ge__(self, other): return self.data >= self.__cast(other)
635 def __cast(self, other):
636 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000637 def __contains__(self, item): return item in self.data
638 def __len__(self): return len(self.data)
639 def __getitem__(self, i): return self.data[i]
640 def __setitem__(self, i, item): self.data[i] = item
641 def __delitem__(self, i): del self.data[i]
642 def __add__(self, other):
643 if isinstance(other, UserList):
644 return self.__class__(self.data + other.data)
645 elif isinstance(other, type(self.data)):
646 return self.__class__(self.data + other)
647 return self.__class__(self.data + list(other))
648 def __radd__(self, other):
649 if isinstance(other, UserList):
650 return self.__class__(other.data + self.data)
651 elif isinstance(other, type(self.data)):
652 return self.__class__(other + self.data)
653 return self.__class__(list(other) + self.data)
654 def __iadd__(self, other):
655 if isinstance(other, UserList):
656 self.data += other.data
657 elif isinstance(other, type(self.data)):
658 self.data += other
659 else:
660 self.data += list(other)
661 return self
662 def __mul__(self, n):
663 return self.__class__(self.data*n)
664 __rmul__ = __mul__
665 def __imul__(self, n):
666 self.data *= n
667 return self
668 def append(self, item): self.data.append(item)
669 def insert(self, i, item): self.data.insert(i, item)
670 def pop(self, i=-1): return self.data.pop(i)
671 def remove(self, item): self.data.remove(item)
672 def count(self, item): return self.data.count(item)
673 def index(self, item, *args): return self.data.index(item, *args)
674 def reverse(self): self.data.reverse()
675 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
676 def extend(self, other):
677 if isinstance(other, UserList):
678 self.data.extend(other.data)
679 else:
680 self.data.extend(other)
681
682
683
684################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000685### UserString
686################################################################################
687
688class UserString(Sequence):
689 def __init__(self, seq):
690 if isinstance(seq, str):
691 self.data = seq
692 elif isinstance(seq, UserString):
693 self.data = seq.data[:]
694 else:
695 self.data = str(seq)
696 def __str__(self): return str(self.data)
697 def __repr__(self): return repr(self.data)
698 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000699 def __float__(self): return float(self.data)
700 def __complex__(self): return complex(self.data)
701 def __hash__(self): return hash(self.data)
702
703 def __eq__(self, string):
704 if isinstance(string, UserString):
705 return self.data == string.data
706 return self.data == string
707 def __ne__(self, string):
708 if isinstance(string, UserString):
709 return self.data != string.data
710 return self.data != string
711 def __lt__(self, string):
712 if isinstance(string, UserString):
713 return self.data < string.data
714 return self.data < string
715 def __le__(self, string):
716 if isinstance(string, UserString):
717 return self.data <= string.data
718 return self.data <= string
719 def __gt__(self, string):
720 if isinstance(string, UserString):
721 return self.data > string.data
722 return self.data > string
723 def __ge__(self, string):
724 if isinstance(string, UserString):
725 return self.data >= string.data
726 return self.data >= string
727
728 def __contains__(self, char):
729 if isinstance(char, UserString):
730 char = char.data
731 return char in self.data
732
733 def __len__(self): return len(self.data)
734 def __getitem__(self, index): return self.__class__(self.data[index])
735 def __add__(self, other):
736 if isinstance(other, UserString):
737 return self.__class__(self.data + other.data)
738 elif isinstance(other, str):
739 return self.__class__(self.data + other)
740 return self.__class__(self.data + str(other))
741 def __radd__(self, other):
742 if isinstance(other, str):
743 return self.__class__(other + self.data)
744 return self.__class__(str(other) + self.data)
745 def __mul__(self, n):
746 return self.__class__(self.data*n)
747 __rmul__ = __mul__
748 def __mod__(self, args):
749 return self.__class__(self.data % args)
750
751 # the following methods are defined in alphabetical order:
752 def capitalize(self): return self.__class__(self.data.capitalize())
753 def center(self, width, *args):
754 return self.__class__(self.data.center(width, *args))
755 def count(self, sub, start=0, end=_sys.maxsize):
756 if isinstance(sub, UserString):
757 sub = sub.data
758 return self.data.count(sub, start, end)
759 def encode(self, encoding=None, errors=None): # XXX improve this?
760 if encoding:
761 if errors:
762 return self.__class__(self.data.encode(encoding, errors))
763 return self.__class__(self.data.encode(encoding))
764 return self.__class__(self.data.encode())
765 def endswith(self, suffix, start=0, end=_sys.maxsize):
766 return self.data.endswith(suffix, start, end)
767 def expandtabs(self, tabsize=8):
768 return self.__class__(self.data.expandtabs(tabsize))
769 def find(self, sub, start=0, end=_sys.maxsize):
770 if isinstance(sub, UserString):
771 sub = sub.data
772 return self.data.find(sub, start, end)
773 def format(self, *args, **kwds):
774 return self.data.format(*args, **kwds)
775 def index(self, sub, start=0, end=_sys.maxsize):
776 return self.data.index(sub, start, end)
777 def isalpha(self): return self.data.isalpha()
778 def isalnum(self): return self.data.isalnum()
779 def isdecimal(self): return self.data.isdecimal()
780 def isdigit(self): return self.data.isdigit()
781 def isidentifier(self): return self.data.isidentifier()
782 def islower(self): return self.data.islower()
783 def isnumeric(self): return self.data.isnumeric()
784 def isspace(self): return self.data.isspace()
785 def istitle(self): return self.data.istitle()
786 def isupper(self): return self.data.isupper()
787 def join(self, seq): return self.data.join(seq)
788 def ljust(self, width, *args):
789 return self.__class__(self.data.ljust(width, *args))
790 def lower(self): return self.__class__(self.data.lower())
791 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
792 def partition(self, sep):
793 return self.data.partition(sep)
794 def replace(self, old, new, maxsplit=-1):
795 if isinstance(old, UserString):
796 old = old.data
797 if isinstance(new, UserString):
798 new = new.data
799 return self.__class__(self.data.replace(old, new, maxsplit))
800 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000801 if isinstance(sub, UserString):
802 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000803 return self.data.rfind(sub, start, end)
804 def rindex(self, sub, start=0, end=_sys.maxsize):
805 return self.data.rindex(sub, start, end)
806 def rjust(self, width, *args):
807 return self.__class__(self.data.rjust(width, *args))
808 def rpartition(self, sep):
809 return self.data.rpartition(sep)
810 def rstrip(self, chars=None):
811 return self.__class__(self.data.rstrip(chars))
812 def split(self, sep=None, maxsplit=-1):
813 return self.data.split(sep, maxsplit)
814 def rsplit(self, sep=None, maxsplit=-1):
815 return self.data.rsplit(sep, maxsplit)
816 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
817 def startswith(self, prefix, start=0, end=_sys.maxsize):
818 return self.data.startswith(prefix, start, end)
819 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
820 def swapcase(self): return self.__class__(self.data.swapcase())
821 def title(self): return self.__class__(self.data.title())
822 def translate(self, *args):
823 return self.__class__(self.data.translate(*args))
824 def upper(self): return self.__class__(self.data.upper())
825 def zfill(self, width): return self.__class__(self.data.zfill(width))
826
827
828
829################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000830### Simple tests
831################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000832
833if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000834 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000835 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000836 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000837 p = Point(x=10, y=20)
838 assert p == loads(dumps(p))
839
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000840 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000841 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000842 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000843 @property
844 def hypot(self):
845 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000846 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000847 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000848
Christian Heimes25bb7832008-01-11 16:17:00 +0000849 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000850 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000851
852 class Point(namedtuple('Point', 'x y')):
853 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000854 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000855 _make = classmethod(tuple.__new__)
856 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000857 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000858
859 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000860
Christian Heimes25bb7832008-01-11 16:17:00 +0000861 Point3D = namedtuple('Point3D', Point._fields + ('z',))
862 print(Point3D.__doc__)
863
Guido van Rossumd8faa362007-04-27 19:54:29 +0000864 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000865 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000866 print(TestResults(*doctest.testmod()))