blob: d6e8cb9967dc7f5af66c7c10dcc880b3fe7f43dc [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1736542009-03-23 05:19:21 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger2d32f632009-03-02 21:24:57 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000046 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000047 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000048 except AttributeError:
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000049 self.__root = root = _Link() # sentinel node for the doubly linked list
50 root.prev = root.next = root
51 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000052 self.update(*args, **kwds)
53
54 def clear(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000055 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerf1736542009-03-23 05:19:21 +000056 root = self.__root
57 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000058 self.__map.clear()
Raymond Hettinger2d32f632009-03-02 21:24:57 +000059 dict.clear(self)
60
61 def __setitem__(self, key, value):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000062 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000063 # Setting a new item creates a new link which goes at the end of the linked
64 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000065 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000066 self.__map[key] = link = _Link()
67 root = self.__root
68 last = root.prev
69 link.prev, link.next, link.key = last, root, key
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000070 last.next = root.prev = _proxy(link)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000071 dict.__setitem__(self, key, value)
72
73 def __delitem__(self, key):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000074 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000075 # Deleting an existing item uses self.__map to find the link which is
76 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000077 dict.__delitem__(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000078 link = self.__map.pop(key)
79 link.prev.next = link.next
80 link.next.prev = link.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000081
82 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000083 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 # Traverse the linked list in order.
85 root = self.__root
86 curr = root.next
87 while curr is not root:
88 yield curr.key
89 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000090
91 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000092 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 # Traverse the linked list in reverse order.
94 root = self.__root
95 curr = root.prev
96 while curr is not root:
97 yield curr.key
98 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
100 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000101 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000102 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +0000103 tmp = self.__map, self.__root
104 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000105 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +0000106 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000107 if inst_dict:
108 return (self.__class__, (items,), inst_dict)
109 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000110
111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
117
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000118 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000119 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
120 Pairs are returned in LIFO order if last is true or FIFO order if false.
121
122 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000123 if not self:
124 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000125 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000126 value = self.pop(key)
127 return key, value
128
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000129 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000130 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000131 if not self:
132 return '%s()' % (self.__class__.__name__,)
133 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
134
135 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000136 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000137 return self.__class__(self)
138
139 @classmethod
140 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000141 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
142 and values equal to v (which defaults to None).
143
144 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000145 d = cls()
146 for key in iterable:
147 d[key] = value
148 return d
149
150 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000151 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
152 while comparison to a regular mapping is order-insensitive.
153
154 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000155 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000156 return len(self)==len(other) and \
157 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000158 return dict.__eq__(self, other)
159
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000160 def __ne__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000161 '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
162 while comparison to a regular mapping is order-insensitive.
163
164 '''
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000165 return not self == other
166
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000167
Christian Heimes99170a52007-12-19 02:07:34 +0000168
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000169################################################################################
170### namedtuple
171################################################################################
172
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000173def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000174 """Returns a new subclass of tuple with named fields.
175
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000176 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000177 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000178 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000179 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000180 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000181 33
Christian Heimes99170a52007-12-19 02:07:34 +0000182 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000183 >>> x, y
184 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000185 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000186 33
Christian Heimes0449f632007-12-15 01:27:15 +0000187 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000188 >>> d['x']
189 11
190 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000191 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000192 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000193 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000194
195 """
196
Christian Heimes2380ac72008-01-09 00:17:24 +0000197 # Parse and validate the field names. Validation serves two purposes,
198 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000199 if isinstance(field_names, str):
200 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000201 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000202 if rename:
203 names = list(field_names)
204 seen = set()
205 for i, name in enumerate(names):
206 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
207 or not name or name[0].isdigit() or name.startswith('_')
208 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000209 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000210 seen.add(name)
211 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000212 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000213 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000214 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
215 if _iskeyword(name):
216 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
217 if name[0].isdigit():
218 raise ValueError('Type names and field names cannot start with a number: %r' % name)
219 seen_names = set()
220 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000221 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000222 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000223 if name in seen_names:
224 raise ValueError('Encountered duplicate field name: %r' % name)
225 seen_names.add(name)
226
227 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000228 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000229 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000230 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
231 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000232 '%(typename)s(%(argtxt)s)' \n
233 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000234 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000235 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000236 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000237 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000238 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000239 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000240 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000241 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000242 if len(result) != %(numfields)d:
243 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
244 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000245 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000246 'Return a nicely formatted representation string'
Christian Heimes0449f632007-12-15 01:27:15 +0000247 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000248 def _asdict(self):
249 'Return a new OrderedDict which maps field names to their values'
250 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000251 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000252 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000253 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000254 if kwds:
255 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000256 return result \n
257 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000258 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000259 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000260 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000261 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000262 if verbose:
263 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000264
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000265 # Execute the template string in a temporary namespace and
266 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000267 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
268 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000269 try:
270 exec(template, namespace)
271 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000272 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000273 result = namespace[typename]
274
275 # For pickling to work, the __module__ variable needs to be set to the frame
276 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000277 # sys._getframe is not defined (Jython for example) or sys._getframe is not
278 # defined for arguments greater than 0 (IronPython).
279 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000280 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000281 except (AttributeError, ValueError):
282 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000283
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000284 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000285
Guido van Rossumd8faa362007-04-27 19:54:29 +0000286
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000287########################################################################
288### Counter
289########################################################################
290
291class Counter(dict):
292 '''Dict subclass for counting hashable items. Sometimes called a bag
293 or multiset. Elements are stored as dictionary keys and their counts
294 are stored as dictionary values.
295
296 >>> c = Counter('abracadabra') # count elements from a string
297
298 >>> c.most_common(3) # three most common elements
299 [('a', 5), ('r', 2), ('b', 2)]
300 >>> sorted(c) # list all unique elements
301 ['a', 'b', 'c', 'd', 'r']
302 >>> ''.join(sorted(c.elements())) # list elements with repetitions
303 'aaaaabbcdrr'
304 >>> sum(c.values()) # total of all counts
305 11
306
307 >>> c['a'] # count of letter 'a'
308 5
309 >>> for elem in 'shazam': # update counts from an iterable
310 ... c[elem] += 1 # by adding 1 to each element's count
311 >>> c['a'] # now there are seven 'a'
312 7
313 >>> del c['r'] # remove all 'r'
314 >>> c['r'] # now there are zero 'r'
315 0
316
317 >>> d = Counter('simsalabim') # make another counter
318 >>> c.update(d) # add in the second counter
319 >>> c['a'] # now there are nine 'a'
320 9
321
322 >>> c.clear() # empty the counter
323 >>> c
324 Counter()
325
326 Note: If a count is set to zero or reduced to zero, it will remain
327 in the counter until the entry is deleted or the counter is cleared:
328
329 >>> c = Counter('aaabbc')
330 >>> c['b'] -= 2 # reduce the count of 'b' by two
331 >>> c.most_common() # 'b' is still in, but its count is zero
332 [('a', 3), ('c', 1), ('b', 0)]
333
334 '''
335 # References:
336 # http://en.wikipedia.org/wiki/Multiset
337 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
338 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
339 # http://code.activestate.com/recipes/259174/
340 # Knuth, TAOCP Vol. II section 4.6.3
341
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000342 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000343 '''Create a new, empty Counter object. And if given, count elements
344 from an input iterable. Or, initialize the count from another mapping
345 of elements to their counts.
346
347 >>> c = Counter() # a new, empty counter
348 >>> c = Counter('gallahad') # a new counter from an iterable
349 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000350 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000351
352 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000353 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000354
355 def __missing__(self, key):
356 'The count of elements not in the Counter is zero.'
357 # Needed so that self[missing_item] does not raise KeyError
358 return 0
359
360 def most_common(self, n=None):
361 '''List the n most common elements and their counts from the most
362 common to the least. If n is None, then list all element counts.
363
364 >>> Counter('abracadabra').most_common(3)
365 [('a', 5), ('r', 2), ('b', 2)]
366
367 '''
368 # Emulate Bag.sortedByCount from Smalltalk
369 if n is None:
370 return sorted(self.items(), key=_itemgetter(1), reverse=True)
371 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
372
373 def elements(self):
374 '''Iterator over elements repeating each as many times as its count.
375
376 >>> c = Counter('ABCABC')
377 >>> sorted(c.elements())
378 ['A', 'A', 'B', 'B', 'C', 'C']
379
380 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
381 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
382 >>> product = 1
383 >>> for factor in prime_factors.elements(): # loop over factors
384 ... product *= factor # and multiply them
385 >>> product
386 1836
387
388 Note, if an element's count has been set to zero or is a negative
389 number, elements() will ignore it.
390
391 '''
392 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
393 return _chain.from_iterable(_starmap(_repeat, self.items()))
394
395 # Override dict methods where necessary
396
397 @classmethod
398 def fromkeys(cls, iterable, v=None):
399 # There is no equivalent method for counters because setting v=1
400 # means that no element can have a count greater than one.
401 raise NotImplementedError(
402 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
403
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000404 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000405 '''Like dict.update() but add counts instead of replacing them.
406
407 Source can be an iterable, a dictionary, or another Counter instance.
408
409 >>> c = Counter('which')
410 >>> c.update('witch') # add elements from another iterable
411 >>> d = Counter('watch')
412 >>> c.update(d) # add elements from another counter
413 >>> c['h'] # four 'h' in which, witch, and watch
414 4
415
416 '''
417 # The regular dict.update() operation makes no sense here because the
418 # replace behavior results in the some of original untouched counts
419 # being mixed-in with all of the other counts for a mismash that
420 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000421 # contexts. Instead, we implement straight-addition. Both the inputs
422 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000423
424 if iterable is not None:
425 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000426 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000427 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000428 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000429 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000430 else:
431 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000432 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000433 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000434 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000435 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000436 if kwds:
437 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000438
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000439 def subtract(self, iterable=None, **kwds):
440 '''Like dict.update() but subtracts counts instead of replacing them.
441 Counts can be reduced below zero. Both the inputs and outputs are
442 allowed to contain zero and negative counts.
443
444 Source can be an iterable, a dictionary, or another Counter instance.
445
446 >>> c = Counter('which')
447 >>> c.subtract('witch') # subtract elements from another iterable
448 >>> c.subtract(Counter('watch')) # subtract elements from another counter
449 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
450 0
451 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
452 -1
453
454 '''
455 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000456 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000457 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000458 for elem, count in iterable.items():
459 self[elem] = self_get(elem, 0) - count
460 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000461 for elem in iterable:
462 self[elem] = self_get(elem, 0) - 1
463 if kwds:
464 self.subtract(kwds)
465
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000466 def copy(self):
467 'Like dict.copy() but returns a Counter instance instead of a dict.'
468 return Counter(self)
469
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000470 def __delitem__(self, elem):
471 'Like dict.__delitem__() but does not raise KeyError for missing values.'
472 if elem in self:
473 dict.__delitem__(self, elem)
474
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000475 def __repr__(self):
476 if not self:
477 return '%s()' % self.__class__.__name__
478 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
479 return '%s({%s})' % (self.__class__.__name__, items)
480
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000481 # Multiset-style mathematical operations discussed in:
482 # Knuth TAOCP Volume II section 4.6.3 exercise 19
483 # and at http://en.wikipedia.org/wiki/Multiset
484 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000485 # Outputs guaranteed to only include positive counts.
486 #
487 # To strip negative and zero counts, add-in an empty counter:
488 # c += Counter()
489
490 def __add__(self, other):
491 '''Add counts from two counters.
492
493 >>> Counter('abbb') + Counter('bcc')
494 Counter({'b': 4, 'c': 2, 'a': 1})
495
496 '''
497 if not isinstance(other, Counter):
498 return NotImplemented
499 result = Counter()
500 for elem in set(self) | set(other):
501 newcount = self[elem] + other[elem]
502 if newcount > 0:
503 result[elem] = newcount
504 return result
505
506 def __sub__(self, other):
507 ''' Subtract count, but keep only results with positive counts.
508
509 >>> Counter('abbbc') - Counter('bccd')
510 Counter({'b': 2, 'a': 1})
511
512 '''
513 if not isinstance(other, Counter):
514 return NotImplemented
515 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000516 for elem in set(self) | set(other):
517 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000518 if newcount > 0:
519 result[elem] = newcount
520 return result
521
522 def __or__(self, other):
523 '''Union is the maximum of value in either of the input counters.
524
525 >>> Counter('abbb') | Counter('bcc')
526 Counter({'b': 3, 'c': 2, 'a': 1})
527
528 '''
529 if not isinstance(other, Counter):
530 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000531 result = Counter()
532 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000533 p, q = self[elem], other[elem]
534 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000535 if newcount > 0:
536 result[elem] = newcount
537 return result
538
539 def __and__(self, other):
540 ''' Intersection is the minimum of corresponding counts.
541
542 >>> Counter('abbb') & Counter('bcc')
543 Counter({'b': 1})
544
545 '''
546 if not isinstance(other, Counter):
547 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000548 result = Counter()
549 if len(self) < len(other):
550 self, other = other, self
551 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000552 p, q = self[elem], other[elem]
553 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000554 if newcount > 0:
555 result[elem] = newcount
556 return result
557
Guido van Rossumd8faa362007-04-27 19:54:29 +0000558
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000559################################################################################
560### UserDict
561################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000562
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000563class UserDict(MutableMapping):
564
565 # Start by filling-out the abstract methods
566 def __init__(self, dict=None, **kwargs):
567 self.data = {}
568 if dict is not None:
569 self.update(dict)
570 if len(kwargs):
571 self.update(kwargs)
572 def __len__(self): return len(self.data)
573 def __getitem__(self, key):
574 if key in self.data:
575 return self.data[key]
576 if hasattr(self.__class__, "__missing__"):
577 return self.__class__.__missing__(self, key)
578 raise KeyError(key)
579 def __setitem__(self, key, item): self.data[key] = item
580 def __delitem__(self, key): del self.data[key]
581 def __iter__(self):
582 return iter(self.data)
583
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000584 # Modify __contains__ to work correctly when __missing__ is present
585 def __contains__(self, key):
586 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000587
588 # Now, add the methods in dicts but not in MutableMapping
589 def __repr__(self): return repr(self.data)
590 def copy(self):
591 if self.__class__ is UserDict:
592 return UserDict(self.data.copy())
593 import copy
594 data = self.data
595 try:
596 self.data = {}
597 c = copy.copy(self)
598 finally:
599 self.data = data
600 c.update(self)
601 return c
602 @classmethod
603 def fromkeys(cls, iterable, value=None):
604 d = cls()
605 for key in iterable:
606 d[key] = value
607 return d
608
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000609
610
611################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000612### UserList
613################################################################################
614
615class UserList(MutableSequence):
616 """A more or less complete user-defined wrapper around list objects."""
617 def __init__(self, initlist=None):
618 self.data = []
619 if initlist is not None:
620 # XXX should this accept an arbitrary sequence?
621 if type(initlist) == type(self.data):
622 self.data[:] = initlist
623 elif isinstance(initlist, UserList):
624 self.data[:] = initlist.data[:]
625 else:
626 self.data = list(initlist)
627 def __repr__(self): return repr(self.data)
628 def __lt__(self, other): return self.data < self.__cast(other)
629 def __le__(self, other): return self.data <= self.__cast(other)
630 def __eq__(self, other): return self.data == self.__cast(other)
631 def __ne__(self, other): return self.data != self.__cast(other)
632 def __gt__(self, other): return self.data > self.__cast(other)
633 def __ge__(self, other): return self.data >= self.__cast(other)
634 def __cast(self, other):
635 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000636 def __contains__(self, item): return item in self.data
637 def __len__(self): return len(self.data)
638 def __getitem__(self, i): return self.data[i]
639 def __setitem__(self, i, item): self.data[i] = item
640 def __delitem__(self, i): del self.data[i]
641 def __add__(self, other):
642 if isinstance(other, UserList):
643 return self.__class__(self.data + other.data)
644 elif isinstance(other, type(self.data)):
645 return self.__class__(self.data + other)
646 return self.__class__(self.data + list(other))
647 def __radd__(self, other):
648 if isinstance(other, UserList):
649 return self.__class__(other.data + self.data)
650 elif isinstance(other, type(self.data)):
651 return self.__class__(other + self.data)
652 return self.__class__(list(other) + self.data)
653 def __iadd__(self, other):
654 if isinstance(other, UserList):
655 self.data += other.data
656 elif isinstance(other, type(self.data)):
657 self.data += other
658 else:
659 self.data += list(other)
660 return self
661 def __mul__(self, n):
662 return self.__class__(self.data*n)
663 __rmul__ = __mul__
664 def __imul__(self, n):
665 self.data *= n
666 return self
667 def append(self, item): self.data.append(item)
668 def insert(self, i, item): self.data.insert(i, item)
669 def pop(self, i=-1): return self.data.pop(i)
670 def remove(self, item): self.data.remove(item)
671 def count(self, item): return self.data.count(item)
672 def index(self, item, *args): return self.data.index(item, *args)
673 def reverse(self): self.data.reverse()
674 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
675 def extend(self, other):
676 if isinstance(other, UserList):
677 self.data.extend(other.data)
678 else:
679 self.data.extend(other)
680
681
682
683################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000684### UserString
685################################################################################
686
687class UserString(Sequence):
688 def __init__(self, seq):
689 if isinstance(seq, str):
690 self.data = seq
691 elif isinstance(seq, UserString):
692 self.data = seq.data[:]
693 else:
694 self.data = str(seq)
695 def __str__(self): return str(self.data)
696 def __repr__(self): return repr(self.data)
697 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000698 def __float__(self): return float(self.data)
699 def __complex__(self): return complex(self.data)
700 def __hash__(self): return hash(self.data)
701
702 def __eq__(self, string):
703 if isinstance(string, UserString):
704 return self.data == string.data
705 return self.data == string
706 def __ne__(self, string):
707 if isinstance(string, UserString):
708 return self.data != string.data
709 return self.data != string
710 def __lt__(self, string):
711 if isinstance(string, UserString):
712 return self.data < string.data
713 return self.data < string
714 def __le__(self, string):
715 if isinstance(string, UserString):
716 return self.data <= string.data
717 return self.data <= string
718 def __gt__(self, string):
719 if isinstance(string, UserString):
720 return self.data > string.data
721 return self.data > string
722 def __ge__(self, string):
723 if isinstance(string, UserString):
724 return self.data >= string.data
725 return self.data >= string
726
727 def __contains__(self, char):
728 if isinstance(char, UserString):
729 char = char.data
730 return char in self.data
731
732 def __len__(self): return len(self.data)
733 def __getitem__(self, index): return self.__class__(self.data[index])
734 def __add__(self, other):
735 if isinstance(other, UserString):
736 return self.__class__(self.data + other.data)
737 elif isinstance(other, str):
738 return self.__class__(self.data + other)
739 return self.__class__(self.data + str(other))
740 def __radd__(self, other):
741 if isinstance(other, str):
742 return self.__class__(other + self.data)
743 return self.__class__(str(other) + self.data)
744 def __mul__(self, n):
745 return self.__class__(self.data*n)
746 __rmul__ = __mul__
747 def __mod__(self, args):
748 return self.__class__(self.data % args)
749
750 # the following methods are defined in alphabetical order:
751 def capitalize(self): return self.__class__(self.data.capitalize())
752 def center(self, width, *args):
753 return self.__class__(self.data.center(width, *args))
754 def count(self, sub, start=0, end=_sys.maxsize):
755 if isinstance(sub, UserString):
756 sub = sub.data
757 return self.data.count(sub, start, end)
758 def encode(self, encoding=None, errors=None): # XXX improve this?
759 if encoding:
760 if errors:
761 return self.__class__(self.data.encode(encoding, errors))
762 return self.__class__(self.data.encode(encoding))
763 return self.__class__(self.data.encode())
764 def endswith(self, suffix, start=0, end=_sys.maxsize):
765 return self.data.endswith(suffix, start, end)
766 def expandtabs(self, tabsize=8):
767 return self.__class__(self.data.expandtabs(tabsize))
768 def find(self, sub, start=0, end=_sys.maxsize):
769 if isinstance(sub, UserString):
770 sub = sub.data
771 return self.data.find(sub, start, end)
772 def format(self, *args, **kwds):
773 return self.data.format(*args, **kwds)
774 def index(self, sub, start=0, end=_sys.maxsize):
775 return self.data.index(sub, start, end)
776 def isalpha(self): return self.data.isalpha()
777 def isalnum(self): return self.data.isalnum()
778 def isdecimal(self): return self.data.isdecimal()
779 def isdigit(self): return self.data.isdigit()
780 def isidentifier(self): return self.data.isidentifier()
781 def islower(self): return self.data.islower()
782 def isnumeric(self): return self.data.isnumeric()
783 def isspace(self): return self.data.isspace()
784 def istitle(self): return self.data.istitle()
785 def isupper(self): return self.data.isupper()
786 def join(self, seq): return self.data.join(seq)
787 def ljust(self, width, *args):
788 return self.__class__(self.data.ljust(width, *args))
789 def lower(self): return self.__class__(self.data.lower())
790 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
791 def partition(self, sep):
792 return self.data.partition(sep)
793 def replace(self, old, new, maxsplit=-1):
794 if isinstance(old, UserString):
795 old = old.data
796 if isinstance(new, UserString):
797 new = new.data
798 return self.__class__(self.data.replace(old, new, maxsplit))
799 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000800 if isinstance(sub, UserString):
801 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000802 return self.data.rfind(sub, start, end)
803 def rindex(self, sub, start=0, end=_sys.maxsize):
804 return self.data.rindex(sub, start, end)
805 def rjust(self, width, *args):
806 return self.__class__(self.data.rjust(width, *args))
807 def rpartition(self, sep):
808 return self.data.rpartition(sep)
809 def rstrip(self, chars=None):
810 return self.__class__(self.data.rstrip(chars))
811 def split(self, sep=None, maxsplit=-1):
812 return self.data.split(sep, maxsplit)
813 def rsplit(self, sep=None, maxsplit=-1):
814 return self.data.rsplit(sep, maxsplit)
815 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
816 def startswith(self, prefix, start=0, end=_sys.maxsize):
817 return self.data.startswith(prefix, start, end)
818 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
819 def swapcase(self): return self.__class__(self.data.swapcase())
820 def title(self): return self.__class__(self.data.title())
821 def translate(self, *args):
822 return self.__class__(self.data.translate(*args))
823 def upper(self): return self.__class__(self.data.upper())
824 def zfill(self, width): return self.__class__(self.data.zfill(width))
825
826
827
828################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000829### Simple tests
830################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000831
832if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000833 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000834 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000835 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000836 p = Point(x=10, y=20)
837 assert p == loads(dumps(p))
838
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000839 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000840 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000841 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000842 @property
843 def hypot(self):
844 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000845 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000846 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000847
Christian Heimes25bb7832008-01-11 16:17:00 +0000848 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000849 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000850
851 class Point(namedtuple('Point', 'x y')):
852 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000853 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000854 _make = classmethod(tuple.__new__)
855 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000856 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000857
858 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000859
Christian Heimes25bb7832008-01-11 16:17:00 +0000860 Point3D = namedtuple('Point3D', Point._fields + ('z',))
861 print(Point3D.__doc__)
862
Guido van Rossumd8faa362007-04-27 19:54:29 +0000863 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000864 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000865 print(TestResults(*doctest.testmod()))