blob: 6ec062f9a458c4d925cc20c02188e6dbdeac5108 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1736542009-03-23 05:19:21 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger2d32f632009-03-02 21:24:57 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000046 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000047 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000048 except AttributeError:
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000049 self.__root = root = _Link() # sentinel node for the doubly linked list
50 root.prev = root.next = root
51 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000052 self.update(*args, **kwds)
53
54 def clear(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000055 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerf1736542009-03-23 05:19:21 +000056 root = self.__root
57 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000058 self.__map.clear()
Raymond Hettinger2d32f632009-03-02 21:24:57 +000059 dict.clear(self)
60
61 def __setitem__(self, key, value):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000062 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000063 # Setting a new item creates a new link which goes at the end of the linked
64 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000065 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000066 self.__map[key] = link = _Link()
67 root = self.__root
68 last = root.prev
69 link.prev, link.next, link.key = last, root, key
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000070 last.next = root.prev = _proxy(link)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000071 dict.__setitem__(self, key, value)
72
73 def __delitem__(self, key):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000074 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000075 # Deleting an existing item uses self.__map to find the link which is
76 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000077 dict.__delitem__(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000078 link = self.__map.pop(key)
79 link.prev.next = link.next
80 link.next.prev = link.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000081
82 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000083 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 # Traverse the linked list in order.
85 root = self.__root
86 curr = root.next
87 while curr is not root:
88 yield curr.key
89 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000090
91 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000092 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 # Traverse the linked list in reverse order.
94 root = self.__root
95 curr = root.prev
96 while curr is not root:
97 yield curr.key
98 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
100 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000101 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000102 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +0000103 tmp = self.__map, self.__root
104 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000105 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +0000106 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000107 if inst_dict:
108 return (self.__class__, (items,), inst_dict)
109 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000110
111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
117
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000118 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000119 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
120 Pairs are returned in LIFO order if last is true or FIFO order if false.
121
122 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000123 if not self:
124 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000125 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000126 value = self.pop(key)
127 return key, value
128
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000129 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000130 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000131 if not self:
132 return '%s()' % (self.__class__.__name__,)
133 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
134
135 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000136 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000137 return self.__class__(self)
138
139 @classmethod
140 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000141 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
142 and values equal to v (which defaults to None).
143
144 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000145 d = cls()
146 for key in iterable:
147 d[key] = value
148 return d
149
150 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000151 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
152 while comparison to a regular mapping is order-insensitive.
153
154 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000155 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000156 return len(self)==len(other) and \
157 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000158 return dict.__eq__(self, other)
159
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000160 def __ne__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000161 '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
162 while comparison to a regular mapping is order-insensitive.
163
164 '''
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000165 return not self == other
166
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000167
Christian Heimes99170a52007-12-19 02:07:34 +0000168
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000169################################################################################
170### namedtuple
171################################################################################
172
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000173def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000174 """Returns a new subclass of tuple with named fields.
175
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000176 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000177 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000178 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000179 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000180 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000181 33
Christian Heimes99170a52007-12-19 02:07:34 +0000182 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000183 >>> x, y
184 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000185 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000186 33
Christian Heimes0449f632007-12-15 01:27:15 +0000187 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000188 >>> d['x']
189 11
190 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000191 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000192 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000193 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000194
195 """
196
Christian Heimes2380ac72008-01-09 00:17:24 +0000197 # Parse and validate the field names. Validation serves two purposes,
198 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000199 if isinstance(field_names, str):
200 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000201 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000202 if rename:
203 names = list(field_names)
204 seen = set()
205 for i, name in enumerate(names):
206 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
207 or not name or name[0].isdigit() or name.startswith('_')
208 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000209 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000210 seen.add(name)
211 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000212 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000213 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000214 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
215 if _iskeyword(name):
216 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
217 if name[0].isdigit():
218 raise ValueError('Type names and field names cannot start with a number: %r' % name)
219 seen_names = set()
220 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000221 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000222 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000223 if name in seen_names:
224 raise ValueError('Encountered duplicate field name: %r' % name)
225 seen_names.add(name)
226
227 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000228 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000229 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000230 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
231 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000232 '%(typename)s(%(argtxt)s)' \n
233 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000234 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000235 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000236 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000237 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000238 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000239 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000240 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000241 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000242 if len(result) != %(numfields)d:
243 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
244 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000245 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000246 'Return a nicely formatted representation string'
Christian Heimes0449f632007-12-15 01:27:15 +0000247 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000248 def _asdict(self):
249 'Return a new OrderedDict which maps field names to their values'
250 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000251 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000252 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000253 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000254 if kwds:
255 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000256 return result \n
257 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000258 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000259 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000260 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000261 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000262 if verbose:
263 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000264
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000265 # Execute the template string in a temporary namespace and
266 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000267 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
268 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000269 try:
270 exec(template, namespace)
271 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000272 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000273 result = namespace[typename]
274
275 # For pickling to work, the __module__ variable needs to be set to the frame
276 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000277 # sys._getframe is not defined (Jython for example) or sys._getframe is not
278 # defined for arguments greater than 0 (IronPython).
279 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000280 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000281 except (AttributeError, ValueError):
282 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000283
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000284 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000285
Guido van Rossumd8faa362007-04-27 19:54:29 +0000286
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000287########################################################################
288### Counter
289########################################################################
290
291class Counter(dict):
292 '''Dict subclass for counting hashable items. Sometimes called a bag
293 or multiset. Elements are stored as dictionary keys and their counts
294 are stored as dictionary values.
295
296 >>> c = Counter('abracadabra') # count elements from a string
297
298 >>> c.most_common(3) # three most common elements
299 [('a', 5), ('r', 2), ('b', 2)]
300 >>> sorted(c) # list all unique elements
301 ['a', 'b', 'c', 'd', 'r']
302 >>> ''.join(sorted(c.elements())) # list elements with repetitions
303 'aaaaabbcdrr'
304 >>> sum(c.values()) # total of all counts
305 11
306
307 >>> c['a'] # count of letter 'a'
308 5
309 >>> for elem in 'shazam': # update counts from an iterable
310 ... c[elem] += 1 # by adding 1 to each element's count
311 >>> c['a'] # now there are seven 'a'
312 7
313 >>> del c['r'] # remove all 'r'
314 >>> c['r'] # now there are zero 'r'
315 0
316
317 >>> d = Counter('simsalabim') # make another counter
318 >>> c.update(d) # add in the second counter
319 >>> c['a'] # now there are nine 'a'
320 9
321
322 >>> c.clear() # empty the counter
323 >>> c
324 Counter()
325
326 Note: If a count is set to zero or reduced to zero, it will remain
327 in the counter until the entry is deleted or the counter is cleared:
328
329 >>> c = Counter('aaabbc')
330 >>> c['b'] -= 2 # reduce the count of 'b' by two
331 >>> c.most_common() # 'b' is still in, but its count is zero
332 [('a', 3), ('c', 1), ('b', 0)]
333
334 '''
335 # References:
336 # http://en.wikipedia.org/wiki/Multiset
337 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
338 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
339 # http://code.activestate.com/recipes/259174/
340 # Knuth, TAOCP Vol. II section 4.6.3
341
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000342 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000343 '''Create a new, empty Counter object. And if given, count elements
344 from an input iterable. Or, initialize the count from another mapping
345 of elements to their counts.
346
347 >>> c = Counter() # a new, empty counter
348 >>> c = Counter('gallahad') # a new counter from an iterable
349 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000350 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000351
352 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000353 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000354
355 def __missing__(self, key):
356 'The count of elements not in the Counter is zero.'
357 # Needed so that self[missing_item] does not raise KeyError
358 return 0
359
360 def most_common(self, n=None):
361 '''List the n most common elements and their counts from the most
362 common to the least. If n is None, then list all element counts.
363
364 >>> Counter('abracadabra').most_common(3)
365 [('a', 5), ('r', 2), ('b', 2)]
366
367 '''
368 # Emulate Bag.sortedByCount from Smalltalk
369 if n is None:
370 return sorted(self.items(), key=_itemgetter(1), reverse=True)
371 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
372
373 def elements(self):
374 '''Iterator over elements repeating each as many times as its count.
375
376 >>> c = Counter('ABCABC')
377 >>> sorted(c.elements())
378 ['A', 'A', 'B', 'B', 'C', 'C']
379
380 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
381 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
382 >>> product = 1
383 >>> for factor in prime_factors.elements(): # loop over factors
384 ... product *= factor # and multiply them
385 >>> product
386 1836
387
388 Note, if an element's count has been set to zero or is a negative
389 number, elements() will ignore it.
390
391 '''
392 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
393 return _chain.from_iterable(_starmap(_repeat, self.items()))
394
395 # Override dict methods where necessary
396
397 @classmethod
398 def fromkeys(cls, iterable, v=None):
399 # There is no equivalent method for counters because setting v=1
400 # means that no element can have a count greater than one.
401 raise NotImplementedError(
402 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
403
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000404 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000405 '''Like dict.update() but add counts instead of replacing them.
406
407 Source can be an iterable, a dictionary, or another Counter instance.
408
409 >>> c = Counter('which')
410 >>> c.update('witch') # add elements from another iterable
411 >>> d = Counter('watch')
412 >>> c.update(d) # add elements from another counter
413 >>> c['h'] # four 'h' in which, witch, and watch
414 4
415
416 '''
417 # The regular dict.update() operation makes no sense here because the
418 # replace behavior results in the some of original untouched counts
419 # being mixed-in with all of the other counts for a mismash that
420 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000421 # contexts. Instead, we implement straight-addition. Both the inputs
422 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000423
424 if iterable is not None:
425 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000426 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000427 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000428 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000429 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000430 else:
431 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000432 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000433 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000434 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000435 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000436 if kwds:
437 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000438
439 def copy(self):
440 'Like dict.copy() but returns a Counter instance instead of a dict.'
441 return Counter(self)
442
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000443 def __delitem__(self, elem):
444 'Like dict.__delitem__() but does not raise KeyError for missing values.'
445 if elem in self:
446 dict.__delitem__(self, elem)
447
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000448 def __repr__(self):
449 if not self:
450 return '%s()' % self.__class__.__name__
451 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
452 return '%s({%s})' % (self.__class__.__name__, items)
453
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000454 # Multiset-style mathematical operations discussed in:
455 # Knuth TAOCP Volume II section 4.6.3 exercise 19
456 # and at http://en.wikipedia.org/wiki/Multiset
457 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000458 # Outputs guaranteed to only include positive counts.
459 #
460 # To strip negative and zero counts, add-in an empty counter:
461 # c += Counter()
462
463 def __add__(self, other):
464 '''Add counts from two counters.
465
466 >>> Counter('abbb') + Counter('bcc')
467 Counter({'b': 4, 'c': 2, 'a': 1})
468
469 '''
470 if not isinstance(other, Counter):
471 return NotImplemented
472 result = Counter()
473 for elem in set(self) | set(other):
474 newcount = self[elem] + other[elem]
475 if newcount > 0:
476 result[elem] = newcount
477 return result
478
479 def __sub__(self, other):
480 ''' Subtract count, but keep only results with positive counts.
481
482 >>> Counter('abbbc') - Counter('bccd')
483 Counter({'b': 2, 'a': 1})
484
485 '''
486 if not isinstance(other, Counter):
487 return NotImplemented
488 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000489 for elem in set(self) | set(other):
490 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000491 if newcount > 0:
492 result[elem] = newcount
493 return result
494
495 def __or__(self, other):
496 '''Union is the maximum of value in either of the input counters.
497
498 >>> Counter('abbb') | Counter('bcc')
499 Counter({'b': 3, 'c': 2, 'a': 1})
500
501 '''
502 if not isinstance(other, Counter):
503 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000504 result = Counter()
505 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000506 p, q = self[elem], other[elem]
507 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000508 if newcount > 0:
509 result[elem] = newcount
510 return result
511
512 def __and__(self, other):
513 ''' Intersection is the minimum of corresponding counts.
514
515 >>> Counter('abbb') & Counter('bcc')
516 Counter({'b': 1})
517
518 '''
519 if not isinstance(other, Counter):
520 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000521 result = Counter()
522 if len(self) < len(other):
523 self, other = other, self
524 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000525 p, q = self[elem], other[elem]
526 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000527 if newcount > 0:
528 result[elem] = newcount
529 return result
530
Guido van Rossumd8faa362007-04-27 19:54:29 +0000531
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000532################################################################################
533### UserDict
534################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000535
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000536class UserDict(MutableMapping):
537
538 # Start by filling-out the abstract methods
539 def __init__(self, dict=None, **kwargs):
540 self.data = {}
541 if dict is not None:
542 self.update(dict)
543 if len(kwargs):
544 self.update(kwargs)
545 def __len__(self): return len(self.data)
546 def __getitem__(self, key):
547 if key in self.data:
548 return self.data[key]
549 if hasattr(self.__class__, "__missing__"):
550 return self.__class__.__missing__(self, key)
551 raise KeyError(key)
552 def __setitem__(self, key, item): self.data[key] = item
553 def __delitem__(self, key): del self.data[key]
554 def __iter__(self):
555 return iter(self.data)
556
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000557 # Modify __contains__ to work correctly when __missing__ is present
558 def __contains__(self, key):
559 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000560
561 # Now, add the methods in dicts but not in MutableMapping
562 def __repr__(self): return repr(self.data)
563 def copy(self):
564 if self.__class__ is UserDict:
565 return UserDict(self.data.copy())
566 import copy
567 data = self.data
568 try:
569 self.data = {}
570 c = copy.copy(self)
571 finally:
572 self.data = data
573 c.update(self)
574 return c
575 @classmethod
576 def fromkeys(cls, iterable, value=None):
577 d = cls()
578 for key in iterable:
579 d[key] = value
580 return d
581
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000582
583
584################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000585### UserList
586################################################################################
587
588class UserList(MutableSequence):
589 """A more or less complete user-defined wrapper around list objects."""
590 def __init__(self, initlist=None):
591 self.data = []
592 if initlist is not None:
593 # XXX should this accept an arbitrary sequence?
594 if type(initlist) == type(self.data):
595 self.data[:] = initlist
596 elif isinstance(initlist, UserList):
597 self.data[:] = initlist.data[:]
598 else:
599 self.data = list(initlist)
600 def __repr__(self): return repr(self.data)
601 def __lt__(self, other): return self.data < self.__cast(other)
602 def __le__(self, other): return self.data <= self.__cast(other)
603 def __eq__(self, other): return self.data == self.__cast(other)
604 def __ne__(self, other): return self.data != self.__cast(other)
605 def __gt__(self, other): return self.data > self.__cast(other)
606 def __ge__(self, other): return self.data >= self.__cast(other)
607 def __cast(self, other):
608 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000609 def __contains__(self, item): return item in self.data
610 def __len__(self): return len(self.data)
611 def __getitem__(self, i): return self.data[i]
612 def __setitem__(self, i, item): self.data[i] = item
613 def __delitem__(self, i): del self.data[i]
614 def __add__(self, other):
615 if isinstance(other, UserList):
616 return self.__class__(self.data + other.data)
617 elif isinstance(other, type(self.data)):
618 return self.__class__(self.data + other)
619 return self.__class__(self.data + list(other))
620 def __radd__(self, other):
621 if isinstance(other, UserList):
622 return self.__class__(other.data + self.data)
623 elif isinstance(other, type(self.data)):
624 return self.__class__(other + self.data)
625 return self.__class__(list(other) + self.data)
626 def __iadd__(self, other):
627 if isinstance(other, UserList):
628 self.data += other.data
629 elif isinstance(other, type(self.data)):
630 self.data += other
631 else:
632 self.data += list(other)
633 return self
634 def __mul__(self, n):
635 return self.__class__(self.data*n)
636 __rmul__ = __mul__
637 def __imul__(self, n):
638 self.data *= n
639 return self
640 def append(self, item): self.data.append(item)
641 def insert(self, i, item): self.data.insert(i, item)
642 def pop(self, i=-1): return self.data.pop(i)
643 def remove(self, item): self.data.remove(item)
644 def count(self, item): return self.data.count(item)
645 def index(self, item, *args): return self.data.index(item, *args)
646 def reverse(self): self.data.reverse()
647 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
648 def extend(self, other):
649 if isinstance(other, UserList):
650 self.data.extend(other.data)
651 else:
652 self.data.extend(other)
653
654
655
656################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000657### UserString
658################################################################################
659
660class UserString(Sequence):
661 def __init__(self, seq):
662 if isinstance(seq, str):
663 self.data = seq
664 elif isinstance(seq, UserString):
665 self.data = seq.data[:]
666 else:
667 self.data = str(seq)
668 def __str__(self): return str(self.data)
669 def __repr__(self): return repr(self.data)
670 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000671 def __float__(self): return float(self.data)
672 def __complex__(self): return complex(self.data)
673 def __hash__(self): return hash(self.data)
674
675 def __eq__(self, string):
676 if isinstance(string, UserString):
677 return self.data == string.data
678 return self.data == string
679 def __ne__(self, string):
680 if isinstance(string, UserString):
681 return self.data != string.data
682 return self.data != string
683 def __lt__(self, string):
684 if isinstance(string, UserString):
685 return self.data < string.data
686 return self.data < string
687 def __le__(self, string):
688 if isinstance(string, UserString):
689 return self.data <= string.data
690 return self.data <= string
691 def __gt__(self, string):
692 if isinstance(string, UserString):
693 return self.data > string.data
694 return self.data > string
695 def __ge__(self, string):
696 if isinstance(string, UserString):
697 return self.data >= string.data
698 return self.data >= string
699
700 def __contains__(self, char):
701 if isinstance(char, UserString):
702 char = char.data
703 return char in self.data
704
705 def __len__(self): return len(self.data)
706 def __getitem__(self, index): return self.__class__(self.data[index])
707 def __add__(self, other):
708 if isinstance(other, UserString):
709 return self.__class__(self.data + other.data)
710 elif isinstance(other, str):
711 return self.__class__(self.data + other)
712 return self.__class__(self.data + str(other))
713 def __radd__(self, other):
714 if isinstance(other, str):
715 return self.__class__(other + self.data)
716 return self.__class__(str(other) + self.data)
717 def __mul__(self, n):
718 return self.__class__(self.data*n)
719 __rmul__ = __mul__
720 def __mod__(self, args):
721 return self.__class__(self.data % args)
722
723 # the following methods are defined in alphabetical order:
724 def capitalize(self): return self.__class__(self.data.capitalize())
725 def center(self, width, *args):
726 return self.__class__(self.data.center(width, *args))
727 def count(self, sub, start=0, end=_sys.maxsize):
728 if isinstance(sub, UserString):
729 sub = sub.data
730 return self.data.count(sub, start, end)
731 def encode(self, encoding=None, errors=None): # XXX improve this?
732 if encoding:
733 if errors:
734 return self.__class__(self.data.encode(encoding, errors))
735 return self.__class__(self.data.encode(encoding))
736 return self.__class__(self.data.encode())
737 def endswith(self, suffix, start=0, end=_sys.maxsize):
738 return self.data.endswith(suffix, start, end)
739 def expandtabs(self, tabsize=8):
740 return self.__class__(self.data.expandtabs(tabsize))
741 def find(self, sub, start=0, end=_sys.maxsize):
742 if isinstance(sub, UserString):
743 sub = sub.data
744 return self.data.find(sub, start, end)
745 def format(self, *args, **kwds):
746 return self.data.format(*args, **kwds)
747 def index(self, sub, start=0, end=_sys.maxsize):
748 return self.data.index(sub, start, end)
749 def isalpha(self): return self.data.isalpha()
750 def isalnum(self): return self.data.isalnum()
751 def isdecimal(self): return self.data.isdecimal()
752 def isdigit(self): return self.data.isdigit()
753 def isidentifier(self): return self.data.isidentifier()
754 def islower(self): return self.data.islower()
755 def isnumeric(self): return self.data.isnumeric()
756 def isspace(self): return self.data.isspace()
757 def istitle(self): return self.data.istitle()
758 def isupper(self): return self.data.isupper()
759 def join(self, seq): return self.data.join(seq)
760 def ljust(self, width, *args):
761 return self.__class__(self.data.ljust(width, *args))
762 def lower(self): return self.__class__(self.data.lower())
763 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
764 def partition(self, sep):
765 return self.data.partition(sep)
766 def replace(self, old, new, maxsplit=-1):
767 if isinstance(old, UserString):
768 old = old.data
769 if isinstance(new, UserString):
770 new = new.data
771 return self.__class__(self.data.replace(old, new, maxsplit))
772 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000773 if isinstance(sub, UserString):
774 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000775 return self.data.rfind(sub, start, end)
776 def rindex(self, sub, start=0, end=_sys.maxsize):
777 return self.data.rindex(sub, start, end)
778 def rjust(self, width, *args):
779 return self.__class__(self.data.rjust(width, *args))
780 def rpartition(self, sep):
781 return self.data.rpartition(sep)
782 def rstrip(self, chars=None):
783 return self.__class__(self.data.rstrip(chars))
784 def split(self, sep=None, maxsplit=-1):
785 return self.data.split(sep, maxsplit)
786 def rsplit(self, sep=None, maxsplit=-1):
787 return self.data.rsplit(sep, maxsplit)
788 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
789 def startswith(self, prefix, start=0, end=_sys.maxsize):
790 return self.data.startswith(prefix, start, end)
791 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
792 def swapcase(self): return self.__class__(self.data.swapcase())
793 def title(self): return self.__class__(self.data.title())
794 def translate(self, *args):
795 return self.__class__(self.data.translate(*args))
796 def upper(self): return self.__class__(self.data.upper())
797 def zfill(self, width): return self.__class__(self.data.zfill(width))
798
799
800
801################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000802### Simple tests
803################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000804
805if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000806 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000807 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000808 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000809 p = Point(x=10, y=20)
810 assert p == loads(dumps(p))
811
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000812 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000813 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000814 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000815 @property
816 def hypot(self):
817 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000818 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000819 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000820
Christian Heimes25bb7832008-01-11 16:17:00 +0000821 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000822 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000823
824 class Point(namedtuple('Point', 'x y')):
825 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000826 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000827 _make = classmethod(tuple.__new__)
828 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000829 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000830
831 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000832
Christian Heimes25bb7832008-01-11 16:17:00 +0000833 Point3D = namedtuple('Point3D', Point._fields + ('z',))
834 print(Point3D.__doc__)
835
Guido van Rossumd8faa362007-04-27 19:54:29 +0000836 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000837 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000838 print(TestResults(*doctest.testmod()))