blob: 9381f516763e44607e40b50f3ece244ba8b82393 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1736542009-03-23 05:19:21 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger1d879f62011-01-04 20:57:19 +000024class OrderedDict(dict):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger69976a72010-09-12 05:28:42 +000046 self.__in_repr = False # detects recursive repr
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000047 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000048 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000049 except AttributeError:
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000050 self.__root = root = _Link() # sentinel node for the doubly linked list
51 root.prev = root.next = root
52 self.__map = {}
Raymond Hettinger1d879f62011-01-04 20:57:19 +000053 self.__update(*args, **kwds)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000054
55 def clear(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000056 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerf1736542009-03-23 05:19:21 +000057 root = self.__root
58 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000059 self.__map.clear()
Raymond Hettinger2d32f632009-03-02 21:24:57 +000060 dict.clear(self)
61
62 def __setitem__(self, key, value):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000063 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000064 # Setting a new item creates a new link which goes at the end of the linked
65 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000066 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000067 self.__map[key] = link = _Link()
68 root = self.__root
69 last = root.prev
70 link.prev, link.next, link.key = last, root, key
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000071 last.next = root.prev = _proxy(link)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000072 dict.__setitem__(self, key, value)
73
74 def __delitem__(self, key):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000075 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 # Deleting an existing item uses self.__map to find the link which is
77 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000078 dict.__delitem__(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000079 link = self.__map.pop(key)
80 link.prev.next = link.next
81 link.next.prev = link.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000082
83 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000084 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000085 # Traverse the linked list in order.
86 root = self.__root
87 curr = root.next
88 while curr is not root:
89 yield curr.key
90 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000091
92 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000093 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000094 # Traverse the linked list in reverse order.
95 root = self.__root
96 curr = root.prev
97 while curr is not root:
98 yield curr.key
99 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000100
101 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000102 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000103 items = [[k, self[k]] for k in self]
104 inst_dict = vars(self).copy()
Raymond Hettingerd08a2c22011-04-19 10:05:03 -0700105 for k in vars(self.__class__()):
106 inst_dict.pop(k, None)
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000107 if inst_dict:
108 return (self.__class__, (items,), inst_dict)
109 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000110
Raymond Hettinger1d879f62011-01-04 20:57:19 +0000111 update = __update = MutableMapping.update
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000112 keys = MutableMapping.keys
113 values = MutableMapping.values
114 items = MutableMapping.items
115
Raymond Hettinger1d879f62011-01-04 20:57:19 +0000116 __marker = object()
117
118 def pop(self, key, default=__marker):
119 if key in self:
120 result = self[key]
121 del self[key]
122 return result
123 if default is self.__marker:
124 raise KeyError(key)
125 return default
126
127 def setdefault(self, key, default=None):
128 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
129 if key in self:
130 return self[key]
131 self[key] = default
132 return default
133
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000134 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000135 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
136 Pairs are returned in LIFO order if last is true or FIFO order if false.
137
138 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000139 if not self:
140 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000141 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000142 value = self.pop(key)
143 return key, value
144
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000145 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000146 'od.__repr__() <==> repr(od)'
Raymond Hettinger69976a72010-09-12 05:28:42 +0000147 if self.__in_repr:
148 return '...'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000149 if not self:
150 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger69976a72010-09-12 05:28:42 +0000151 self.__in_repr = True
152 try:
153 result = '%s(%r)' % (self.__class__.__name__, list(self.items()))
154 finally:
155 self.__in_repr = False
156 return result
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000157
158 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000159 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000160 return self.__class__(self)
161
162 @classmethod
163 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000164 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
165 and values equal to v (which defaults to None).
166
167 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000168 d = cls()
169 for key in iterable:
170 d[key] = value
171 return d
172
173 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000174 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
175 while comparison to a regular mapping is order-insensitive.
176
177 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000178 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000179 return len(self)==len(other) and \
180 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000181 return dict.__eq__(self, other)
182
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000183 def __ne__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000184 '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
185 while comparison to a regular mapping is order-insensitive.
186
187 '''
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000188 return not self == other
189
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000190
Christian Heimes99170a52007-12-19 02:07:34 +0000191
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000192################################################################################
193### namedtuple
194################################################################################
195
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000196def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000197 """Returns a new subclass of tuple with named fields.
198
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000199 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000200 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000201 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000202 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000203 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000204 33
Christian Heimes99170a52007-12-19 02:07:34 +0000205 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000206 >>> x, y
207 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000208 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000209 33
Christian Heimes0449f632007-12-15 01:27:15 +0000210 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000211 >>> d['x']
212 11
213 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000214 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000215 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000216 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000217
218 """
219
Christian Heimes2380ac72008-01-09 00:17:24 +0000220 # Parse and validate the field names. Validation serves two purposes,
221 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000222 if isinstance(field_names, str):
223 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000224 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000225 if rename:
226 names = list(field_names)
227 seen = set()
228 for i, name in enumerate(names):
229 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
230 or not name or name[0].isdigit() or name.startswith('_')
231 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000232 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000233 seen.add(name)
234 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000235 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000236 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000237 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
238 if _iskeyword(name):
239 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
240 if name[0].isdigit():
241 raise ValueError('Type names and field names cannot start with a number: %r' % name)
242 seen_names = set()
243 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000244 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000245 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000246 if name in seen_names:
247 raise ValueError('Encountered duplicate field name: %r' % name)
248 seen_names.add(name)
249
250 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000251 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000252 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000253 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
254 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000255 '%(typename)s(%(argtxt)s)' \n
256 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000257 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000258 def __new__(_cls, %(argtxt)s):
259 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000260 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000261 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000262 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000263 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000264 if len(result) != %(numfields)d:
265 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
266 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000267 def __repr__(self):
Christian Heimes0449f632007-12-15 01:27:15 +0000268 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000269 def _asdict(self):
270 'Return a new OrderedDict which maps field names to their values'
271 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000272 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000273 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000274 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000275 if kwds:
276 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000277 return result \n
278 def __getnewargs__(self):
279 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000280 for i, name in enumerate(field_names):
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000281 template += ' %s = _property(_itemgetter(%d))\n' % (name, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000282 if verbose:
283 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000284
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000285 # Execute the template string in a temporary namespace and
286 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000287 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
288 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000289 try:
290 exec(template, namespace)
291 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000292 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000293 result = namespace[typename]
294
295 # For pickling to work, the __module__ variable needs to be set to the frame
296 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000297 # sys._getframe is not defined (Jython for example) or sys._getframe is not
298 # defined for arguments greater than 0 (IronPython).
299 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000300 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000301 except (AttributeError, ValueError):
302 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000303
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000304 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000305
Guido van Rossumd8faa362007-04-27 19:54:29 +0000306
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000307########################################################################
308### Counter
309########################################################################
310
311class Counter(dict):
312 '''Dict subclass for counting hashable items. Sometimes called a bag
313 or multiset. Elements are stored as dictionary keys and their counts
314 are stored as dictionary values.
315
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000316 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000317
318 >>> c.most_common(3) # three most common elements
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000319 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000320 >>> sorted(c) # list all unique elements
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000321 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000322 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000323 'aaaaabbbbcccdde'
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000324 >>> sum(c.values()) # total of all counts
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000325 15
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000326
327 >>> c['a'] # count of letter 'a'
328 5
329 >>> for elem in 'shazam': # update counts from an iterable
330 ... c[elem] += 1 # by adding 1 to each element's count
331 >>> c['a'] # now there are seven 'a'
332 7
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000333 >>> del c['b'] # remove all 'b'
334 >>> c['b'] # now there are zero 'b'
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000335 0
336
337 >>> d = Counter('simsalabim') # make another counter
338 >>> c.update(d) # add in the second counter
339 >>> c['a'] # now there are nine 'a'
340 9
341
342 >>> c.clear() # empty the counter
343 >>> c
344 Counter()
345
346 Note: If a count is set to zero or reduced to zero, it will remain
347 in the counter until the entry is deleted or the counter is cleared:
348
349 >>> c = Counter('aaabbc')
350 >>> c['b'] -= 2 # reduce the count of 'b' by two
351 >>> c.most_common() # 'b' is still in, but its count is zero
352 [('a', 3), ('c', 1), ('b', 0)]
353
354 '''
355 # References:
356 # http://en.wikipedia.org/wiki/Multiset
357 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
358 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
359 # http://code.activestate.com/recipes/259174/
360 # Knuth, TAOCP Vol. II section 4.6.3
361
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000362 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000363 '''Create a new, empty Counter object. And if given, count elements
364 from an input iterable. Or, initialize the count from another mapping
365 of elements to their counts.
366
367 >>> c = Counter() # a new, empty counter
368 >>> c = Counter('gallahad') # a new counter from an iterable
369 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000370 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000371
372 '''
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000373 super().__init__()
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000374 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000375
376 def __missing__(self, key):
377 'The count of elements not in the Counter is zero.'
378 # Needed so that self[missing_item] does not raise KeyError
379 return 0
380
381 def most_common(self, n=None):
382 '''List the n most common elements and their counts from the most
383 common to the least. If n is None, then list all element counts.
384
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000385 >>> Counter('abcdeabcdabcaba').most_common(3)
386 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000387
388 '''
389 # Emulate Bag.sortedByCount from Smalltalk
390 if n is None:
391 return sorted(self.items(), key=_itemgetter(1), reverse=True)
392 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
393
394 def elements(self):
395 '''Iterator over elements repeating each as many times as its count.
396
397 >>> c = Counter('ABCABC')
398 >>> sorted(c.elements())
399 ['A', 'A', 'B', 'B', 'C', 'C']
400
401 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
402 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
403 >>> product = 1
404 >>> for factor in prime_factors.elements(): # loop over factors
405 ... product *= factor # and multiply them
406 >>> product
407 1836
408
409 Note, if an element's count has been set to zero or is a negative
410 number, elements() will ignore it.
411
412 '''
413 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
414 return _chain.from_iterable(_starmap(_repeat, self.items()))
415
416 # Override dict methods where necessary
417
418 @classmethod
419 def fromkeys(cls, iterable, v=None):
420 # There is no equivalent method for counters because setting v=1
421 # means that no element can have a count greater than one.
422 raise NotImplementedError(
423 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
424
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000425 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000426 '''Like dict.update() but add counts instead of replacing them.
427
428 Source can be an iterable, a dictionary, or another Counter instance.
429
430 >>> c = Counter('which')
431 >>> c.update('witch') # add elements from another iterable
432 >>> d = Counter('watch')
433 >>> c.update(d) # add elements from another counter
434 >>> c['h'] # four 'h' in which, witch, and watch
435 4
436
437 '''
438 # The regular dict.update() operation makes no sense here because the
439 # replace behavior results in the some of original untouched counts
440 # being mixed-in with all of the other counts for a mismash that
441 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000442 # contexts. Instead, we implement straight-addition. Both the inputs
443 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000444
445 if iterable is not None:
446 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000447 if self:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000448 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000449 for elem, count in iterable.items():
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000450 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000451 else:
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000452 super().update(iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000453 else:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000454 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000455 for elem in iterable:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000456 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000457 if kwds:
458 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000459
460 def copy(self):
Raymond Hettinger1c746c22011-04-15 13:16:46 -0700461 'Return a shallow copy.'
462 return self.__class__(self)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000463
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000464 def __reduce__(self):
465 return self.__class__, (dict(self),)
466
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000467 def __delitem__(self, elem):
468 'Like dict.__delitem__() but does not raise KeyError for missing values.'
469 if elem in self:
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000470 super().__delitem__(elem)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000471
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000472 def __repr__(self):
473 if not self:
474 return '%s()' % self.__class__.__name__
475 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
476 return '%s({%s})' % (self.__class__.__name__, items)
477
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000478 # Multiset-style mathematical operations discussed in:
479 # Knuth TAOCP Volume II section 4.6.3 exercise 19
480 # and at http://en.wikipedia.org/wiki/Multiset
481 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000482 # Outputs guaranteed to only include positive counts.
483 #
484 # To strip negative and zero counts, add-in an empty counter:
485 # c += Counter()
486
487 def __add__(self, other):
488 '''Add counts from two counters.
489
490 >>> Counter('abbb') + Counter('bcc')
491 Counter({'b': 4, 'c': 2, 'a': 1})
492
493 '''
494 if not isinstance(other, Counter):
495 return NotImplemented
496 result = Counter()
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700497 for elem, count in self.items():
498 newcount = count + other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000499 if newcount > 0:
500 result[elem] = newcount
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700501 for elem, count in other.items():
502 if elem not in self and count > 0:
503 result[elem] = count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000504 return result
505
506 def __sub__(self, other):
507 ''' Subtract count, but keep only results with positive counts.
508
509 >>> Counter('abbbc') - Counter('bccd')
510 Counter({'b': 2, 'a': 1})
511
512 '''
513 if not isinstance(other, Counter):
514 return NotImplemented
515 result = Counter()
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700516 for elem, count in self.items():
517 newcount = count - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000518 if newcount > 0:
519 result[elem] = newcount
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700520 for elem, count in other.items():
521 if elem not in self and count < 0:
522 result[elem] = 0 - count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000523 return result
524
525 def __or__(self, other):
526 '''Union is the maximum of value in either of the input counters.
527
528 >>> Counter('abbb') | Counter('bcc')
529 Counter({'b': 3, 'c': 2, 'a': 1})
530
531 '''
532 if not isinstance(other, Counter):
533 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000534 result = Counter()
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700535 for elem, count in self.items():
536 other_count = other[elem]
537 newcount = other_count if count < other_count else count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000538 if newcount > 0:
539 result[elem] = newcount
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700540 for elem, count in other.items():
541 if elem not in self and count > 0:
542 result[elem] = count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000543 return result
544
545 def __and__(self, other):
546 ''' Intersection is the minimum of corresponding counts.
547
548 >>> Counter('abbb') & Counter('bcc')
549 Counter({'b': 1})
550
551 '''
552 if not isinstance(other, Counter):
553 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000554 result = Counter()
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700555 for elem, count in self.items():
556 other_count = other[elem]
557 newcount = count if count < other_count else other_count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000558 if newcount > 0:
559 result[elem] = newcount
560 return result
561
Guido van Rossumd8faa362007-04-27 19:54:29 +0000562
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000563################################################################################
564### UserDict
565################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000566
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000567class UserDict(MutableMapping):
568
569 # Start by filling-out the abstract methods
570 def __init__(self, dict=None, **kwargs):
571 self.data = {}
572 if dict is not None:
573 self.update(dict)
574 if len(kwargs):
575 self.update(kwargs)
576 def __len__(self): return len(self.data)
577 def __getitem__(self, key):
578 if key in self.data:
579 return self.data[key]
580 if hasattr(self.__class__, "__missing__"):
581 return self.__class__.__missing__(self, key)
582 raise KeyError(key)
583 def __setitem__(self, key, item): self.data[key] = item
584 def __delitem__(self, key): del self.data[key]
585 def __iter__(self):
586 return iter(self.data)
587
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000588 # Modify __contains__ to work correctly when __missing__ is present
589 def __contains__(self, key):
590 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000591
592 # Now, add the methods in dicts but not in MutableMapping
593 def __repr__(self): return repr(self.data)
594 def copy(self):
595 if self.__class__ is UserDict:
596 return UserDict(self.data.copy())
597 import copy
598 data = self.data
599 try:
600 self.data = {}
601 c = copy.copy(self)
602 finally:
603 self.data = data
604 c.update(self)
605 return c
606 @classmethod
607 def fromkeys(cls, iterable, value=None):
608 d = cls()
609 for key in iterable:
610 d[key] = value
611 return d
612
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000613
614
615################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000616### UserList
617################################################################################
618
619class UserList(MutableSequence):
620 """A more or less complete user-defined wrapper around list objects."""
621 def __init__(self, initlist=None):
622 self.data = []
623 if initlist is not None:
624 # XXX should this accept an arbitrary sequence?
625 if type(initlist) == type(self.data):
626 self.data[:] = initlist
627 elif isinstance(initlist, UserList):
628 self.data[:] = initlist.data[:]
629 else:
630 self.data = list(initlist)
631 def __repr__(self): return repr(self.data)
632 def __lt__(self, other): return self.data < self.__cast(other)
633 def __le__(self, other): return self.data <= self.__cast(other)
634 def __eq__(self, other): return self.data == self.__cast(other)
635 def __ne__(self, other): return self.data != self.__cast(other)
636 def __gt__(self, other): return self.data > self.__cast(other)
637 def __ge__(self, other): return self.data >= self.__cast(other)
638 def __cast(self, other):
639 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000640 def __contains__(self, item): return item in self.data
641 def __len__(self): return len(self.data)
642 def __getitem__(self, i): return self.data[i]
643 def __setitem__(self, i, item): self.data[i] = item
644 def __delitem__(self, i): del self.data[i]
645 def __add__(self, other):
646 if isinstance(other, UserList):
647 return self.__class__(self.data + other.data)
648 elif isinstance(other, type(self.data)):
649 return self.__class__(self.data + other)
650 return self.__class__(self.data + list(other))
651 def __radd__(self, other):
652 if isinstance(other, UserList):
653 return self.__class__(other.data + self.data)
654 elif isinstance(other, type(self.data)):
655 return self.__class__(other + self.data)
656 return self.__class__(list(other) + self.data)
657 def __iadd__(self, other):
658 if isinstance(other, UserList):
659 self.data += other.data
660 elif isinstance(other, type(self.data)):
661 self.data += other
662 else:
663 self.data += list(other)
664 return self
665 def __mul__(self, n):
666 return self.__class__(self.data*n)
667 __rmul__ = __mul__
668 def __imul__(self, n):
669 self.data *= n
670 return self
671 def append(self, item): self.data.append(item)
672 def insert(self, i, item): self.data.insert(i, item)
673 def pop(self, i=-1): return self.data.pop(i)
674 def remove(self, item): self.data.remove(item)
675 def count(self, item): return self.data.count(item)
676 def index(self, item, *args): return self.data.index(item, *args)
677 def reverse(self): self.data.reverse()
678 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
679 def extend(self, other):
680 if isinstance(other, UserList):
681 self.data.extend(other.data)
682 else:
683 self.data.extend(other)
684
685
686
687################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000688### UserString
689################################################################################
690
691class UserString(Sequence):
692 def __init__(self, seq):
693 if isinstance(seq, str):
694 self.data = seq
695 elif isinstance(seq, UserString):
696 self.data = seq.data[:]
697 else:
698 self.data = str(seq)
699 def __str__(self): return str(self.data)
700 def __repr__(self): return repr(self.data)
701 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000702 def __float__(self): return float(self.data)
703 def __complex__(self): return complex(self.data)
704 def __hash__(self): return hash(self.data)
705
706 def __eq__(self, string):
707 if isinstance(string, UserString):
708 return self.data == string.data
709 return self.data == string
710 def __ne__(self, string):
711 if isinstance(string, UserString):
712 return self.data != string.data
713 return self.data != string
714 def __lt__(self, string):
715 if isinstance(string, UserString):
716 return self.data < string.data
717 return self.data < string
718 def __le__(self, string):
719 if isinstance(string, UserString):
720 return self.data <= string.data
721 return self.data <= string
722 def __gt__(self, string):
723 if isinstance(string, UserString):
724 return self.data > string.data
725 return self.data > string
726 def __ge__(self, string):
727 if isinstance(string, UserString):
728 return self.data >= string.data
729 return self.data >= string
730
731 def __contains__(self, char):
732 if isinstance(char, UserString):
733 char = char.data
734 return char in self.data
735
736 def __len__(self): return len(self.data)
737 def __getitem__(self, index): return self.__class__(self.data[index])
738 def __add__(self, other):
739 if isinstance(other, UserString):
740 return self.__class__(self.data + other.data)
741 elif isinstance(other, str):
742 return self.__class__(self.data + other)
743 return self.__class__(self.data + str(other))
744 def __radd__(self, other):
745 if isinstance(other, str):
746 return self.__class__(other + self.data)
747 return self.__class__(str(other) + self.data)
748 def __mul__(self, n):
749 return self.__class__(self.data*n)
750 __rmul__ = __mul__
751 def __mod__(self, args):
752 return self.__class__(self.data % args)
753
754 # the following methods are defined in alphabetical order:
755 def capitalize(self): return self.__class__(self.data.capitalize())
756 def center(self, width, *args):
757 return self.__class__(self.data.center(width, *args))
758 def count(self, sub, start=0, end=_sys.maxsize):
759 if isinstance(sub, UserString):
760 sub = sub.data
761 return self.data.count(sub, start, end)
762 def encode(self, encoding=None, errors=None): # XXX improve this?
763 if encoding:
764 if errors:
765 return self.__class__(self.data.encode(encoding, errors))
766 return self.__class__(self.data.encode(encoding))
767 return self.__class__(self.data.encode())
768 def endswith(self, suffix, start=0, end=_sys.maxsize):
769 return self.data.endswith(suffix, start, end)
770 def expandtabs(self, tabsize=8):
771 return self.__class__(self.data.expandtabs(tabsize))
772 def find(self, sub, start=0, end=_sys.maxsize):
773 if isinstance(sub, UserString):
774 sub = sub.data
775 return self.data.find(sub, start, end)
776 def format(self, *args, **kwds):
777 return self.data.format(*args, **kwds)
778 def index(self, sub, start=0, end=_sys.maxsize):
779 return self.data.index(sub, start, end)
780 def isalpha(self): return self.data.isalpha()
781 def isalnum(self): return self.data.isalnum()
782 def isdecimal(self): return self.data.isdecimal()
783 def isdigit(self): return self.data.isdigit()
784 def isidentifier(self): return self.data.isidentifier()
785 def islower(self): return self.data.islower()
786 def isnumeric(self): return self.data.isnumeric()
787 def isspace(self): return self.data.isspace()
788 def istitle(self): return self.data.istitle()
789 def isupper(self): return self.data.isupper()
790 def join(self, seq): return self.data.join(seq)
791 def ljust(self, width, *args):
792 return self.__class__(self.data.ljust(width, *args))
793 def lower(self): return self.__class__(self.data.lower())
794 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
795 def partition(self, sep):
796 return self.data.partition(sep)
797 def replace(self, old, new, maxsplit=-1):
798 if isinstance(old, UserString):
799 old = old.data
800 if isinstance(new, UserString):
801 new = new.data
802 return self.__class__(self.data.replace(old, new, maxsplit))
803 def rfind(self, sub, start=0, end=_sys.maxsize):
804 return self.data.rfind(sub, start, end)
805 def rindex(self, sub, start=0, end=_sys.maxsize):
806 return self.data.rindex(sub, start, end)
807 def rjust(self, width, *args):
808 return self.__class__(self.data.rjust(width, *args))
809 def rpartition(self, sep):
810 return self.data.rpartition(sep)
811 def rstrip(self, chars=None):
812 return self.__class__(self.data.rstrip(chars))
813 def split(self, sep=None, maxsplit=-1):
814 return self.data.split(sep, maxsplit)
815 def rsplit(self, sep=None, maxsplit=-1):
816 return self.data.rsplit(sep, maxsplit)
817 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
818 def startswith(self, prefix, start=0, end=_sys.maxsize):
819 return self.data.startswith(prefix, start, end)
820 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
821 def swapcase(self): return self.__class__(self.data.swapcase())
822 def title(self): return self.__class__(self.data.title())
823 def translate(self, *args):
824 return self.__class__(self.data.translate(*args))
825 def upper(self): return self.__class__(self.data.upper())
826 def zfill(self, width): return self.__class__(self.data.zfill(width))
827
828
829
830################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000831### Simple tests
832################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000833
834if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000835 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000836 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000837 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000838 p = Point(x=10, y=20)
839 assert p == loads(dumps(p))
840
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000841 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000842 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000843 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000844 @property
845 def hypot(self):
846 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000847 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000848 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000849
Christian Heimes25bb7832008-01-11 16:17:00 +0000850 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000851 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000852
853 class Point(namedtuple('Point', 'x y')):
854 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000855 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000856 _make = classmethod(tuple.__new__)
857 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000858 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000859
860 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000861
Christian Heimes25bb7832008-01-11 16:17:00 +0000862 Point3D = namedtuple('Point3D', Point._fields + ('z',))
863 print(Point3D.__doc__)
864
Guido van Rossumd8faa362007-04-27 19:54:29 +0000865 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000866 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000867 print(TestResults(*doctest.testmod()))