blob: 3329f08d62b3a7f58fb6a17d126f4aac96f2a9c4 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1736542009-03-23 05:19:21 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger2d32f632009-03-02 21:24:57 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger69976a72010-09-12 05:28:42 +000046 self.__in_repr = False # detects recursive repr
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000047 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000048 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000049 except AttributeError:
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000050 self.__root = root = _Link() # sentinel node for the doubly linked list
51 root.prev = root.next = root
52 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000053 self.update(*args, **kwds)
54
55 def clear(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000056 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerf1736542009-03-23 05:19:21 +000057 root = self.__root
58 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000059 self.__map.clear()
Raymond Hettinger2d32f632009-03-02 21:24:57 +000060 dict.clear(self)
61
62 def __setitem__(self, key, value):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000063 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000064 # Setting a new item creates a new link which goes at the end of the linked
65 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000066 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000067 self.__map[key] = link = _Link()
68 root = self.__root
69 last = root.prev
70 link.prev, link.next, link.key = last, root, key
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000071 last.next = root.prev = _proxy(link)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000072 dict.__setitem__(self, key, value)
73
74 def __delitem__(self, key):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000075 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 # Deleting an existing item uses self.__map to find the link which is
77 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000078 dict.__delitem__(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000079 link = self.__map.pop(key)
80 link.prev.next = link.next
81 link.next.prev = link.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000082
83 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000084 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000085 # Traverse the linked list in order.
86 root = self.__root
87 curr = root.next
88 while curr is not root:
89 yield curr.key
90 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000091
92 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000093 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000094 # Traverse the linked list in reverse order.
95 root = self.__root
96 curr = root.prev
97 while curr is not root:
98 yield curr.key
99 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000100
101 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000102 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000103 items = [[k, self[k]] for k in self]
Raymond Hettinger69976a72010-09-12 05:28:42 +0000104 tmp = self.__map, self.__root, self.__in_repr
105 del self.__map, self.__root, self.__in_repr
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000106 inst_dict = vars(self).copy()
Raymond Hettinger69976a72010-09-12 05:28:42 +0000107 self.__map, self.__root, self.__in_repr = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000108 if inst_dict:
109 return (self.__class__, (items,), inst_dict)
110 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000111
112 setdefault = MutableMapping.setdefault
113 update = MutableMapping.update
114 pop = MutableMapping.pop
115 keys = MutableMapping.keys
116 values = MutableMapping.values
117 items = MutableMapping.items
118
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000119 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000120 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
121 Pairs are returned in LIFO order if last is true or FIFO order if false.
122
123 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000124 if not self:
125 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000126 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000127 value = self.pop(key)
128 return key, value
129
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000130 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000131 'od.__repr__() <==> repr(od)'
Raymond Hettinger69976a72010-09-12 05:28:42 +0000132 if self.__in_repr:
133 return '...'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000134 if not self:
135 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger69976a72010-09-12 05:28:42 +0000136 self.__in_repr = True
137 try:
138 result = '%s(%r)' % (self.__class__.__name__, list(self.items()))
139 finally:
140 self.__in_repr = False
141 return result
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000142
143 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000144 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000145 return self.__class__(self)
146
147 @classmethod
148 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000149 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
150 and values equal to v (which defaults to None).
151
152 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000153 d = cls()
154 for key in iterable:
155 d[key] = value
156 return d
157
158 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000159 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
160 while comparison to a regular mapping is order-insensitive.
161
162 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000163 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000164 return len(self)==len(other) and \
165 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000166 return dict.__eq__(self, other)
167
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000168 def __ne__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000169 '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
170 while comparison to a regular mapping is order-insensitive.
171
172 '''
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000173 return not self == other
174
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000175
Christian Heimes99170a52007-12-19 02:07:34 +0000176
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000177################################################################################
178### namedtuple
179################################################################################
180
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000181def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000182 """Returns a new subclass of tuple with named fields.
183
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000184 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000185 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000186 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000187 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000188 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000189 33
Christian Heimes99170a52007-12-19 02:07:34 +0000190 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000191 >>> x, y
192 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000193 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000194 33
Christian Heimes0449f632007-12-15 01:27:15 +0000195 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000196 >>> d['x']
197 11
198 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000199 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000200 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000201 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000202
203 """
204
Christian Heimes2380ac72008-01-09 00:17:24 +0000205 # Parse and validate the field names. Validation serves two purposes,
206 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000207 if isinstance(field_names, str):
208 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000209 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000210 if rename:
211 names = list(field_names)
212 seen = set()
213 for i, name in enumerate(names):
214 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
215 or not name or name[0].isdigit() or name.startswith('_')
216 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000217 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000218 seen.add(name)
219 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000220 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000221 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000222 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
223 if _iskeyword(name):
224 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
225 if name[0].isdigit():
226 raise ValueError('Type names and field names cannot start with a number: %r' % name)
227 seen_names = set()
228 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000229 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000230 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000231 if name in seen_names:
232 raise ValueError('Encountered duplicate field name: %r' % name)
233 seen_names.add(name)
234
235 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000236 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000237 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000238 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
239 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000240 '%(typename)s(%(argtxt)s)' \n
241 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000242 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000243 def __new__(_cls, %(argtxt)s):
244 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000245 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000246 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000247 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000248 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000249 if len(result) != %(numfields)d:
250 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
251 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000252 def __repr__(self):
Christian Heimes0449f632007-12-15 01:27:15 +0000253 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000254 def _asdict(self):
255 'Return a new OrderedDict which maps field names to their values'
256 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000257 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000258 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000259 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000260 if kwds:
261 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000262 return result \n
263 def __getnewargs__(self):
264 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000265 for i, name in enumerate(field_names):
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000266 template += ' %s = _property(_itemgetter(%d))\n' % (name, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000267 if verbose:
268 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000269
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000270 # Execute the template string in a temporary namespace and
271 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000272 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
273 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000274 try:
275 exec(template, namespace)
276 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000277 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000278 result = namespace[typename]
279
280 # For pickling to work, the __module__ variable needs to be set to the frame
281 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000282 # sys._getframe is not defined (Jython for example) or sys._getframe is not
283 # defined for arguments greater than 0 (IronPython).
284 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000285 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000286 except (AttributeError, ValueError):
287 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000288
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000289 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000290
Guido van Rossumd8faa362007-04-27 19:54:29 +0000291
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000292########################################################################
293### Counter
294########################################################################
295
296class Counter(dict):
297 '''Dict subclass for counting hashable items. Sometimes called a bag
298 or multiset. Elements are stored as dictionary keys and their counts
299 are stored as dictionary values.
300
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000301 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000302
303 >>> c.most_common(3) # three most common elements
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000304 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000305 >>> sorted(c) # list all unique elements
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000306 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000307 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000308 'aaaaabbbbcccdde'
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000309 >>> sum(c.values()) # total of all counts
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000310 15
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000311
312 >>> c['a'] # count of letter 'a'
313 5
314 >>> for elem in 'shazam': # update counts from an iterable
315 ... c[elem] += 1 # by adding 1 to each element's count
316 >>> c['a'] # now there are seven 'a'
317 7
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000318 >>> del c['b'] # remove all 'b'
319 >>> c['b'] # now there are zero 'b'
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000320 0
321
322 >>> d = Counter('simsalabim') # make another counter
323 >>> c.update(d) # add in the second counter
324 >>> c['a'] # now there are nine 'a'
325 9
326
327 >>> c.clear() # empty the counter
328 >>> c
329 Counter()
330
331 Note: If a count is set to zero or reduced to zero, it will remain
332 in the counter until the entry is deleted or the counter is cleared:
333
334 >>> c = Counter('aaabbc')
335 >>> c['b'] -= 2 # reduce the count of 'b' by two
336 >>> c.most_common() # 'b' is still in, but its count is zero
337 [('a', 3), ('c', 1), ('b', 0)]
338
339 '''
340 # References:
341 # http://en.wikipedia.org/wiki/Multiset
342 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
343 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
344 # http://code.activestate.com/recipes/259174/
345 # Knuth, TAOCP Vol. II section 4.6.3
346
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000347 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000348 '''Create a new, empty Counter object. And if given, count elements
349 from an input iterable. Or, initialize the count from another mapping
350 of elements to their counts.
351
352 >>> c = Counter() # a new, empty counter
353 >>> c = Counter('gallahad') # a new counter from an iterable
354 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000355 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000356
357 '''
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000358 super().__init__()
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000359 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000360
361 def __missing__(self, key):
362 'The count of elements not in the Counter is zero.'
363 # Needed so that self[missing_item] does not raise KeyError
364 return 0
365
366 def most_common(self, n=None):
367 '''List the n most common elements and their counts from the most
368 common to the least. If n is None, then list all element counts.
369
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000370 >>> Counter('abcdeabcdabcaba').most_common(3)
371 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000372
373 '''
374 # Emulate Bag.sortedByCount from Smalltalk
375 if n is None:
376 return sorted(self.items(), key=_itemgetter(1), reverse=True)
377 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
378
379 def elements(self):
380 '''Iterator over elements repeating each as many times as its count.
381
382 >>> c = Counter('ABCABC')
383 >>> sorted(c.elements())
384 ['A', 'A', 'B', 'B', 'C', 'C']
385
386 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
387 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
388 >>> product = 1
389 >>> for factor in prime_factors.elements(): # loop over factors
390 ... product *= factor # and multiply them
391 >>> product
392 1836
393
394 Note, if an element's count has been set to zero or is a negative
395 number, elements() will ignore it.
396
397 '''
398 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
399 return _chain.from_iterable(_starmap(_repeat, self.items()))
400
401 # Override dict methods where necessary
402
403 @classmethod
404 def fromkeys(cls, iterable, v=None):
405 # There is no equivalent method for counters because setting v=1
406 # means that no element can have a count greater than one.
407 raise NotImplementedError(
408 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
409
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000410 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000411 '''Like dict.update() but add counts instead of replacing them.
412
413 Source can be an iterable, a dictionary, or another Counter instance.
414
415 >>> c = Counter('which')
416 >>> c.update('witch') # add elements from another iterable
417 >>> d = Counter('watch')
418 >>> c.update(d) # add elements from another counter
419 >>> c['h'] # four 'h' in which, witch, and watch
420 4
421
422 '''
423 # The regular dict.update() operation makes no sense here because the
424 # replace behavior results in the some of original untouched counts
425 # being mixed-in with all of the other counts for a mismash that
426 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000427 # contexts. Instead, we implement straight-addition. Both the inputs
428 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000429
430 if iterable is not None:
431 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000432 if self:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000433 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000434 for elem, count in iterable.items():
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000435 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000436 else:
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000437 super().update(iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000438 else:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000439 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000440 for elem in iterable:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000441 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000442 if kwds:
443 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000444
445 def copy(self):
446 'Like dict.copy() but returns a Counter instance instead of a dict.'
447 return Counter(self)
448
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000449 def __reduce__(self):
450 return self.__class__, (dict(self),)
451
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000452 def __delitem__(self, elem):
453 'Like dict.__delitem__() but does not raise KeyError for missing values.'
454 if elem in self:
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000455 super().__delitem__(elem)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000456
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000457 def __repr__(self):
458 if not self:
459 return '%s()' % self.__class__.__name__
460 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
461 return '%s({%s})' % (self.__class__.__name__, items)
462
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000463 # Multiset-style mathematical operations discussed in:
464 # Knuth TAOCP Volume II section 4.6.3 exercise 19
465 # and at http://en.wikipedia.org/wiki/Multiset
466 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000467 # Outputs guaranteed to only include positive counts.
468 #
469 # To strip negative and zero counts, add-in an empty counter:
470 # c += Counter()
471
472 def __add__(self, other):
473 '''Add counts from two counters.
474
475 >>> Counter('abbb') + Counter('bcc')
476 Counter({'b': 4, 'c': 2, 'a': 1})
477
478 '''
479 if not isinstance(other, Counter):
480 return NotImplemented
481 result = Counter()
482 for elem in set(self) | set(other):
483 newcount = self[elem] + other[elem]
484 if newcount > 0:
485 result[elem] = newcount
486 return result
487
488 def __sub__(self, other):
489 ''' Subtract count, but keep only results with positive counts.
490
491 >>> Counter('abbbc') - Counter('bccd')
492 Counter({'b': 2, 'a': 1})
493
494 '''
495 if not isinstance(other, Counter):
496 return NotImplemented
497 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000498 for elem in set(self) | set(other):
499 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000500 if newcount > 0:
501 result[elem] = newcount
502 return result
503
504 def __or__(self, other):
505 '''Union is the maximum of value in either of the input counters.
506
507 >>> Counter('abbb') | Counter('bcc')
508 Counter({'b': 3, 'c': 2, 'a': 1})
509
510 '''
511 if not isinstance(other, Counter):
512 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000513 result = Counter()
514 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000515 p, q = self[elem], other[elem]
516 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000517 if newcount > 0:
518 result[elem] = newcount
519 return result
520
521 def __and__(self, other):
522 ''' Intersection is the minimum of corresponding counts.
523
524 >>> Counter('abbb') & Counter('bcc')
525 Counter({'b': 1})
526
527 '''
528 if not isinstance(other, Counter):
529 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000530 result = Counter()
531 if len(self) < len(other):
532 self, other = other, self
533 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000534 p, q = self[elem], other[elem]
535 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000536 if newcount > 0:
537 result[elem] = newcount
538 return result
539
Guido van Rossumd8faa362007-04-27 19:54:29 +0000540
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000541################################################################################
542### UserDict
543################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000544
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000545class UserDict(MutableMapping):
546
547 # Start by filling-out the abstract methods
548 def __init__(self, dict=None, **kwargs):
549 self.data = {}
550 if dict is not None:
551 self.update(dict)
552 if len(kwargs):
553 self.update(kwargs)
554 def __len__(self): return len(self.data)
555 def __getitem__(self, key):
556 if key in self.data:
557 return self.data[key]
558 if hasattr(self.__class__, "__missing__"):
559 return self.__class__.__missing__(self, key)
560 raise KeyError(key)
561 def __setitem__(self, key, item): self.data[key] = item
562 def __delitem__(self, key): del self.data[key]
563 def __iter__(self):
564 return iter(self.data)
565
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000566 # Modify __contains__ to work correctly when __missing__ is present
567 def __contains__(self, key):
568 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000569
570 # Now, add the methods in dicts but not in MutableMapping
571 def __repr__(self): return repr(self.data)
572 def copy(self):
573 if self.__class__ is UserDict:
574 return UserDict(self.data.copy())
575 import copy
576 data = self.data
577 try:
578 self.data = {}
579 c = copy.copy(self)
580 finally:
581 self.data = data
582 c.update(self)
583 return c
584 @classmethod
585 def fromkeys(cls, iterable, value=None):
586 d = cls()
587 for key in iterable:
588 d[key] = value
589 return d
590
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000591
592
593################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000594### UserList
595################################################################################
596
597class UserList(MutableSequence):
598 """A more or less complete user-defined wrapper around list objects."""
599 def __init__(self, initlist=None):
600 self.data = []
601 if initlist is not None:
602 # XXX should this accept an arbitrary sequence?
603 if type(initlist) == type(self.data):
604 self.data[:] = initlist
605 elif isinstance(initlist, UserList):
606 self.data[:] = initlist.data[:]
607 else:
608 self.data = list(initlist)
609 def __repr__(self): return repr(self.data)
610 def __lt__(self, other): return self.data < self.__cast(other)
611 def __le__(self, other): return self.data <= self.__cast(other)
612 def __eq__(self, other): return self.data == self.__cast(other)
613 def __ne__(self, other): return self.data != self.__cast(other)
614 def __gt__(self, other): return self.data > self.__cast(other)
615 def __ge__(self, other): return self.data >= self.__cast(other)
616 def __cast(self, other):
617 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000618 def __contains__(self, item): return item in self.data
619 def __len__(self): return len(self.data)
620 def __getitem__(self, i): return self.data[i]
621 def __setitem__(self, i, item): self.data[i] = item
622 def __delitem__(self, i): del self.data[i]
623 def __add__(self, other):
624 if isinstance(other, UserList):
625 return self.__class__(self.data + other.data)
626 elif isinstance(other, type(self.data)):
627 return self.__class__(self.data + other)
628 return self.__class__(self.data + list(other))
629 def __radd__(self, other):
630 if isinstance(other, UserList):
631 return self.__class__(other.data + self.data)
632 elif isinstance(other, type(self.data)):
633 return self.__class__(other + self.data)
634 return self.__class__(list(other) + self.data)
635 def __iadd__(self, other):
636 if isinstance(other, UserList):
637 self.data += other.data
638 elif isinstance(other, type(self.data)):
639 self.data += other
640 else:
641 self.data += list(other)
642 return self
643 def __mul__(self, n):
644 return self.__class__(self.data*n)
645 __rmul__ = __mul__
646 def __imul__(self, n):
647 self.data *= n
648 return self
649 def append(self, item): self.data.append(item)
650 def insert(self, i, item): self.data.insert(i, item)
651 def pop(self, i=-1): return self.data.pop(i)
652 def remove(self, item): self.data.remove(item)
653 def count(self, item): return self.data.count(item)
654 def index(self, item, *args): return self.data.index(item, *args)
655 def reverse(self): self.data.reverse()
656 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
657 def extend(self, other):
658 if isinstance(other, UserList):
659 self.data.extend(other.data)
660 else:
661 self.data.extend(other)
662
663
664
665################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000666### UserString
667################################################################################
668
669class UserString(Sequence):
670 def __init__(self, seq):
671 if isinstance(seq, str):
672 self.data = seq
673 elif isinstance(seq, UserString):
674 self.data = seq.data[:]
675 else:
676 self.data = str(seq)
677 def __str__(self): return str(self.data)
678 def __repr__(self): return repr(self.data)
679 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000680 def __float__(self): return float(self.data)
681 def __complex__(self): return complex(self.data)
682 def __hash__(self): return hash(self.data)
683
684 def __eq__(self, string):
685 if isinstance(string, UserString):
686 return self.data == string.data
687 return self.data == string
688 def __ne__(self, string):
689 if isinstance(string, UserString):
690 return self.data != string.data
691 return self.data != string
692 def __lt__(self, string):
693 if isinstance(string, UserString):
694 return self.data < string.data
695 return self.data < string
696 def __le__(self, string):
697 if isinstance(string, UserString):
698 return self.data <= string.data
699 return self.data <= string
700 def __gt__(self, string):
701 if isinstance(string, UserString):
702 return self.data > string.data
703 return self.data > string
704 def __ge__(self, string):
705 if isinstance(string, UserString):
706 return self.data >= string.data
707 return self.data >= string
708
709 def __contains__(self, char):
710 if isinstance(char, UserString):
711 char = char.data
712 return char in self.data
713
714 def __len__(self): return len(self.data)
715 def __getitem__(self, index): return self.__class__(self.data[index])
716 def __add__(self, other):
717 if isinstance(other, UserString):
718 return self.__class__(self.data + other.data)
719 elif isinstance(other, str):
720 return self.__class__(self.data + other)
721 return self.__class__(self.data + str(other))
722 def __radd__(self, other):
723 if isinstance(other, str):
724 return self.__class__(other + self.data)
725 return self.__class__(str(other) + self.data)
726 def __mul__(self, n):
727 return self.__class__(self.data*n)
728 __rmul__ = __mul__
729 def __mod__(self, args):
730 return self.__class__(self.data % args)
731
732 # the following methods are defined in alphabetical order:
733 def capitalize(self): return self.__class__(self.data.capitalize())
734 def center(self, width, *args):
735 return self.__class__(self.data.center(width, *args))
736 def count(self, sub, start=0, end=_sys.maxsize):
737 if isinstance(sub, UserString):
738 sub = sub.data
739 return self.data.count(sub, start, end)
740 def encode(self, encoding=None, errors=None): # XXX improve this?
741 if encoding:
742 if errors:
743 return self.__class__(self.data.encode(encoding, errors))
744 return self.__class__(self.data.encode(encoding))
745 return self.__class__(self.data.encode())
746 def endswith(self, suffix, start=0, end=_sys.maxsize):
747 return self.data.endswith(suffix, start, end)
748 def expandtabs(self, tabsize=8):
749 return self.__class__(self.data.expandtabs(tabsize))
750 def find(self, sub, start=0, end=_sys.maxsize):
751 if isinstance(sub, UserString):
752 sub = sub.data
753 return self.data.find(sub, start, end)
754 def format(self, *args, **kwds):
755 return self.data.format(*args, **kwds)
756 def index(self, sub, start=0, end=_sys.maxsize):
757 return self.data.index(sub, start, end)
758 def isalpha(self): return self.data.isalpha()
759 def isalnum(self): return self.data.isalnum()
760 def isdecimal(self): return self.data.isdecimal()
761 def isdigit(self): return self.data.isdigit()
762 def isidentifier(self): return self.data.isidentifier()
763 def islower(self): return self.data.islower()
764 def isnumeric(self): return self.data.isnumeric()
765 def isspace(self): return self.data.isspace()
766 def istitle(self): return self.data.istitle()
767 def isupper(self): return self.data.isupper()
768 def join(self, seq): return self.data.join(seq)
769 def ljust(self, width, *args):
770 return self.__class__(self.data.ljust(width, *args))
771 def lower(self): return self.__class__(self.data.lower())
772 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
773 def partition(self, sep):
774 return self.data.partition(sep)
775 def replace(self, old, new, maxsplit=-1):
776 if isinstance(old, UserString):
777 old = old.data
778 if isinstance(new, UserString):
779 new = new.data
780 return self.__class__(self.data.replace(old, new, maxsplit))
781 def rfind(self, sub, start=0, end=_sys.maxsize):
782 return self.data.rfind(sub, start, end)
783 def rindex(self, sub, start=0, end=_sys.maxsize):
784 return self.data.rindex(sub, start, end)
785 def rjust(self, width, *args):
786 return self.__class__(self.data.rjust(width, *args))
787 def rpartition(self, sep):
788 return self.data.rpartition(sep)
789 def rstrip(self, chars=None):
790 return self.__class__(self.data.rstrip(chars))
791 def split(self, sep=None, maxsplit=-1):
792 return self.data.split(sep, maxsplit)
793 def rsplit(self, sep=None, maxsplit=-1):
794 return self.data.rsplit(sep, maxsplit)
795 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
796 def startswith(self, prefix, start=0, end=_sys.maxsize):
797 return self.data.startswith(prefix, start, end)
798 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
799 def swapcase(self): return self.__class__(self.data.swapcase())
800 def title(self): return self.__class__(self.data.title())
801 def translate(self, *args):
802 return self.__class__(self.data.translate(*args))
803 def upper(self): return self.__class__(self.data.upper())
804 def zfill(self, width): return self.__class__(self.data.zfill(width))
805
806
807
808################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000809### Simple tests
810################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000811
812if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000813 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000814 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000815 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000816 p = Point(x=10, y=20)
817 assert p == loads(dumps(p))
818
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000819 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000820 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000821 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000822 @property
823 def hypot(self):
824 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000825 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000826 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000827
Christian Heimes25bb7832008-01-11 16:17:00 +0000828 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000829 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000830
831 class Point(namedtuple('Point', 'x y')):
832 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000833 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000834 _make = classmethod(tuple.__new__)
835 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000836 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000837
838 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000839
Christian Heimes25bb7832008-01-11 16:17:00 +0000840 Point3D = namedtuple('Point3D', Point._fields + ('z',))
841 print(Point3D.__doc__)
842
Guido van Rossumd8faa362007-04-27 19:54:29 +0000843 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000844 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000845 print(TestResults(*doctest.testmod()))