blob: f05d7b4350256bf3be97dcfd14c7eb3e77780901 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerfa11db02010-09-12 04:12:42 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +000016from reprlib import recursive_repr as _recursive_repr
Raymond Hettinger2d32f632009-03-02 21:24:57 +000017
18################################################################################
19### OrderedDict
20################################################################################
21
Raymond Hettingerfa11db02010-09-12 04:12:42 +000022class _Link(object):
23 __slots__ = 'prev', 'next', 'key', '__weakref__'
24
Raymond Hettinger2d32f632009-03-02 21:24:57 +000025class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000027 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000030 # Big-O running times for all methods are the same as for regular dictionaries.
31
32 # The internal self.__map dictionary maps keys to links in a doubly linked list.
33 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingerf7328d02010-09-14 19:40:15 +000035 # The sentinel is stored in self.__hardroot with a weakref proxy in self.__root.
Raymond Hettingerc5c29c02010-09-12 18:13:46 +000036 # The prev/next links are weakref proxies (to prevent circular references).
37 # Individual links are kept alive by the hard reference in self.__map.
38 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000039
40 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000041 '''Initialize an ordered dictionary. Signature is the same as for
42 regular dictionaries, but keyword arguments are not recommended
43 because their insertion order is arbitrary.
44
45 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000046 if len(args) > 1:
47 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000048 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000049 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000050 except AttributeError:
Raymond Hettingerf7328d02010-09-14 19:40:15 +000051 self.__hardroot = _Link()
52 self.__root = root = _proxy(self.__hardroot)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000053 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000054 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000055 self.update(*args, **kwds)
56
Raymond Hettingerfa11db02010-09-12 04:12:42 +000057 def __setitem__(self, key, value,
58 dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000059 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000060 # Setting a new item creates a new link which goes at the end of the linked
61 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000062 if key not in self:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000063 self.__map[key] = link = Link()
Raymond Hettingerf1736542009-03-23 05:19:21 +000064 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000065 last = root.prev
66 link.prev, link.next, link.key = last, root, key
Raymond Hettingerf7328d02010-09-14 19:40:15 +000067 last.next = link
68 root.prev = proxy(link)
69 dict_setitem(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000070
Raymond Hettingerfa11db02010-09-12 04:12:42 +000071 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000072 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000073 # Deleting an existing item uses self.__map to find the link which is
74 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000075 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 link = self.__map.pop(key)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000077 link_prev = link.prev
78 link_next = link.next
79 link_prev.next = link_next
80 link_next.prev = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000081
Raymond Hettingerfa11db02010-09-12 04:12:42 +000082 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000083 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 # Traverse the linked list in order.
85 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000086 curr = root.next
Raymond Hettingerf1736542009-03-23 05:19:21 +000087 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000088 yield curr.key
89 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000090
Raymond Hettingerfa11db02010-09-12 04:12:42 +000091 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000092 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 # Traverse the linked list in reverse order.
94 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000095 curr = root.prev
Raymond Hettingerf1736542009-03-23 05:19:21 +000096 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000097 yield curr.key
98 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000100 def clear(self):
101 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000102 root = self.__root
103 root.prev = root.next = root
104 self.__map.clear()
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000105 dict.clear(self)
106
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000107 def popitem(self, last=True):
Raymond Hettinger331722d2010-09-02 18:44:16 +0000108 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
109 Pairs are returned in LIFO order if last is true or FIFO order if false.
110
111 '''
112 if not self:
113 raise KeyError('dictionary is empty')
114 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000115 if last:
116 link = root.prev
117 link_prev = link.prev
118 link_prev.next = root
119 root.prev = link_prev
120 else:
121 link = root.next
122 link_next = link.next
123 root.next = link_next
124 link_next.prev = root
125 key = link.key
Raymond Hettinger331722d2010-09-02 18:44:16 +0000126 del self.__map[key]
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000127 value = dict.pop(self, key)
Raymond Hettinger331722d2010-09-02 18:44:16 +0000128 return key, value
129
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000130 def move_to_end(self, key, last=True):
131 '''Move an existing element to the end (or beginning if last==False).
132
133 Raises KeyError if the element does not exist.
134 When last=True, acts like a fast version of self[key]=self.pop(key).
135
136 '''
137 link = self.__map[key]
138 link_prev = link.prev
139 link_next = link.next
140 link_prev.next = link_next
141 link_next.prev = link_prev
142 root = self.__root
143 if last:
144 last = root.prev
145 link.prev = last
146 link.next = root
147 last.next = root.prev = link
148 else:
149 first = root.next
150 link.prev = root
151 link.next = first
152 root.next = first.prev = link
153
Raymond Hettinger35c87f22010-09-16 19:10:17 +0000154 def __reduce__(self):
155 'Return state information for pickling'
156 items = [[k, self[k]] for k in self]
157 tmp = self.__map, self.__root, self.__hardroot
158 del self.__map, self.__root, self.__hardroot
159 inst_dict = vars(self).copy()
160 self.__map, self.__root, self.__hardroot = tmp
161 if inst_dict:
162 return (self.__class__, (items,), inst_dict)
163 return self.__class__, (items,)
164
165 def __sizeof__(self):
166 sizeof = _sys.getsizeof
167 n = len(self) + 1 # number of links including root
168 size = sizeof(self.__dict__) # instance dictionary
169 size += sizeof(self.__map) * 2 # internal dict and inherited dict
170 size += sizeof(self.__hardroot) * n # link objects
171 size += sizeof(self.__root) * n # proxy objects
172 return size
173
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000174 setdefault = MutableMapping.setdefault
175 update = MutableMapping.update
176 pop = MutableMapping.pop
177 keys = MutableMapping.keys
178 values = MutableMapping.values
179 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000180 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000181
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000182 @_recursive_repr()
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000183 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000184 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000185 if not self:
186 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000187 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000188
189 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000190 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000191 return self.__class__(self)
192
193 @classmethod
194 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000195 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
196 and values equal to v (which defaults to None).
197
198 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000199 d = cls()
200 for key in iterable:
201 d[key] = value
202 return d
203
204 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000205 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
206 while comparison to a regular mapping is order-insensitive.
207
208 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000209 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000210 return len(self)==len(other) and \
211 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000212 return dict.__eq__(self, other)
213
Christian Heimes99170a52007-12-19 02:07:34 +0000214
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000215################################################################################
216### namedtuple
217################################################################################
218
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000219def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000220 """Returns a new subclass of tuple with named fields.
221
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000222 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000223 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000224 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000225 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000226 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000227 33
Christian Heimes99170a52007-12-19 02:07:34 +0000228 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000229 >>> x, y
230 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000231 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000232 33
Christian Heimes0449f632007-12-15 01:27:15 +0000233 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000234 >>> d['x']
235 11
236 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000237 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000238 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000239 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000240
241 """
242
Christian Heimes2380ac72008-01-09 00:17:24 +0000243 # Parse and validate the field names. Validation serves two purposes,
244 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000245 if isinstance(field_names, str):
246 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000247 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000248 if rename:
249 names = list(field_names)
250 seen = set()
251 for i, name in enumerate(names):
252 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
253 or not name or name[0].isdigit() or name.startswith('_')
254 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000255 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000256 seen.add(name)
257 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000258 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000259 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000260 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
261 if _iskeyword(name):
262 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
263 if name[0].isdigit():
264 raise ValueError('Type names and field names cannot start with a number: %r' % name)
265 seen_names = set()
266 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000267 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000268 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000269 if name in seen_names:
270 raise ValueError('Encountered duplicate field name: %r' % name)
271 seen_names.add(name)
272
273 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000274 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000275 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000276 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
277 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000278 '%(typename)s(%(argtxt)s)' \n
279 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000280 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000281 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000282 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000283 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000284 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000285 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000286 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000287 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000288 if len(result) != %(numfields)d:
289 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
290 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000291 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000292 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000293 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000294 def _asdict(self):
295 'Return a new OrderedDict which maps field names to their values'
296 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000297 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000298 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000299 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000300 if kwds:
301 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000302 return result \n
303 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000304 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000305 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000306 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000307 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000308 if verbose:
309 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000310
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000311 # Execute the template string in a temporary namespace and
312 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000313 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
314 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000315 try:
316 exec(template, namespace)
317 except SyntaxError as e:
Raymond Hettingerc1cc0d02010-09-16 08:06:05 +0000318 raise SyntaxError(e.msg + ':\n\n' + template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000319 result = namespace[typename]
320
321 # For pickling to work, the __module__ variable needs to be set to the frame
322 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000323 # sys._getframe is not defined (Jython for example) or sys._getframe is not
324 # defined for arguments greater than 0 (IronPython).
325 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000326 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000327 except (AttributeError, ValueError):
328 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000329
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000330 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000331
Guido van Rossumd8faa362007-04-27 19:54:29 +0000332
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000333########################################################################
334### Counter
335########################################################################
336
337class Counter(dict):
338 '''Dict subclass for counting hashable items. Sometimes called a bag
339 or multiset. Elements are stored as dictionary keys and their counts
340 are stored as dictionary values.
341
342 >>> c = Counter('abracadabra') # count elements from a string
343
344 >>> c.most_common(3) # three most common elements
345 [('a', 5), ('r', 2), ('b', 2)]
346 >>> sorted(c) # list all unique elements
347 ['a', 'b', 'c', 'd', 'r']
348 >>> ''.join(sorted(c.elements())) # list elements with repetitions
349 'aaaaabbcdrr'
350 >>> sum(c.values()) # total of all counts
351 11
352
353 >>> c['a'] # count of letter 'a'
354 5
355 >>> for elem in 'shazam': # update counts from an iterable
356 ... c[elem] += 1 # by adding 1 to each element's count
357 >>> c['a'] # now there are seven 'a'
358 7
359 >>> del c['r'] # remove all 'r'
360 >>> c['r'] # now there are zero 'r'
361 0
362
363 >>> d = Counter('simsalabim') # make another counter
364 >>> c.update(d) # add in the second counter
365 >>> c['a'] # now there are nine 'a'
366 9
367
368 >>> c.clear() # empty the counter
369 >>> c
370 Counter()
371
372 Note: If a count is set to zero or reduced to zero, it will remain
373 in the counter until the entry is deleted or the counter is cleared:
374
375 >>> c = Counter('aaabbc')
376 >>> c['b'] -= 2 # reduce the count of 'b' by two
377 >>> c.most_common() # 'b' is still in, but its count is zero
378 [('a', 3), ('c', 1), ('b', 0)]
379
380 '''
381 # References:
382 # http://en.wikipedia.org/wiki/Multiset
383 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
384 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
385 # http://code.activestate.com/recipes/259174/
386 # Knuth, TAOCP Vol. II section 4.6.3
387
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000388 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000389 '''Create a new, empty Counter object. And if given, count elements
390 from an input iterable. Or, initialize the count from another mapping
391 of elements to their counts.
392
393 >>> c = Counter() # a new, empty counter
394 >>> c = Counter('gallahad') # a new counter from an iterable
395 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000396 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000397
398 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000399 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000400
401 def __missing__(self, key):
402 'The count of elements not in the Counter is zero.'
403 # Needed so that self[missing_item] does not raise KeyError
404 return 0
405
406 def most_common(self, n=None):
407 '''List the n most common elements and their counts from the most
408 common to the least. If n is None, then list all element counts.
409
410 >>> Counter('abracadabra').most_common(3)
411 [('a', 5), ('r', 2), ('b', 2)]
412
413 '''
414 # Emulate Bag.sortedByCount from Smalltalk
415 if n is None:
416 return sorted(self.items(), key=_itemgetter(1), reverse=True)
417 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
418
419 def elements(self):
420 '''Iterator over elements repeating each as many times as its count.
421
422 >>> c = Counter('ABCABC')
423 >>> sorted(c.elements())
424 ['A', 'A', 'B', 'B', 'C', 'C']
425
426 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
427 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
428 >>> product = 1
429 >>> for factor in prime_factors.elements(): # loop over factors
430 ... product *= factor # and multiply them
431 >>> product
432 1836
433
434 Note, if an element's count has been set to zero or is a negative
435 number, elements() will ignore it.
436
437 '''
438 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
439 return _chain.from_iterable(_starmap(_repeat, self.items()))
440
441 # Override dict methods where necessary
442
443 @classmethod
444 def fromkeys(cls, iterable, v=None):
445 # There is no equivalent method for counters because setting v=1
446 # means that no element can have a count greater than one.
447 raise NotImplementedError(
448 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
449
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000450 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000451 '''Like dict.update() but add counts instead of replacing them.
452
453 Source can be an iterable, a dictionary, or another Counter instance.
454
455 >>> c = Counter('which')
456 >>> c.update('witch') # add elements from another iterable
457 >>> d = Counter('watch')
458 >>> c.update(d) # add elements from another counter
459 >>> c['h'] # four 'h' in which, witch, and watch
460 4
461
462 '''
463 # The regular dict.update() operation makes no sense here because the
464 # replace behavior results in the some of original untouched counts
465 # being mixed-in with all of the other counts for a mismash that
466 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000467 # contexts. Instead, we implement straight-addition. Both the inputs
468 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000469
470 if iterable is not None:
471 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000472 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000473 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000474 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000475 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000476 else:
477 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000478 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000479 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000480 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000481 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000482 if kwds:
483 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000484
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000485 def subtract(self, iterable=None, **kwds):
486 '''Like dict.update() but subtracts counts instead of replacing them.
487 Counts can be reduced below zero. Both the inputs and outputs are
488 allowed to contain zero and negative counts.
489
490 Source can be an iterable, a dictionary, or another Counter instance.
491
492 >>> c = Counter('which')
493 >>> c.subtract('witch') # subtract elements from another iterable
494 >>> c.subtract(Counter('watch')) # subtract elements from another counter
495 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
496 0
497 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
498 -1
499
500 '''
501 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000502 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000503 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000504 for elem, count in iterable.items():
505 self[elem] = self_get(elem, 0) - count
506 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000507 for elem in iterable:
508 self[elem] = self_get(elem, 0) - 1
509 if kwds:
510 self.subtract(kwds)
511
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000512 def copy(self):
513 'Like dict.copy() but returns a Counter instance instead of a dict.'
514 return Counter(self)
515
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000516 def __delitem__(self, elem):
517 'Like dict.__delitem__() but does not raise KeyError for missing values.'
518 if elem in self:
519 dict.__delitem__(self, elem)
520
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000521 def __repr__(self):
522 if not self:
523 return '%s()' % self.__class__.__name__
524 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
525 return '%s({%s})' % (self.__class__.__name__, items)
526
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000527 # Multiset-style mathematical operations discussed in:
528 # Knuth TAOCP Volume II section 4.6.3 exercise 19
529 # and at http://en.wikipedia.org/wiki/Multiset
530 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000531 # Outputs guaranteed to only include positive counts.
532 #
533 # To strip negative and zero counts, add-in an empty counter:
534 # c += Counter()
535
536 def __add__(self, other):
537 '''Add counts from two counters.
538
539 >>> Counter('abbb') + Counter('bcc')
540 Counter({'b': 4, 'c': 2, 'a': 1})
541
542 '''
543 if not isinstance(other, Counter):
544 return NotImplemented
545 result = Counter()
546 for elem in set(self) | set(other):
547 newcount = self[elem] + other[elem]
548 if newcount > 0:
549 result[elem] = newcount
550 return result
551
552 def __sub__(self, other):
553 ''' Subtract count, but keep only results with positive counts.
554
555 >>> Counter('abbbc') - Counter('bccd')
556 Counter({'b': 2, 'a': 1})
557
558 '''
559 if not isinstance(other, Counter):
560 return NotImplemented
561 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000562 for elem in set(self) | set(other):
563 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000564 if newcount > 0:
565 result[elem] = newcount
566 return result
567
568 def __or__(self, other):
569 '''Union is the maximum of value in either of the input counters.
570
571 >>> Counter('abbb') | Counter('bcc')
572 Counter({'b': 3, 'c': 2, 'a': 1})
573
574 '''
575 if not isinstance(other, Counter):
576 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000577 result = Counter()
578 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000579 p, q = self[elem], other[elem]
580 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000581 if newcount > 0:
582 result[elem] = newcount
583 return result
584
585 def __and__(self, other):
586 ''' Intersection is the minimum of corresponding counts.
587
588 >>> Counter('abbb') & Counter('bcc')
589 Counter({'b': 1})
590
591 '''
592 if not isinstance(other, Counter):
593 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000594 result = Counter()
595 if len(self) < len(other):
596 self, other = other, self
597 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000598 p, q = self[elem], other[elem]
599 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000600 if newcount > 0:
601 result[elem] = newcount
602 return result
603
Guido van Rossumd8faa362007-04-27 19:54:29 +0000604
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000605################################################################################
606### UserDict
607################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000608
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000609class UserDict(MutableMapping):
610
611 # Start by filling-out the abstract methods
612 def __init__(self, dict=None, **kwargs):
613 self.data = {}
614 if dict is not None:
615 self.update(dict)
616 if len(kwargs):
617 self.update(kwargs)
618 def __len__(self): return len(self.data)
619 def __getitem__(self, key):
620 if key in self.data:
621 return self.data[key]
622 if hasattr(self.__class__, "__missing__"):
623 return self.__class__.__missing__(self, key)
624 raise KeyError(key)
625 def __setitem__(self, key, item): self.data[key] = item
626 def __delitem__(self, key): del self.data[key]
627 def __iter__(self):
628 return iter(self.data)
629
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000630 # Modify __contains__ to work correctly when __missing__ is present
631 def __contains__(self, key):
632 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000633
634 # Now, add the methods in dicts but not in MutableMapping
635 def __repr__(self): return repr(self.data)
636 def copy(self):
637 if self.__class__ is UserDict:
638 return UserDict(self.data.copy())
639 import copy
640 data = self.data
641 try:
642 self.data = {}
643 c = copy.copy(self)
644 finally:
645 self.data = data
646 c.update(self)
647 return c
648 @classmethod
649 def fromkeys(cls, iterable, value=None):
650 d = cls()
651 for key in iterable:
652 d[key] = value
653 return d
654
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000655
656
657################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000658### UserList
659################################################################################
660
661class UserList(MutableSequence):
662 """A more or less complete user-defined wrapper around list objects."""
663 def __init__(self, initlist=None):
664 self.data = []
665 if initlist is not None:
666 # XXX should this accept an arbitrary sequence?
667 if type(initlist) == type(self.data):
668 self.data[:] = initlist
669 elif isinstance(initlist, UserList):
670 self.data[:] = initlist.data[:]
671 else:
672 self.data = list(initlist)
673 def __repr__(self): return repr(self.data)
674 def __lt__(self, other): return self.data < self.__cast(other)
675 def __le__(self, other): return self.data <= self.__cast(other)
676 def __eq__(self, other): return self.data == self.__cast(other)
677 def __ne__(self, other): return self.data != self.__cast(other)
678 def __gt__(self, other): return self.data > self.__cast(other)
679 def __ge__(self, other): return self.data >= self.__cast(other)
680 def __cast(self, other):
681 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000682 def __contains__(self, item): return item in self.data
683 def __len__(self): return len(self.data)
684 def __getitem__(self, i): return self.data[i]
685 def __setitem__(self, i, item): self.data[i] = item
686 def __delitem__(self, i): del self.data[i]
687 def __add__(self, other):
688 if isinstance(other, UserList):
689 return self.__class__(self.data + other.data)
690 elif isinstance(other, type(self.data)):
691 return self.__class__(self.data + other)
692 return self.__class__(self.data + list(other))
693 def __radd__(self, other):
694 if isinstance(other, UserList):
695 return self.__class__(other.data + self.data)
696 elif isinstance(other, type(self.data)):
697 return self.__class__(other + self.data)
698 return self.__class__(list(other) + self.data)
699 def __iadd__(self, other):
700 if isinstance(other, UserList):
701 self.data += other.data
702 elif isinstance(other, type(self.data)):
703 self.data += other
704 else:
705 self.data += list(other)
706 return self
707 def __mul__(self, n):
708 return self.__class__(self.data*n)
709 __rmul__ = __mul__
710 def __imul__(self, n):
711 self.data *= n
712 return self
713 def append(self, item): self.data.append(item)
714 def insert(self, i, item): self.data.insert(i, item)
715 def pop(self, i=-1): return self.data.pop(i)
716 def remove(self, item): self.data.remove(item)
717 def count(self, item): return self.data.count(item)
718 def index(self, item, *args): return self.data.index(item, *args)
719 def reverse(self): self.data.reverse()
720 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
721 def extend(self, other):
722 if isinstance(other, UserList):
723 self.data.extend(other.data)
724 else:
725 self.data.extend(other)
726
727
728
729################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000730### UserString
731################################################################################
732
733class UserString(Sequence):
734 def __init__(self, seq):
735 if isinstance(seq, str):
736 self.data = seq
737 elif isinstance(seq, UserString):
738 self.data = seq.data[:]
739 else:
740 self.data = str(seq)
741 def __str__(self): return str(self.data)
742 def __repr__(self): return repr(self.data)
743 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000744 def __float__(self): return float(self.data)
745 def __complex__(self): return complex(self.data)
746 def __hash__(self): return hash(self.data)
747
748 def __eq__(self, string):
749 if isinstance(string, UserString):
750 return self.data == string.data
751 return self.data == string
752 def __ne__(self, string):
753 if isinstance(string, UserString):
754 return self.data != string.data
755 return self.data != string
756 def __lt__(self, string):
757 if isinstance(string, UserString):
758 return self.data < string.data
759 return self.data < string
760 def __le__(self, string):
761 if isinstance(string, UserString):
762 return self.data <= string.data
763 return self.data <= string
764 def __gt__(self, string):
765 if isinstance(string, UserString):
766 return self.data > string.data
767 return self.data > string
768 def __ge__(self, string):
769 if isinstance(string, UserString):
770 return self.data >= string.data
771 return self.data >= string
772
773 def __contains__(self, char):
774 if isinstance(char, UserString):
775 char = char.data
776 return char in self.data
777
778 def __len__(self): return len(self.data)
779 def __getitem__(self, index): return self.__class__(self.data[index])
780 def __add__(self, other):
781 if isinstance(other, UserString):
782 return self.__class__(self.data + other.data)
783 elif isinstance(other, str):
784 return self.__class__(self.data + other)
785 return self.__class__(self.data + str(other))
786 def __radd__(self, other):
787 if isinstance(other, str):
788 return self.__class__(other + self.data)
789 return self.__class__(str(other) + self.data)
790 def __mul__(self, n):
791 return self.__class__(self.data*n)
792 __rmul__ = __mul__
793 def __mod__(self, args):
794 return self.__class__(self.data % args)
795
796 # the following methods are defined in alphabetical order:
797 def capitalize(self): return self.__class__(self.data.capitalize())
798 def center(self, width, *args):
799 return self.__class__(self.data.center(width, *args))
800 def count(self, sub, start=0, end=_sys.maxsize):
801 if isinstance(sub, UserString):
802 sub = sub.data
803 return self.data.count(sub, start, end)
804 def encode(self, encoding=None, errors=None): # XXX improve this?
805 if encoding:
806 if errors:
807 return self.__class__(self.data.encode(encoding, errors))
808 return self.__class__(self.data.encode(encoding))
809 return self.__class__(self.data.encode())
810 def endswith(self, suffix, start=0, end=_sys.maxsize):
811 return self.data.endswith(suffix, start, end)
812 def expandtabs(self, tabsize=8):
813 return self.__class__(self.data.expandtabs(tabsize))
814 def find(self, sub, start=0, end=_sys.maxsize):
815 if isinstance(sub, UserString):
816 sub = sub.data
817 return self.data.find(sub, start, end)
818 def format(self, *args, **kwds):
819 return self.data.format(*args, **kwds)
820 def index(self, sub, start=0, end=_sys.maxsize):
821 return self.data.index(sub, start, end)
822 def isalpha(self): return self.data.isalpha()
823 def isalnum(self): return self.data.isalnum()
824 def isdecimal(self): return self.data.isdecimal()
825 def isdigit(self): return self.data.isdigit()
826 def isidentifier(self): return self.data.isidentifier()
827 def islower(self): return self.data.islower()
828 def isnumeric(self): return self.data.isnumeric()
829 def isspace(self): return self.data.isspace()
830 def istitle(self): return self.data.istitle()
831 def isupper(self): return self.data.isupper()
832 def join(self, seq): return self.data.join(seq)
833 def ljust(self, width, *args):
834 return self.__class__(self.data.ljust(width, *args))
835 def lower(self): return self.__class__(self.data.lower())
836 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
837 def partition(self, sep):
838 return self.data.partition(sep)
839 def replace(self, old, new, maxsplit=-1):
840 if isinstance(old, UserString):
841 old = old.data
842 if isinstance(new, UserString):
843 new = new.data
844 return self.__class__(self.data.replace(old, new, maxsplit))
845 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000846 if isinstance(sub, UserString):
847 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000848 return self.data.rfind(sub, start, end)
849 def rindex(self, sub, start=0, end=_sys.maxsize):
850 return self.data.rindex(sub, start, end)
851 def rjust(self, width, *args):
852 return self.__class__(self.data.rjust(width, *args))
853 def rpartition(self, sep):
854 return self.data.rpartition(sep)
855 def rstrip(self, chars=None):
856 return self.__class__(self.data.rstrip(chars))
857 def split(self, sep=None, maxsplit=-1):
858 return self.data.split(sep, maxsplit)
859 def rsplit(self, sep=None, maxsplit=-1):
860 return self.data.rsplit(sep, maxsplit)
861 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
862 def startswith(self, prefix, start=0, end=_sys.maxsize):
863 return self.data.startswith(prefix, start, end)
864 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
865 def swapcase(self): return self.__class__(self.data.swapcase())
866 def title(self): return self.__class__(self.data.title())
867 def translate(self, *args):
868 return self.__class__(self.data.translate(*args))
869 def upper(self): return self.__class__(self.data.upper())
870 def zfill(self, width): return self.__class__(self.data.zfill(width))
871
872
873
874################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000875### Simple tests
876################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000877
878if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000879 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000880 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000881 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000882 p = Point(x=10, y=20)
883 assert p == loads(dumps(p))
884
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000885 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000886 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000887 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000888 @property
889 def hypot(self):
890 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000891 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000892 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000893
Christian Heimes25bb7832008-01-11 16:17:00 +0000894 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000895 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000896
897 class Point(namedtuple('Point', 'x y')):
898 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000899 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000900 _make = classmethod(tuple.__new__)
901 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000902 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000903
904 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000905
Christian Heimes25bb7832008-01-11 16:17:00 +0000906 Point3D = namedtuple('Point3D', Point._fields + ('z',))
907 print(Point3D.__doc__)
908
Guido van Rossumd8faa362007-04-27 19:54:29 +0000909 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000910 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000911 print(TestResults(*doctest.testmod()))