blob: 061106bd5ee9d2fcc85d35dc5a997ba8288e79b0 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerfa11db02010-09-12 04:12:42 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +000016from reprlib import recursive_repr as _recursive_repr
Raymond Hettinger2d32f632009-03-02 21:24:57 +000017
18################################################################################
19### OrderedDict
20################################################################################
21
Raymond Hettingerfa11db02010-09-12 04:12:42 +000022class _Link(object):
23 __slots__ = 'prev', 'next', 'key', '__weakref__'
24
Raymond Hettinger2d32f632009-03-02 21:24:57 +000025class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000027 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000030 # Big-O running times for all methods are the same as for regular dictionaries.
31
32 # The internal self.__map dictionary maps keys to links in a doubly linked list.
33 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingerf7328d02010-09-14 19:40:15 +000035 # The sentinel is stored in self.__hardroot with a weakref proxy in self.__root.
Raymond Hettingerc5c29c02010-09-12 18:13:46 +000036 # The prev/next links are weakref proxies (to prevent circular references).
37 # Individual links are kept alive by the hard reference in self.__map.
38 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000039
40 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000041 '''Initialize an ordered dictionary. Signature is the same as for
42 regular dictionaries, but keyword arguments are not recommended
43 because their insertion order is arbitrary.
44
45 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000046 if len(args) > 1:
47 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000048 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000049 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000050 except AttributeError:
Raymond Hettingerf7328d02010-09-14 19:40:15 +000051 self.__hardroot = _Link()
52 self.__root = root = _proxy(self.__hardroot)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000053 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000054 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000055 self.update(*args, **kwds)
56
Raymond Hettingerfa11db02010-09-12 04:12:42 +000057 def __setitem__(self, key, value,
58 dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000059 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000060 # Setting a new item creates a new link which goes at the end of the linked
61 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000062 if key not in self:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000063 self.__map[key] = link = Link()
Raymond Hettingerf1736542009-03-23 05:19:21 +000064 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000065 last = root.prev
66 link.prev, link.next, link.key = last, root, key
Raymond Hettingerf7328d02010-09-14 19:40:15 +000067 last.next = link
68 root.prev = proxy(link)
69 dict_setitem(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000070
Raymond Hettingerfa11db02010-09-12 04:12:42 +000071 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000072 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000073 # Deleting an existing item uses self.__map to find the link which is
74 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000075 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 link = self.__map.pop(key)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000077 link_prev = link.prev
78 link_next = link.next
79 link_prev.next = link_next
80 link_next.prev = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000081
Raymond Hettingerfa11db02010-09-12 04:12:42 +000082 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000083 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 # Traverse the linked list in order.
85 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000086 curr = root.next
Raymond Hettingerf1736542009-03-23 05:19:21 +000087 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000088 yield curr.key
89 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000090
Raymond Hettingerfa11db02010-09-12 04:12:42 +000091 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000092 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 # Traverse the linked list in reverse order.
94 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000095 curr = root.prev
Raymond Hettingerf1736542009-03-23 05:19:21 +000096 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000097 yield curr.key
98 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000100 def clear(self):
101 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000102 root = self.__root
103 root.prev = root.next = root
104 self.__map.clear()
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000105 dict.clear(self)
106
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000107 def popitem(self, last=True):
Raymond Hettinger331722d2010-09-02 18:44:16 +0000108 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
109 Pairs are returned in LIFO order if last is true or FIFO order if false.
110
111 '''
112 if not self:
113 raise KeyError('dictionary is empty')
114 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000115 if last:
116 link = root.prev
117 link_prev = link.prev
118 link_prev.next = root
119 root.prev = link_prev
120 else:
121 link = root.next
122 link_next = link.next
123 root.next = link_next
124 link_next.prev = root
125 key = link.key
Raymond Hettinger331722d2010-09-02 18:44:16 +0000126 del self.__map[key]
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000127 value = dict.pop(self, key)
Raymond Hettinger331722d2010-09-02 18:44:16 +0000128 return key, value
129
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000130 def move_to_end(self, key, last=True):
131 '''Move an existing element to the end (or beginning if last==False).
132
133 Raises KeyError if the element does not exist.
134 When last=True, acts like a fast version of self[key]=self.pop(key).
135
136 '''
137 link = self.__map[key]
138 link_prev = link.prev
139 link_next = link.next
140 link_prev.next = link_next
141 link_next.prev = link_prev
142 root = self.__root
143 if last:
144 last = root.prev
145 link.prev = last
146 link.next = root
147 last.next = root.prev = link
148 else:
149 first = root.next
150 link.prev = root
151 link.next = first
152 root.next = first.prev = link
153
Raymond Hettinger35c87f22010-09-16 19:10:17 +0000154 def __reduce__(self):
155 'Return state information for pickling'
156 items = [[k, self[k]] for k in self]
157 tmp = self.__map, self.__root, self.__hardroot
158 del self.__map, self.__root, self.__hardroot
159 inst_dict = vars(self).copy()
160 self.__map, self.__root, self.__hardroot = tmp
161 if inst_dict:
162 return (self.__class__, (items,), inst_dict)
163 return self.__class__, (items,)
164
165 def __sizeof__(self):
166 sizeof = _sys.getsizeof
167 n = len(self) + 1 # number of links including root
168 size = sizeof(self.__dict__) # instance dictionary
169 size += sizeof(self.__map) * 2 # internal dict and inherited dict
170 size += sizeof(self.__hardroot) * n # link objects
171 size += sizeof(self.__root) * n # proxy objects
172 return size
173
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000174 setdefault = MutableMapping.setdefault
175 update = MutableMapping.update
176 pop = MutableMapping.pop
177 keys = MutableMapping.keys
178 values = MutableMapping.values
179 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000180 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000181
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000182 @_recursive_repr()
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000183 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000184 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000185 if not self:
186 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000187 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000188
189 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000190 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000191 return self.__class__(self)
192
193 @classmethod
194 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000195 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
196 and values equal to v (which defaults to None).
197
198 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000199 d = cls()
200 for key in iterable:
201 d[key] = value
202 return d
203
204 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000205 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
206 while comparison to a regular mapping is order-insensitive.
207
208 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000209 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000210 return len(self)==len(other) and \
211 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000212 return dict.__eq__(self, other)
213
Christian Heimes99170a52007-12-19 02:07:34 +0000214
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000215################################################################################
216### namedtuple
217################################################################################
218
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000219def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000220 """Returns a new subclass of tuple with named fields.
221
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000222 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000223 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000224 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000225 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000226 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000227 33
Christian Heimes99170a52007-12-19 02:07:34 +0000228 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000229 >>> x, y
230 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000231 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000232 33
Christian Heimes0449f632007-12-15 01:27:15 +0000233 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000234 >>> d['x']
235 11
236 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000237 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000238 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000239 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000240
241 """
242
Christian Heimes2380ac72008-01-09 00:17:24 +0000243 # Parse and validate the field names. Validation serves two purposes,
244 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000245 if isinstance(field_names, str):
246 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000247 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000248 if rename:
249 names = list(field_names)
250 seen = set()
251 for i, name in enumerate(names):
252 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
253 or not name or name[0].isdigit() or name.startswith('_')
254 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000255 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000256 seen.add(name)
257 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000258 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000259 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000260 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
261 if _iskeyword(name):
262 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
263 if name[0].isdigit():
264 raise ValueError('Type names and field names cannot start with a number: %r' % name)
265 seen_names = set()
266 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000267 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000268 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000269 if name in seen_names:
270 raise ValueError('Encountered duplicate field name: %r' % name)
271 seen_names.add(name)
272
273 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000274 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000275 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000276 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
277 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000278 '%(typename)s(%(argtxt)s)' \n
279 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000280 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000281 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000282 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000283 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000284 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000285 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000286 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000287 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000288 if len(result) != %(numfields)d:
289 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
290 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000291 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000292 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000293 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000294 def _asdict(self):
295 'Return a new OrderedDict which maps field names to their values'
296 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000297 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000298 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000299 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000300 if kwds:
301 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000302 return result \n
303 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000304 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000305 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000306 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000307 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000308 if verbose:
309 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000310
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000311 # Execute the template string in a temporary namespace and
312 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000313 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
314 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000315 try:
316 exec(template, namespace)
317 except SyntaxError as e:
Raymond Hettingerc1cc0d02010-09-16 08:06:05 +0000318 raise SyntaxError(e.msg + ':\n\n' + template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000319 result = namespace[typename]
320
321 # For pickling to work, the __module__ variable needs to be set to the frame
322 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000323 # sys._getframe is not defined (Jython for example) or sys._getframe is not
324 # defined for arguments greater than 0 (IronPython).
325 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000326 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000327 except (AttributeError, ValueError):
328 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000329
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000330 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000331
Guido van Rossumd8faa362007-04-27 19:54:29 +0000332
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000333########################################################################
334### Counter
335########################################################################
336
Raymond Hettinger96f34102010-12-15 16:30:37 +0000337def _count_elements(mapping, iterable):
338 'Tally elements from the iterable.'
339 mapping_get = mapping.get
340 for elem in iterable:
341 mapping[elem] = mapping_get(elem, 0) + 1
342
343try: # Load C helper function if available
344 from _collections import _count_elements
345except ImportError:
346 pass
347
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000348class Counter(dict):
349 '''Dict subclass for counting hashable items. Sometimes called a bag
350 or multiset. Elements are stored as dictionary keys and their counts
351 are stored as dictionary values.
352
353 >>> c = Counter('abracadabra') # count elements from a string
354
355 >>> c.most_common(3) # three most common elements
356 [('a', 5), ('r', 2), ('b', 2)]
357 >>> sorted(c) # list all unique elements
358 ['a', 'b', 'c', 'd', 'r']
359 >>> ''.join(sorted(c.elements())) # list elements with repetitions
360 'aaaaabbcdrr'
361 >>> sum(c.values()) # total of all counts
362 11
363
364 >>> c['a'] # count of letter 'a'
365 5
366 >>> for elem in 'shazam': # update counts from an iterable
367 ... c[elem] += 1 # by adding 1 to each element's count
368 >>> c['a'] # now there are seven 'a'
369 7
370 >>> del c['r'] # remove all 'r'
371 >>> c['r'] # now there are zero 'r'
372 0
373
374 >>> d = Counter('simsalabim') # make another counter
375 >>> c.update(d) # add in the second counter
376 >>> c['a'] # now there are nine 'a'
377 9
378
379 >>> c.clear() # empty the counter
380 >>> c
381 Counter()
382
383 Note: If a count is set to zero or reduced to zero, it will remain
384 in the counter until the entry is deleted or the counter is cleared:
385
386 >>> c = Counter('aaabbc')
387 >>> c['b'] -= 2 # reduce the count of 'b' by two
388 >>> c.most_common() # 'b' is still in, but its count is zero
389 [('a', 3), ('c', 1), ('b', 0)]
390
391 '''
392 # References:
393 # http://en.wikipedia.org/wiki/Multiset
394 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
395 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
396 # http://code.activestate.com/recipes/259174/
397 # Knuth, TAOCP Vol. II section 4.6.3
398
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000399 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000400 '''Create a new, empty Counter object. And if given, count elements
401 from an input iterable. Or, initialize the count from another mapping
402 of elements to their counts.
403
404 >>> c = Counter() # a new, empty counter
405 >>> c = Counter('gallahad') # a new counter from an iterable
406 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000407 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000408
409 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000410 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000411
412 def __missing__(self, key):
413 'The count of elements not in the Counter is zero.'
414 # Needed so that self[missing_item] does not raise KeyError
415 return 0
416
417 def most_common(self, n=None):
418 '''List the n most common elements and their counts from the most
419 common to the least. If n is None, then list all element counts.
420
421 >>> Counter('abracadabra').most_common(3)
422 [('a', 5), ('r', 2), ('b', 2)]
423
424 '''
425 # Emulate Bag.sortedByCount from Smalltalk
426 if n is None:
427 return sorted(self.items(), key=_itemgetter(1), reverse=True)
428 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
429
430 def elements(self):
431 '''Iterator over elements repeating each as many times as its count.
432
433 >>> c = Counter('ABCABC')
434 >>> sorted(c.elements())
435 ['A', 'A', 'B', 'B', 'C', 'C']
436
437 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
438 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
439 >>> product = 1
440 >>> for factor in prime_factors.elements(): # loop over factors
441 ... product *= factor # and multiply them
442 >>> product
443 1836
444
445 Note, if an element's count has been set to zero or is a negative
446 number, elements() will ignore it.
447
448 '''
449 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
450 return _chain.from_iterable(_starmap(_repeat, self.items()))
451
452 # Override dict methods where necessary
453
454 @classmethod
455 def fromkeys(cls, iterable, v=None):
456 # There is no equivalent method for counters because setting v=1
457 # means that no element can have a count greater than one.
458 raise NotImplementedError(
459 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
460
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000461 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000462 '''Like dict.update() but add counts instead of replacing them.
463
464 Source can be an iterable, a dictionary, or another Counter instance.
465
466 >>> c = Counter('which')
467 >>> c.update('witch') # add elements from another iterable
468 >>> d = Counter('watch')
469 >>> c.update(d) # add elements from another counter
470 >>> c['h'] # four 'h' in which, witch, and watch
471 4
472
473 '''
474 # The regular dict.update() operation makes no sense here because the
475 # replace behavior results in the some of original untouched counts
476 # being mixed-in with all of the other counts for a mismash that
477 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000478 # contexts. Instead, we implement straight-addition. Both the inputs
479 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000480
481 if iterable is not None:
482 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000483 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000484 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000485 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000486 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000487 else:
488 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000489 else:
Raymond Hettinger96f34102010-12-15 16:30:37 +0000490 _count_elements(self, iterable)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000491 if kwds:
492 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000493
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000494 def subtract(self, iterable=None, **kwds):
495 '''Like dict.update() but subtracts counts instead of replacing them.
496 Counts can be reduced below zero. Both the inputs and outputs are
497 allowed to contain zero and negative counts.
498
499 Source can be an iterable, a dictionary, or another Counter instance.
500
501 >>> c = Counter('which')
502 >>> c.subtract('witch') # subtract elements from another iterable
503 >>> c.subtract(Counter('watch')) # subtract elements from another counter
504 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
505 0
506 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
507 -1
508
509 '''
510 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000511 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000512 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000513 for elem, count in iterable.items():
514 self[elem] = self_get(elem, 0) - count
515 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000516 for elem in iterable:
517 self[elem] = self_get(elem, 0) - 1
518 if kwds:
519 self.subtract(kwds)
520
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000521 def copy(self):
522 'Like dict.copy() but returns a Counter instance instead of a dict.'
523 return Counter(self)
524
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000525 def __delitem__(self, elem):
526 'Like dict.__delitem__() but does not raise KeyError for missing values.'
527 if elem in self:
528 dict.__delitem__(self, elem)
529
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000530 def __repr__(self):
531 if not self:
532 return '%s()' % self.__class__.__name__
533 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
534 return '%s({%s})' % (self.__class__.__name__, items)
535
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000536 # Multiset-style mathematical operations discussed in:
537 # Knuth TAOCP Volume II section 4.6.3 exercise 19
538 # and at http://en.wikipedia.org/wiki/Multiset
539 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000540 # Outputs guaranteed to only include positive counts.
541 #
542 # To strip negative and zero counts, add-in an empty counter:
543 # c += Counter()
544
545 def __add__(self, other):
546 '''Add counts from two counters.
547
548 >>> Counter('abbb') + Counter('bcc')
549 Counter({'b': 4, 'c': 2, 'a': 1})
550
551 '''
552 if not isinstance(other, Counter):
553 return NotImplemented
554 result = Counter()
555 for elem in set(self) | set(other):
556 newcount = self[elem] + other[elem]
557 if newcount > 0:
558 result[elem] = newcount
559 return result
560
561 def __sub__(self, other):
562 ''' Subtract count, but keep only results with positive counts.
563
564 >>> Counter('abbbc') - Counter('bccd')
565 Counter({'b': 2, 'a': 1})
566
567 '''
568 if not isinstance(other, Counter):
569 return NotImplemented
570 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000571 for elem in set(self) | set(other):
572 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000573 if newcount > 0:
574 result[elem] = newcount
575 return result
576
577 def __or__(self, other):
578 '''Union is the maximum of value in either of the input counters.
579
580 >>> Counter('abbb') | Counter('bcc')
581 Counter({'b': 3, 'c': 2, 'a': 1})
582
583 '''
584 if not isinstance(other, Counter):
585 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000586 result = Counter()
587 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000588 p, q = self[elem], other[elem]
589 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000590 if newcount > 0:
591 result[elem] = newcount
592 return result
593
594 def __and__(self, other):
595 ''' Intersection is the minimum of corresponding counts.
596
597 >>> Counter('abbb') & Counter('bcc')
598 Counter({'b': 1})
599
600 '''
601 if not isinstance(other, Counter):
602 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000603 result = Counter()
604 if len(self) < len(other):
605 self, other = other, self
606 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000607 p, q = self[elem], other[elem]
608 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000609 if newcount > 0:
610 result[elem] = newcount
611 return result
612
Guido van Rossumd8faa362007-04-27 19:54:29 +0000613
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000614################################################################################
615### UserDict
616################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000617
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000618class UserDict(MutableMapping):
619
620 # Start by filling-out the abstract methods
621 def __init__(self, dict=None, **kwargs):
622 self.data = {}
623 if dict is not None:
624 self.update(dict)
625 if len(kwargs):
626 self.update(kwargs)
627 def __len__(self): return len(self.data)
628 def __getitem__(self, key):
629 if key in self.data:
630 return self.data[key]
631 if hasattr(self.__class__, "__missing__"):
632 return self.__class__.__missing__(self, key)
633 raise KeyError(key)
634 def __setitem__(self, key, item): self.data[key] = item
635 def __delitem__(self, key): del self.data[key]
636 def __iter__(self):
637 return iter(self.data)
638
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000639 # Modify __contains__ to work correctly when __missing__ is present
640 def __contains__(self, key):
641 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000642
643 # Now, add the methods in dicts but not in MutableMapping
644 def __repr__(self): return repr(self.data)
645 def copy(self):
646 if self.__class__ is UserDict:
647 return UserDict(self.data.copy())
648 import copy
649 data = self.data
650 try:
651 self.data = {}
652 c = copy.copy(self)
653 finally:
654 self.data = data
655 c.update(self)
656 return c
657 @classmethod
658 def fromkeys(cls, iterable, value=None):
659 d = cls()
660 for key in iterable:
661 d[key] = value
662 return d
663
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000664
665
666################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000667### UserList
668################################################################################
669
670class UserList(MutableSequence):
671 """A more or less complete user-defined wrapper around list objects."""
672 def __init__(self, initlist=None):
673 self.data = []
674 if initlist is not None:
675 # XXX should this accept an arbitrary sequence?
676 if type(initlist) == type(self.data):
677 self.data[:] = initlist
678 elif isinstance(initlist, UserList):
679 self.data[:] = initlist.data[:]
680 else:
681 self.data = list(initlist)
682 def __repr__(self): return repr(self.data)
683 def __lt__(self, other): return self.data < self.__cast(other)
684 def __le__(self, other): return self.data <= self.__cast(other)
685 def __eq__(self, other): return self.data == self.__cast(other)
686 def __ne__(self, other): return self.data != self.__cast(other)
687 def __gt__(self, other): return self.data > self.__cast(other)
688 def __ge__(self, other): return self.data >= self.__cast(other)
689 def __cast(self, other):
690 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000691 def __contains__(self, item): return item in self.data
692 def __len__(self): return len(self.data)
693 def __getitem__(self, i): return self.data[i]
694 def __setitem__(self, i, item): self.data[i] = item
695 def __delitem__(self, i): del self.data[i]
696 def __add__(self, other):
697 if isinstance(other, UserList):
698 return self.__class__(self.data + other.data)
699 elif isinstance(other, type(self.data)):
700 return self.__class__(self.data + other)
701 return self.__class__(self.data + list(other))
702 def __radd__(self, other):
703 if isinstance(other, UserList):
704 return self.__class__(other.data + self.data)
705 elif isinstance(other, type(self.data)):
706 return self.__class__(other + self.data)
707 return self.__class__(list(other) + self.data)
708 def __iadd__(self, other):
709 if isinstance(other, UserList):
710 self.data += other.data
711 elif isinstance(other, type(self.data)):
712 self.data += other
713 else:
714 self.data += list(other)
715 return self
716 def __mul__(self, n):
717 return self.__class__(self.data*n)
718 __rmul__ = __mul__
719 def __imul__(self, n):
720 self.data *= n
721 return self
722 def append(self, item): self.data.append(item)
723 def insert(self, i, item): self.data.insert(i, item)
724 def pop(self, i=-1): return self.data.pop(i)
725 def remove(self, item): self.data.remove(item)
726 def count(self, item): return self.data.count(item)
727 def index(self, item, *args): return self.data.index(item, *args)
728 def reverse(self): self.data.reverse()
729 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
730 def extend(self, other):
731 if isinstance(other, UserList):
732 self.data.extend(other.data)
733 else:
734 self.data.extend(other)
735
736
737
738################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000739### UserString
740################################################################################
741
742class UserString(Sequence):
743 def __init__(self, seq):
744 if isinstance(seq, str):
745 self.data = seq
746 elif isinstance(seq, UserString):
747 self.data = seq.data[:]
748 else:
749 self.data = str(seq)
750 def __str__(self): return str(self.data)
751 def __repr__(self): return repr(self.data)
752 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000753 def __float__(self): return float(self.data)
754 def __complex__(self): return complex(self.data)
755 def __hash__(self): return hash(self.data)
756
757 def __eq__(self, string):
758 if isinstance(string, UserString):
759 return self.data == string.data
760 return self.data == string
761 def __ne__(self, string):
762 if isinstance(string, UserString):
763 return self.data != string.data
764 return self.data != string
765 def __lt__(self, string):
766 if isinstance(string, UserString):
767 return self.data < string.data
768 return self.data < string
769 def __le__(self, string):
770 if isinstance(string, UserString):
771 return self.data <= string.data
772 return self.data <= string
773 def __gt__(self, string):
774 if isinstance(string, UserString):
775 return self.data > string.data
776 return self.data > string
777 def __ge__(self, string):
778 if isinstance(string, UserString):
779 return self.data >= string.data
780 return self.data >= string
781
782 def __contains__(self, char):
783 if isinstance(char, UserString):
784 char = char.data
785 return char in self.data
786
787 def __len__(self): return len(self.data)
788 def __getitem__(self, index): return self.__class__(self.data[index])
789 def __add__(self, other):
790 if isinstance(other, UserString):
791 return self.__class__(self.data + other.data)
792 elif isinstance(other, str):
793 return self.__class__(self.data + other)
794 return self.__class__(self.data + str(other))
795 def __radd__(self, other):
796 if isinstance(other, str):
797 return self.__class__(other + self.data)
798 return self.__class__(str(other) + self.data)
799 def __mul__(self, n):
800 return self.__class__(self.data*n)
801 __rmul__ = __mul__
802 def __mod__(self, args):
803 return self.__class__(self.data % args)
804
805 # the following methods are defined in alphabetical order:
806 def capitalize(self): return self.__class__(self.data.capitalize())
807 def center(self, width, *args):
808 return self.__class__(self.data.center(width, *args))
809 def count(self, sub, start=0, end=_sys.maxsize):
810 if isinstance(sub, UserString):
811 sub = sub.data
812 return self.data.count(sub, start, end)
813 def encode(self, encoding=None, errors=None): # XXX improve this?
814 if encoding:
815 if errors:
816 return self.__class__(self.data.encode(encoding, errors))
817 return self.__class__(self.data.encode(encoding))
818 return self.__class__(self.data.encode())
819 def endswith(self, suffix, start=0, end=_sys.maxsize):
820 return self.data.endswith(suffix, start, end)
821 def expandtabs(self, tabsize=8):
822 return self.__class__(self.data.expandtabs(tabsize))
823 def find(self, sub, start=0, end=_sys.maxsize):
824 if isinstance(sub, UserString):
825 sub = sub.data
826 return self.data.find(sub, start, end)
827 def format(self, *args, **kwds):
828 return self.data.format(*args, **kwds)
829 def index(self, sub, start=0, end=_sys.maxsize):
830 return self.data.index(sub, start, end)
831 def isalpha(self): return self.data.isalpha()
832 def isalnum(self): return self.data.isalnum()
833 def isdecimal(self): return self.data.isdecimal()
834 def isdigit(self): return self.data.isdigit()
835 def isidentifier(self): return self.data.isidentifier()
836 def islower(self): return self.data.islower()
837 def isnumeric(self): return self.data.isnumeric()
838 def isspace(self): return self.data.isspace()
839 def istitle(self): return self.data.istitle()
840 def isupper(self): return self.data.isupper()
841 def join(self, seq): return self.data.join(seq)
842 def ljust(self, width, *args):
843 return self.__class__(self.data.ljust(width, *args))
844 def lower(self): return self.__class__(self.data.lower())
845 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
846 def partition(self, sep):
847 return self.data.partition(sep)
848 def replace(self, old, new, maxsplit=-1):
849 if isinstance(old, UserString):
850 old = old.data
851 if isinstance(new, UserString):
852 new = new.data
853 return self.__class__(self.data.replace(old, new, maxsplit))
854 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000855 if isinstance(sub, UserString):
856 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000857 return self.data.rfind(sub, start, end)
858 def rindex(self, sub, start=0, end=_sys.maxsize):
859 return self.data.rindex(sub, start, end)
860 def rjust(self, width, *args):
861 return self.__class__(self.data.rjust(width, *args))
862 def rpartition(self, sep):
863 return self.data.rpartition(sep)
864 def rstrip(self, chars=None):
865 return self.__class__(self.data.rstrip(chars))
866 def split(self, sep=None, maxsplit=-1):
867 return self.data.split(sep, maxsplit)
868 def rsplit(self, sep=None, maxsplit=-1):
869 return self.data.rsplit(sep, maxsplit)
870 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
871 def startswith(self, prefix, start=0, end=_sys.maxsize):
872 return self.data.startswith(prefix, start, end)
873 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
874 def swapcase(self): return self.__class__(self.data.swapcase())
875 def title(self): return self.__class__(self.data.title())
876 def translate(self, *args):
877 return self.__class__(self.data.translate(*args))
878 def upper(self): return self.__class__(self.data.upper())
879 def zfill(self, width): return self.__class__(self.data.zfill(width))
880
881
882
883################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000884### Simple tests
885################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000886
887if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000888 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000889 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000890 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000891 p = Point(x=10, y=20)
892 assert p == loads(dumps(p))
893
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000894 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000895 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000896 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000897 @property
898 def hypot(self):
899 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000900 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000901 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000902
Christian Heimes25bb7832008-01-11 16:17:00 +0000903 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000904 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000905
906 class Point(namedtuple('Point', 'x y')):
907 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000908 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000909 _make = classmethod(tuple.__new__)
910 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000911 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000912
913 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000914
Christian Heimes25bb7832008-01-11 16:17:00 +0000915 Point3D = namedtuple('Point3D', Point._fields + ('z',))
916 print(Point3D.__doc__)
917
Guido van Rossumd8faa362007-04-27 19:54:29 +0000918 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000919 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000920 print(TestResults(*doctest.testmod()))