blob: 9120ab69d566f0a66b010e27b13aef3428cf0829 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerfa11db02010-09-12 04:12:42 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerfa11db02010-09-12 04:12:42 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger2d32f632009-03-02 21:24:57 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingerc5c29c02010-09-12 18:13:46 +000034 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettingerdc08a142010-09-12 05:15:22 +000046 self.__in_repr = False # detects recursive repr
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000047 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000048 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000049 except AttributeError:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000050 self.__root = root = _Link() # sentinel node for the doubly linked list
51 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000052 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000053 self.update(*args, **kwds)
54
Raymond Hettingerfa11db02010-09-12 04:12:42 +000055 def __setitem__(self, key, value,
56 dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000057 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000058 # Setting a new item creates a new link which goes at the end of the linked
59 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000060 if key not in self:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000061 self.__map[key] = link = Link()
Raymond Hettingerf1736542009-03-23 05:19:21 +000062 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000063 last = root.prev
64 link.prev, link.next, link.key = last, root, key
Raymond Hettingerc5c29c02010-09-12 18:13:46 +000065 last.next = root.prev = proxy(link)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000066 dict.__setitem__(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000067
Raymond Hettingerfa11db02010-09-12 04:12:42 +000068 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000069 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000070 # Deleting an existing item uses self.__map to find the link which is
71 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000072 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000073 link = self.__map.pop(key)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000074 link_prev = link.prev
75 link_next = link.next
76 link_prev.next = link_next
77 link_next.prev = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000078
Raymond Hettingerfa11db02010-09-12 04:12:42 +000079 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000080 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000081 # Traverse the linked list in order.
82 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000083 curr = root.next
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000085 yield curr.key
86 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000087
Raymond Hettingerfa11db02010-09-12 04:12:42 +000088 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000089 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000090 # Traverse the linked list in reverse order.
91 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000092 curr = root.prev
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000094 yield curr.key
95 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000096
97 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000098 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099 items = [[k, self[k]] for k in self]
Raymond Hettingerdc08a142010-09-12 05:15:22 +0000100 tmp = self.__map, self.__root, self.__in_repr
101 del self.__map, self.__root, self.__in_repr
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000102 inst_dict = vars(self).copy()
Raymond Hettingerdc08a142010-09-12 05:15:22 +0000103 self.__map, self.__root, self.__in_repr = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000104 if inst_dict:
105 return (self.__class__, (items,), inst_dict)
106 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000107
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000108 def clear(self):
109 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000110 root = self.__root
111 root.prev = root.next = root
112 self.__map.clear()
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000113 dict.clear(self)
114
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000115 def popitem(self, last=True):
Raymond Hettinger331722d2010-09-02 18:44:16 +0000116 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
117 Pairs are returned in LIFO order if last is true or FIFO order if false.
118
119 '''
120 if not self:
121 raise KeyError('dictionary is empty')
122 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000123 if last:
124 link = root.prev
125 link_prev = link.prev
126 link_prev.next = root
127 root.prev = link_prev
128 else:
129 link = root.next
130 link_next = link.next
131 root.next = link_next
132 link_next.prev = root
133 key = link.key
Raymond Hettinger331722d2010-09-02 18:44:16 +0000134 del self.__map[key]
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000135 value = dict.pop(self, key)
Raymond Hettinger331722d2010-09-02 18:44:16 +0000136 return key, value
137
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000138 def move_to_end(self, key, last=True):
139 '''Move an existing element to the end (or beginning if last==False).
140
141 Raises KeyError if the element does not exist.
142 When last=True, acts like a fast version of self[key]=self.pop(key).
143
144 '''
145 link = self.__map[key]
146 link_prev = link.prev
147 link_next = link.next
148 link_prev.next = link_next
149 link_next.prev = link_prev
150 root = self.__root
151 if last:
152 last = root.prev
153 link.prev = last
154 link.next = root
155 last.next = root.prev = link
156 else:
157 first = root.next
158 link.prev = root
159 link.next = first
160 root.next = first.prev = link
161
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000162 setdefault = MutableMapping.setdefault
163 update = MutableMapping.update
164 pop = MutableMapping.pop
165 keys = MutableMapping.keys
166 values = MutableMapping.values
167 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000168 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000169
170 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000171 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000172 if not self:
173 return '%s()' % (self.__class__.__name__,)
Raymond Hettingerf1725292010-09-12 18:16:01 +0000174 if self.__in_repr:
175 return '...'
Raymond Hettingerdc08a142010-09-12 05:15:22 +0000176 self.__in_repr = True
177 try:
178 result = '%s(%r)' % (self.__class__.__name__, list(self.items()))
179 finally:
180 self.__in_repr = False
181 return result
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000182
183 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000184 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000185 return self.__class__(self)
186
187 @classmethod
188 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000189 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
190 and values equal to v (which defaults to None).
191
192 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000193 d = cls()
194 for key in iterable:
195 d[key] = value
196 return d
197
198 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000199 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
200 while comparison to a regular mapping is order-insensitive.
201
202 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000203 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000204 return len(self)==len(other) and \
205 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000206 return dict.__eq__(self, other)
207
Christian Heimes99170a52007-12-19 02:07:34 +0000208
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000209################################################################################
210### namedtuple
211################################################################################
212
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000213def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000214 """Returns a new subclass of tuple with named fields.
215
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000216 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000217 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000218 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000219 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000220 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000221 33
Christian Heimes99170a52007-12-19 02:07:34 +0000222 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000223 >>> x, y
224 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000225 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000226 33
Christian Heimes0449f632007-12-15 01:27:15 +0000227 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000228 >>> d['x']
229 11
230 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000231 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000232 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000233 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000234
235 """
236
Christian Heimes2380ac72008-01-09 00:17:24 +0000237 # Parse and validate the field names. Validation serves two purposes,
238 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000239 if isinstance(field_names, str):
240 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000241 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000242 if rename:
243 names = list(field_names)
244 seen = set()
245 for i, name in enumerate(names):
246 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
247 or not name or name[0].isdigit() or name.startswith('_')
248 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000249 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000250 seen.add(name)
251 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000252 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000253 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000254 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
255 if _iskeyword(name):
256 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
257 if name[0].isdigit():
258 raise ValueError('Type names and field names cannot start with a number: %r' % name)
259 seen_names = set()
260 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000261 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000262 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000263 if name in seen_names:
264 raise ValueError('Encountered duplicate field name: %r' % name)
265 seen_names.add(name)
266
267 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000268 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000269 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000270 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
271 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000272 '%(typename)s(%(argtxt)s)' \n
273 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000274 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000275 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000276 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000277 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000278 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000279 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000280 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000281 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000282 if len(result) != %(numfields)d:
283 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
284 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000285 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000286 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000287 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000288 def _asdict(self):
289 'Return a new OrderedDict which maps field names to their values'
290 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000291 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000292 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000293 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000294 if kwds:
295 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000296 return result \n
297 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000298 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000299 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000300 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000301 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000302 if verbose:
303 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000304
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000305 # Execute the template string in a temporary namespace and
306 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000307 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
308 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000309 try:
310 exec(template, namespace)
311 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000312 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000313 result = namespace[typename]
314
315 # For pickling to work, the __module__ variable needs to be set to the frame
316 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000317 # sys._getframe is not defined (Jython for example) or sys._getframe is not
318 # defined for arguments greater than 0 (IronPython).
319 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000320 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000321 except (AttributeError, ValueError):
322 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000323
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000324 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000325
Guido van Rossumd8faa362007-04-27 19:54:29 +0000326
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000327########################################################################
328### Counter
329########################################################################
330
331class Counter(dict):
332 '''Dict subclass for counting hashable items. Sometimes called a bag
333 or multiset. Elements are stored as dictionary keys and their counts
334 are stored as dictionary values.
335
336 >>> c = Counter('abracadabra') # count elements from a string
337
338 >>> c.most_common(3) # three most common elements
339 [('a', 5), ('r', 2), ('b', 2)]
340 >>> sorted(c) # list all unique elements
341 ['a', 'b', 'c', 'd', 'r']
342 >>> ''.join(sorted(c.elements())) # list elements with repetitions
343 'aaaaabbcdrr'
344 >>> sum(c.values()) # total of all counts
345 11
346
347 >>> c['a'] # count of letter 'a'
348 5
349 >>> for elem in 'shazam': # update counts from an iterable
350 ... c[elem] += 1 # by adding 1 to each element's count
351 >>> c['a'] # now there are seven 'a'
352 7
353 >>> del c['r'] # remove all 'r'
354 >>> c['r'] # now there are zero 'r'
355 0
356
357 >>> d = Counter('simsalabim') # make another counter
358 >>> c.update(d) # add in the second counter
359 >>> c['a'] # now there are nine 'a'
360 9
361
362 >>> c.clear() # empty the counter
363 >>> c
364 Counter()
365
366 Note: If a count is set to zero or reduced to zero, it will remain
367 in the counter until the entry is deleted or the counter is cleared:
368
369 >>> c = Counter('aaabbc')
370 >>> c['b'] -= 2 # reduce the count of 'b' by two
371 >>> c.most_common() # 'b' is still in, but its count is zero
372 [('a', 3), ('c', 1), ('b', 0)]
373
374 '''
375 # References:
376 # http://en.wikipedia.org/wiki/Multiset
377 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
378 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
379 # http://code.activestate.com/recipes/259174/
380 # Knuth, TAOCP Vol. II section 4.6.3
381
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000382 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000383 '''Create a new, empty Counter object. And if given, count elements
384 from an input iterable. Or, initialize the count from another mapping
385 of elements to their counts.
386
387 >>> c = Counter() # a new, empty counter
388 >>> c = Counter('gallahad') # a new counter from an iterable
389 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000390 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000391
392 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000393 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000394
395 def __missing__(self, key):
396 'The count of elements not in the Counter is zero.'
397 # Needed so that self[missing_item] does not raise KeyError
398 return 0
399
400 def most_common(self, n=None):
401 '''List the n most common elements and their counts from the most
402 common to the least. If n is None, then list all element counts.
403
404 >>> Counter('abracadabra').most_common(3)
405 [('a', 5), ('r', 2), ('b', 2)]
406
407 '''
408 # Emulate Bag.sortedByCount from Smalltalk
409 if n is None:
410 return sorted(self.items(), key=_itemgetter(1), reverse=True)
411 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
412
413 def elements(self):
414 '''Iterator over elements repeating each as many times as its count.
415
416 >>> c = Counter('ABCABC')
417 >>> sorted(c.elements())
418 ['A', 'A', 'B', 'B', 'C', 'C']
419
420 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
421 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
422 >>> product = 1
423 >>> for factor in prime_factors.elements(): # loop over factors
424 ... product *= factor # and multiply them
425 >>> product
426 1836
427
428 Note, if an element's count has been set to zero or is a negative
429 number, elements() will ignore it.
430
431 '''
432 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
433 return _chain.from_iterable(_starmap(_repeat, self.items()))
434
435 # Override dict methods where necessary
436
437 @classmethod
438 def fromkeys(cls, iterable, v=None):
439 # There is no equivalent method for counters because setting v=1
440 # means that no element can have a count greater than one.
441 raise NotImplementedError(
442 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
443
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000444 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000445 '''Like dict.update() but add counts instead of replacing them.
446
447 Source can be an iterable, a dictionary, or another Counter instance.
448
449 >>> c = Counter('which')
450 >>> c.update('witch') # add elements from another iterable
451 >>> d = Counter('watch')
452 >>> c.update(d) # add elements from another counter
453 >>> c['h'] # four 'h' in which, witch, and watch
454 4
455
456 '''
457 # The regular dict.update() operation makes no sense here because the
458 # replace behavior results in the some of original untouched counts
459 # being mixed-in with all of the other counts for a mismash that
460 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000461 # contexts. Instead, we implement straight-addition. Both the inputs
462 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000463
464 if iterable is not None:
465 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000466 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000467 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000468 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000469 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000470 else:
471 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000472 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000473 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000474 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000475 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000476 if kwds:
477 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000478
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000479 def subtract(self, iterable=None, **kwds):
480 '''Like dict.update() but subtracts counts instead of replacing them.
481 Counts can be reduced below zero. Both the inputs and outputs are
482 allowed to contain zero and negative counts.
483
484 Source can be an iterable, a dictionary, or another Counter instance.
485
486 >>> c = Counter('which')
487 >>> c.subtract('witch') # subtract elements from another iterable
488 >>> c.subtract(Counter('watch')) # subtract elements from another counter
489 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
490 0
491 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
492 -1
493
494 '''
495 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000496 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000497 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000498 for elem, count in iterable.items():
499 self[elem] = self_get(elem, 0) - count
500 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000501 for elem in iterable:
502 self[elem] = self_get(elem, 0) - 1
503 if kwds:
504 self.subtract(kwds)
505
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000506 def copy(self):
507 'Like dict.copy() but returns a Counter instance instead of a dict.'
508 return Counter(self)
509
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000510 def __delitem__(self, elem):
511 'Like dict.__delitem__() but does not raise KeyError for missing values.'
512 if elem in self:
513 dict.__delitem__(self, elem)
514
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000515 def __repr__(self):
516 if not self:
517 return '%s()' % self.__class__.__name__
518 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
519 return '%s({%s})' % (self.__class__.__name__, items)
520
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000521 # Multiset-style mathematical operations discussed in:
522 # Knuth TAOCP Volume II section 4.6.3 exercise 19
523 # and at http://en.wikipedia.org/wiki/Multiset
524 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000525 # Outputs guaranteed to only include positive counts.
526 #
527 # To strip negative and zero counts, add-in an empty counter:
528 # c += Counter()
529
530 def __add__(self, other):
531 '''Add counts from two counters.
532
533 >>> Counter('abbb') + Counter('bcc')
534 Counter({'b': 4, 'c': 2, 'a': 1})
535
536 '''
537 if not isinstance(other, Counter):
538 return NotImplemented
539 result = Counter()
540 for elem in set(self) | set(other):
541 newcount = self[elem] + other[elem]
542 if newcount > 0:
543 result[elem] = newcount
544 return result
545
546 def __sub__(self, other):
547 ''' Subtract count, but keep only results with positive counts.
548
549 >>> Counter('abbbc') - Counter('bccd')
550 Counter({'b': 2, 'a': 1})
551
552 '''
553 if not isinstance(other, Counter):
554 return NotImplemented
555 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000556 for elem in set(self) | set(other):
557 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000558 if newcount > 0:
559 result[elem] = newcount
560 return result
561
562 def __or__(self, other):
563 '''Union is the maximum of value in either of the input counters.
564
565 >>> Counter('abbb') | Counter('bcc')
566 Counter({'b': 3, 'c': 2, 'a': 1})
567
568 '''
569 if not isinstance(other, Counter):
570 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000571 result = Counter()
572 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000573 p, q = self[elem], other[elem]
574 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000575 if newcount > 0:
576 result[elem] = newcount
577 return result
578
579 def __and__(self, other):
580 ''' Intersection is the minimum of corresponding counts.
581
582 >>> Counter('abbb') & Counter('bcc')
583 Counter({'b': 1})
584
585 '''
586 if not isinstance(other, Counter):
587 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000588 result = Counter()
589 if len(self) < len(other):
590 self, other = other, self
591 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000592 p, q = self[elem], other[elem]
593 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000594 if newcount > 0:
595 result[elem] = newcount
596 return result
597
Guido van Rossumd8faa362007-04-27 19:54:29 +0000598
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000599################################################################################
600### UserDict
601################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000602
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000603class UserDict(MutableMapping):
604
605 # Start by filling-out the abstract methods
606 def __init__(self, dict=None, **kwargs):
607 self.data = {}
608 if dict is not None:
609 self.update(dict)
610 if len(kwargs):
611 self.update(kwargs)
612 def __len__(self): return len(self.data)
613 def __getitem__(self, key):
614 if key in self.data:
615 return self.data[key]
616 if hasattr(self.__class__, "__missing__"):
617 return self.__class__.__missing__(self, key)
618 raise KeyError(key)
619 def __setitem__(self, key, item): self.data[key] = item
620 def __delitem__(self, key): del self.data[key]
621 def __iter__(self):
622 return iter(self.data)
623
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000624 # Modify __contains__ to work correctly when __missing__ is present
625 def __contains__(self, key):
626 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000627
628 # Now, add the methods in dicts but not in MutableMapping
629 def __repr__(self): return repr(self.data)
630 def copy(self):
631 if self.__class__ is UserDict:
632 return UserDict(self.data.copy())
633 import copy
634 data = self.data
635 try:
636 self.data = {}
637 c = copy.copy(self)
638 finally:
639 self.data = data
640 c.update(self)
641 return c
642 @classmethod
643 def fromkeys(cls, iterable, value=None):
644 d = cls()
645 for key in iterable:
646 d[key] = value
647 return d
648
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000649
650
651################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000652### UserList
653################################################################################
654
655class UserList(MutableSequence):
656 """A more or less complete user-defined wrapper around list objects."""
657 def __init__(self, initlist=None):
658 self.data = []
659 if initlist is not None:
660 # XXX should this accept an arbitrary sequence?
661 if type(initlist) == type(self.data):
662 self.data[:] = initlist
663 elif isinstance(initlist, UserList):
664 self.data[:] = initlist.data[:]
665 else:
666 self.data = list(initlist)
667 def __repr__(self): return repr(self.data)
668 def __lt__(self, other): return self.data < self.__cast(other)
669 def __le__(self, other): return self.data <= self.__cast(other)
670 def __eq__(self, other): return self.data == self.__cast(other)
671 def __ne__(self, other): return self.data != self.__cast(other)
672 def __gt__(self, other): return self.data > self.__cast(other)
673 def __ge__(self, other): return self.data >= self.__cast(other)
674 def __cast(self, other):
675 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000676 def __contains__(self, item): return item in self.data
677 def __len__(self): return len(self.data)
678 def __getitem__(self, i): return self.data[i]
679 def __setitem__(self, i, item): self.data[i] = item
680 def __delitem__(self, i): del self.data[i]
681 def __add__(self, other):
682 if isinstance(other, UserList):
683 return self.__class__(self.data + other.data)
684 elif isinstance(other, type(self.data)):
685 return self.__class__(self.data + other)
686 return self.__class__(self.data + list(other))
687 def __radd__(self, other):
688 if isinstance(other, UserList):
689 return self.__class__(other.data + self.data)
690 elif isinstance(other, type(self.data)):
691 return self.__class__(other + self.data)
692 return self.__class__(list(other) + self.data)
693 def __iadd__(self, other):
694 if isinstance(other, UserList):
695 self.data += other.data
696 elif isinstance(other, type(self.data)):
697 self.data += other
698 else:
699 self.data += list(other)
700 return self
701 def __mul__(self, n):
702 return self.__class__(self.data*n)
703 __rmul__ = __mul__
704 def __imul__(self, n):
705 self.data *= n
706 return self
707 def append(self, item): self.data.append(item)
708 def insert(self, i, item): self.data.insert(i, item)
709 def pop(self, i=-1): return self.data.pop(i)
710 def remove(self, item): self.data.remove(item)
711 def count(self, item): return self.data.count(item)
712 def index(self, item, *args): return self.data.index(item, *args)
713 def reverse(self): self.data.reverse()
714 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
715 def extend(self, other):
716 if isinstance(other, UserList):
717 self.data.extend(other.data)
718 else:
719 self.data.extend(other)
720
721
722
723################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000724### UserString
725################################################################################
726
727class UserString(Sequence):
728 def __init__(self, seq):
729 if isinstance(seq, str):
730 self.data = seq
731 elif isinstance(seq, UserString):
732 self.data = seq.data[:]
733 else:
734 self.data = str(seq)
735 def __str__(self): return str(self.data)
736 def __repr__(self): return repr(self.data)
737 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000738 def __float__(self): return float(self.data)
739 def __complex__(self): return complex(self.data)
740 def __hash__(self): return hash(self.data)
741
742 def __eq__(self, string):
743 if isinstance(string, UserString):
744 return self.data == string.data
745 return self.data == string
746 def __ne__(self, string):
747 if isinstance(string, UserString):
748 return self.data != string.data
749 return self.data != string
750 def __lt__(self, string):
751 if isinstance(string, UserString):
752 return self.data < string.data
753 return self.data < string
754 def __le__(self, string):
755 if isinstance(string, UserString):
756 return self.data <= string.data
757 return self.data <= string
758 def __gt__(self, string):
759 if isinstance(string, UserString):
760 return self.data > string.data
761 return self.data > string
762 def __ge__(self, string):
763 if isinstance(string, UserString):
764 return self.data >= string.data
765 return self.data >= string
766
767 def __contains__(self, char):
768 if isinstance(char, UserString):
769 char = char.data
770 return char in self.data
771
772 def __len__(self): return len(self.data)
773 def __getitem__(self, index): return self.__class__(self.data[index])
774 def __add__(self, other):
775 if isinstance(other, UserString):
776 return self.__class__(self.data + other.data)
777 elif isinstance(other, str):
778 return self.__class__(self.data + other)
779 return self.__class__(self.data + str(other))
780 def __radd__(self, other):
781 if isinstance(other, str):
782 return self.__class__(other + self.data)
783 return self.__class__(str(other) + self.data)
784 def __mul__(self, n):
785 return self.__class__(self.data*n)
786 __rmul__ = __mul__
787 def __mod__(self, args):
788 return self.__class__(self.data % args)
789
790 # the following methods are defined in alphabetical order:
791 def capitalize(self): return self.__class__(self.data.capitalize())
792 def center(self, width, *args):
793 return self.__class__(self.data.center(width, *args))
794 def count(self, sub, start=0, end=_sys.maxsize):
795 if isinstance(sub, UserString):
796 sub = sub.data
797 return self.data.count(sub, start, end)
798 def encode(self, encoding=None, errors=None): # XXX improve this?
799 if encoding:
800 if errors:
801 return self.__class__(self.data.encode(encoding, errors))
802 return self.__class__(self.data.encode(encoding))
803 return self.__class__(self.data.encode())
804 def endswith(self, suffix, start=0, end=_sys.maxsize):
805 return self.data.endswith(suffix, start, end)
806 def expandtabs(self, tabsize=8):
807 return self.__class__(self.data.expandtabs(tabsize))
808 def find(self, sub, start=0, end=_sys.maxsize):
809 if isinstance(sub, UserString):
810 sub = sub.data
811 return self.data.find(sub, start, end)
812 def format(self, *args, **kwds):
813 return self.data.format(*args, **kwds)
814 def index(self, sub, start=0, end=_sys.maxsize):
815 return self.data.index(sub, start, end)
816 def isalpha(self): return self.data.isalpha()
817 def isalnum(self): return self.data.isalnum()
818 def isdecimal(self): return self.data.isdecimal()
819 def isdigit(self): return self.data.isdigit()
820 def isidentifier(self): return self.data.isidentifier()
821 def islower(self): return self.data.islower()
822 def isnumeric(self): return self.data.isnumeric()
823 def isspace(self): return self.data.isspace()
824 def istitle(self): return self.data.istitle()
825 def isupper(self): return self.data.isupper()
826 def join(self, seq): return self.data.join(seq)
827 def ljust(self, width, *args):
828 return self.__class__(self.data.ljust(width, *args))
829 def lower(self): return self.__class__(self.data.lower())
830 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
831 def partition(self, sep):
832 return self.data.partition(sep)
833 def replace(self, old, new, maxsplit=-1):
834 if isinstance(old, UserString):
835 old = old.data
836 if isinstance(new, UserString):
837 new = new.data
838 return self.__class__(self.data.replace(old, new, maxsplit))
839 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000840 if isinstance(sub, UserString):
841 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000842 return self.data.rfind(sub, start, end)
843 def rindex(self, sub, start=0, end=_sys.maxsize):
844 return self.data.rindex(sub, start, end)
845 def rjust(self, width, *args):
846 return self.__class__(self.data.rjust(width, *args))
847 def rpartition(self, sep):
848 return self.data.rpartition(sep)
849 def rstrip(self, chars=None):
850 return self.__class__(self.data.rstrip(chars))
851 def split(self, sep=None, maxsplit=-1):
852 return self.data.split(sep, maxsplit)
853 def rsplit(self, sep=None, maxsplit=-1):
854 return self.data.rsplit(sep, maxsplit)
855 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
856 def startswith(self, prefix, start=0, end=_sys.maxsize):
857 return self.data.startswith(prefix, start, end)
858 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
859 def swapcase(self): return self.__class__(self.data.swapcase())
860 def title(self): return self.__class__(self.data.title())
861 def translate(self, *args):
862 return self.__class__(self.data.translate(*args))
863 def upper(self): return self.__class__(self.data.upper())
864 def zfill(self, width): return self.__class__(self.data.zfill(width))
865
866
867
868################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000869### Simple tests
870################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000871
872if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000873 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000874 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000875 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000876 p = Point(x=10, y=20)
877 assert p == loads(dumps(p))
878
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000879 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000880 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000881 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000882 @property
883 def hypot(self):
884 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000885 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000886 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000887
Christian Heimes25bb7832008-01-11 16:17:00 +0000888 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000889 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000890
891 class Point(namedtuple('Point', 'x y')):
892 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000893 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000894 _make = classmethod(tuple.__new__)
895 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000896 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000897
898 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000899
Christian Heimes25bb7832008-01-11 16:17:00 +0000900 Point3D = namedtuple('Point3D', Point._fields + ('z',))
901 print(Point3D.__doc__)
902
Guido van Rossumd8faa362007-04-27 19:54:29 +0000903 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000904 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000905 print(TestResults(*doctest.testmod()))