blob: 56ae2a30558e543e45454981286b6ed01cf9ca4a [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerfa11db02010-09-12 04:12:42 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerfa11db02010-09-12 04:12:42 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger2d32f632009-03-02 21:24:57 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingerfa11db02010-09-12 04:12:42 +000034 # The back links are weakref proxies (to prevent circular references).
Raymond Hettinger2d32f632009-03-02 21:24:57 +000035
36 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000037 '''Initialize an ordered dictionary. Signature is the same as for
38 regular dictionaries, but keyword arguments are not recommended
39 because their insertion order is arbitrary.
40
41 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000042 if len(args) > 1:
43 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000044 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000045 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000046 except AttributeError:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000047 self.__root = root = _Link() # sentinel node for the doubly linked list
48 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000049 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000050 self.update(*args, **kwds)
51
Raymond Hettingerfa11db02010-09-12 04:12:42 +000052 def __setitem__(self, key, value,
53 dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000054 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000055 # Setting a new item creates a new link which goes at the end of the linked
56 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000057 if key not in self:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000058 self.__map[key] = link = Link()
Raymond Hettingerf1736542009-03-23 05:19:21 +000059 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000060 last = root.prev
61 link.prev, link.next, link.key = last, root, key
62 last.next = link
63 root.prev = proxy(link)
64 dict.__setitem__(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000065
Raymond Hettingerfa11db02010-09-12 04:12:42 +000066 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000067 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000068 # Deleting an existing item uses self.__map to find the link which is
69 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000070 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000071 link = self.__map.pop(key)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000072 link_prev = link.prev
73 link_next = link.next
74 link_prev.next = link_next
75 link_next.prev = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000076
Raymond Hettingerfa11db02010-09-12 04:12:42 +000077 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000078 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000079 # Traverse the linked list in order.
80 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000081 curr = root.next
Raymond Hettingerf1736542009-03-23 05:19:21 +000082 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000083 yield curr.key
84 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000085
Raymond Hettingerfa11db02010-09-12 04:12:42 +000086 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000087 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000088 # Traverse the linked list in reverse order.
89 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000090 curr = root.prev
Raymond Hettingerf1736542009-03-23 05:19:21 +000091 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000092 yield curr.key
93 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000094
95 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000096 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +000097 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +000098 tmp = self.__map, self.__root
99 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000100 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +0000101 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000102 if inst_dict:
103 return (self.__class__, (items,), inst_dict)
104 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000105
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000106 def clear(self):
107 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000108 root = self.__root
109 root.prev = root.next = root
110 self.__map.clear()
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000111 dict.clear(self)
112
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000113 def popitem(self, last=True):
Raymond Hettinger331722d2010-09-02 18:44:16 +0000114 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
115 Pairs are returned in LIFO order if last is true or FIFO order if false.
116
117 '''
118 if not self:
119 raise KeyError('dictionary is empty')
120 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000121 if last:
122 link = root.prev
123 link_prev = link.prev
124 link_prev.next = root
125 root.prev = link_prev
126 else:
127 link = root.next
128 link_next = link.next
129 root.next = link_next
130 link_next.prev = root
131 key = link.key
Raymond Hettinger331722d2010-09-02 18:44:16 +0000132 del self.__map[key]
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000133 value = dict.pop(self, key)
Raymond Hettinger331722d2010-09-02 18:44:16 +0000134 return key, value
135
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000136 def move_to_end(self, key, last=True):
137 '''Move an existing element to the end (or beginning if last==False).
138
139 Raises KeyError if the element does not exist.
140 When last=True, acts like a fast version of self[key]=self.pop(key).
141
142 '''
143 link = self.__map[key]
144 link_prev = link.prev
145 link_next = link.next
146 link_prev.next = link_next
147 link_next.prev = link_prev
148 root = self.__root
149 if last:
150 last = root.prev
151 link.prev = last
152 link.next = root
153 last.next = root.prev = link
154 else:
155 first = root.next
156 link.prev = root
157 link.next = first
158 root.next = first.prev = link
159
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000160 setdefault = MutableMapping.setdefault
161 update = MutableMapping.update
162 pop = MutableMapping.pop
163 keys = MutableMapping.keys
164 values = MutableMapping.values
165 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000166 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000167
168 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000169 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000170 if not self:
171 return '%s()' % (self.__class__.__name__,)
172 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
173
174 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000175 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000176 return self.__class__(self)
177
178 @classmethod
179 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000180 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
181 and values equal to v (which defaults to None).
182
183 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000184 d = cls()
185 for key in iterable:
186 d[key] = value
187 return d
188
189 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000190 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
191 while comparison to a regular mapping is order-insensitive.
192
193 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000194 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000195 return len(self)==len(other) and \
196 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000197 return dict.__eq__(self, other)
198
Christian Heimes99170a52007-12-19 02:07:34 +0000199
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000200################################################################################
201### namedtuple
202################################################################################
203
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000204def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000205 """Returns a new subclass of tuple with named fields.
206
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000207 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000208 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000209 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000210 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000211 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000212 33
Christian Heimes99170a52007-12-19 02:07:34 +0000213 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000214 >>> x, y
215 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000216 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000217 33
Christian Heimes0449f632007-12-15 01:27:15 +0000218 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000219 >>> d['x']
220 11
221 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000222 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000223 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000224 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000225
226 """
227
Christian Heimes2380ac72008-01-09 00:17:24 +0000228 # Parse and validate the field names. Validation serves two purposes,
229 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000230 if isinstance(field_names, str):
231 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000232 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000233 if rename:
234 names = list(field_names)
235 seen = set()
236 for i, name in enumerate(names):
237 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
238 or not name or name[0].isdigit() or name.startswith('_')
239 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000240 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000241 seen.add(name)
242 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000243 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000244 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000245 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
246 if _iskeyword(name):
247 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
248 if name[0].isdigit():
249 raise ValueError('Type names and field names cannot start with a number: %r' % name)
250 seen_names = set()
251 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000252 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000253 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000254 if name in seen_names:
255 raise ValueError('Encountered duplicate field name: %r' % name)
256 seen_names.add(name)
257
258 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000259 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000260 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000261 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
262 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000263 '%(typename)s(%(argtxt)s)' \n
264 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000265 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000266 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000267 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000268 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000269 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000270 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000271 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000272 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000273 if len(result) != %(numfields)d:
274 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
275 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000276 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000277 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000278 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000279 def _asdict(self):
280 'Return a new OrderedDict which maps field names to their values'
281 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000282 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000283 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000284 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000285 if kwds:
286 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000287 return result \n
288 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000289 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000290 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000291 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000292 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000293 if verbose:
294 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000295
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000296 # Execute the template string in a temporary namespace and
297 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000298 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
299 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000300 try:
301 exec(template, namespace)
302 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000303 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000304 result = namespace[typename]
305
306 # For pickling to work, the __module__ variable needs to be set to the frame
307 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000308 # sys._getframe is not defined (Jython for example) or sys._getframe is not
309 # defined for arguments greater than 0 (IronPython).
310 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000311 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000312 except (AttributeError, ValueError):
313 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000314
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000315 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000316
Guido van Rossumd8faa362007-04-27 19:54:29 +0000317
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000318########################################################################
319### Counter
320########################################################################
321
322class Counter(dict):
323 '''Dict subclass for counting hashable items. Sometimes called a bag
324 or multiset. Elements are stored as dictionary keys and their counts
325 are stored as dictionary values.
326
327 >>> c = Counter('abracadabra') # count elements from a string
328
329 >>> c.most_common(3) # three most common elements
330 [('a', 5), ('r', 2), ('b', 2)]
331 >>> sorted(c) # list all unique elements
332 ['a', 'b', 'c', 'd', 'r']
333 >>> ''.join(sorted(c.elements())) # list elements with repetitions
334 'aaaaabbcdrr'
335 >>> sum(c.values()) # total of all counts
336 11
337
338 >>> c['a'] # count of letter 'a'
339 5
340 >>> for elem in 'shazam': # update counts from an iterable
341 ... c[elem] += 1 # by adding 1 to each element's count
342 >>> c['a'] # now there are seven 'a'
343 7
344 >>> del c['r'] # remove all 'r'
345 >>> c['r'] # now there are zero 'r'
346 0
347
348 >>> d = Counter('simsalabim') # make another counter
349 >>> c.update(d) # add in the second counter
350 >>> c['a'] # now there are nine 'a'
351 9
352
353 >>> c.clear() # empty the counter
354 >>> c
355 Counter()
356
357 Note: If a count is set to zero or reduced to zero, it will remain
358 in the counter until the entry is deleted or the counter is cleared:
359
360 >>> c = Counter('aaabbc')
361 >>> c['b'] -= 2 # reduce the count of 'b' by two
362 >>> c.most_common() # 'b' is still in, but its count is zero
363 [('a', 3), ('c', 1), ('b', 0)]
364
365 '''
366 # References:
367 # http://en.wikipedia.org/wiki/Multiset
368 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
369 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
370 # http://code.activestate.com/recipes/259174/
371 # Knuth, TAOCP Vol. II section 4.6.3
372
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000373 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000374 '''Create a new, empty Counter object. And if given, count elements
375 from an input iterable. Or, initialize the count from another mapping
376 of elements to their counts.
377
378 >>> c = Counter() # a new, empty counter
379 >>> c = Counter('gallahad') # a new counter from an iterable
380 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000381 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000382
383 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000384 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000385
386 def __missing__(self, key):
387 'The count of elements not in the Counter is zero.'
388 # Needed so that self[missing_item] does not raise KeyError
389 return 0
390
391 def most_common(self, n=None):
392 '''List the n most common elements and their counts from the most
393 common to the least. If n is None, then list all element counts.
394
395 >>> Counter('abracadabra').most_common(3)
396 [('a', 5), ('r', 2), ('b', 2)]
397
398 '''
399 # Emulate Bag.sortedByCount from Smalltalk
400 if n is None:
401 return sorted(self.items(), key=_itemgetter(1), reverse=True)
402 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
403
404 def elements(self):
405 '''Iterator over elements repeating each as many times as its count.
406
407 >>> c = Counter('ABCABC')
408 >>> sorted(c.elements())
409 ['A', 'A', 'B', 'B', 'C', 'C']
410
411 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
412 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
413 >>> product = 1
414 >>> for factor in prime_factors.elements(): # loop over factors
415 ... product *= factor # and multiply them
416 >>> product
417 1836
418
419 Note, if an element's count has been set to zero or is a negative
420 number, elements() will ignore it.
421
422 '''
423 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
424 return _chain.from_iterable(_starmap(_repeat, self.items()))
425
426 # Override dict methods where necessary
427
428 @classmethod
429 def fromkeys(cls, iterable, v=None):
430 # There is no equivalent method for counters because setting v=1
431 # means that no element can have a count greater than one.
432 raise NotImplementedError(
433 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
434
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000435 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000436 '''Like dict.update() but add counts instead of replacing them.
437
438 Source can be an iterable, a dictionary, or another Counter instance.
439
440 >>> c = Counter('which')
441 >>> c.update('witch') # add elements from another iterable
442 >>> d = Counter('watch')
443 >>> c.update(d) # add elements from another counter
444 >>> c['h'] # four 'h' in which, witch, and watch
445 4
446
447 '''
448 # The regular dict.update() operation makes no sense here because the
449 # replace behavior results in the some of original untouched counts
450 # being mixed-in with all of the other counts for a mismash that
451 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000452 # contexts. Instead, we implement straight-addition. Both the inputs
453 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000454
455 if iterable is not None:
456 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000457 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000458 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000459 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000460 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000461 else:
462 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000463 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000464 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000465 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000466 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000467 if kwds:
468 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000469
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000470 def subtract(self, iterable=None, **kwds):
471 '''Like dict.update() but subtracts counts instead of replacing them.
472 Counts can be reduced below zero. Both the inputs and outputs are
473 allowed to contain zero and negative counts.
474
475 Source can be an iterable, a dictionary, or another Counter instance.
476
477 >>> c = Counter('which')
478 >>> c.subtract('witch') # subtract elements from another iterable
479 >>> c.subtract(Counter('watch')) # subtract elements from another counter
480 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
481 0
482 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
483 -1
484
485 '''
486 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000487 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000488 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000489 for elem, count in iterable.items():
490 self[elem] = self_get(elem, 0) - count
491 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000492 for elem in iterable:
493 self[elem] = self_get(elem, 0) - 1
494 if kwds:
495 self.subtract(kwds)
496
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000497 def copy(self):
498 'Like dict.copy() but returns a Counter instance instead of a dict.'
499 return Counter(self)
500
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000501 def __delitem__(self, elem):
502 'Like dict.__delitem__() but does not raise KeyError for missing values.'
503 if elem in self:
504 dict.__delitem__(self, elem)
505
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000506 def __repr__(self):
507 if not self:
508 return '%s()' % self.__class__.__name__
509 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
510 return '%s({%s})' % (self.__class__.__name__, items)
511
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000512 # Multiset-style mathematical operations discussed in:
513 # Knuth TAOCP Volume II section 4.6.3 exercise 19
514 # and at http://en.wikipedia.org/wiki/Multiset
515 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000516 # Outputs guaranteed to only include positive counts.
517 #
518 # To strip negative and zero counts, add-in an empty counter:
519 # c += Counter()
520
521 def __add__(self, other):
522 '''Add counts from two counters.
523
524 >>> Counter('abbb') + Counter('bcc')
525 Counter({'b': 4, 'c': 2, 'a': 1})
526
527 '''
528 if not isinstance(other, Counter):
529 return NotImplemented
530 result = Counter()
531 for elem in set(self) | set(other):
532 newcount = self[elem] + other[elem]
533 if newcount > 0:
534 result[elem] = newcount
535 return result
536
537 def __sub__(self, other):
538 ''' Subtract count, but keep only results with positive counts.
539
540 >>> Counter('abbbc') - Counter('bccd')
541 Counter({'b': 2, 'a': 1})
542
543 '''
544 if not isinstance(other, Counter):
545 return NotImplemented
546 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000547 for elem in set(self) | set(other):
548 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000549 if newcount > 0:
550 result[elem] = newcount
551 return result
552
553 def __or__(self, other):
554 '''Union is the maximum of value in either of the input counters.
555
556 >>> Counter('abbb') | Counter('bcc')
557 Counter({'b': 3, 'c': 2, 'a': 1})
558
559 '''
560 if not isinstance(other, Counter):
561 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000562 result = Counter()
563 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000564 p, q = self[elem], other[elem]
565 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000566 if newcount > 0:
567 result[elem] = newcount
568 return result
569
570 def __and__(self, other):
571 ''' Intersection is the minimum of corresponding counts.
572
573 >>> Counter('abbb') & Counter('bcc')
574 Counter({'b': 1})
575
576 '''
577 if not isinstance(other, Counter):
578 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000579 result = Counter()
580 if len(self) < len(other):
581 self, other = other, self
582 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000583 p, q = self[elem], other[elem]
584 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000585 if newcount > 0:
586 result[elem] = newcount
587 return result
588
Guido van Rossumd8faa362007-04-27 19:54:29 +0000589
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000590################################################################################
591### UserDict
592################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000593
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000594class UserDict(MutableMapping):
595
596 # Start by filling-out the abstract methods
597 def __init__(self, dict=None, **kwargs):
598 self.data = {}
599 if dict is not None:
600 self.update(dict)
601 if len(kwargs):
602 self.update(kwargs)
603 def __len__(self): return len(self.data)
604 def __getitem__(self, key):
605 if key in self.data:
606 return self.data[key]
607 if hasattr(self.__class__, "__missing__"):
608 return self.__class__.__missing__(self, key)
609 raise KeyError(key)
610 def __setitem__(self, key, item): self.data[key] = item
611 def __delitem__(self, key): del self.data[key]
612 def __iter__(self):
613 return iter(self.data)
614
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000615 # Modify __contains__ to work correctly when __missing__ is present
616 def __contains__(self, key):
617 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000618
619 # Now, add the methods in dicts but not in MutableMapping
620 def __repr__(self): return repr(self.data)
621 def copy(self):
622 if self.__class__ is UserDict:
623 return UserDict(self.data.copy())
624 import copy
625 data = self.data
626 try:
627 self.data = {}
628 c = copy.copy(self)
629 finally:
630 self.data = data
631 c.update(self)
632 return c
633 @classmethod
634 def fromkeys(cls, iterable, value=None):
635 d = cls()
636 for key in iterable:
637 d[key] = value
638 return d
639
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000640
641
642################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000643### UserList
644################################################################################
645
646class UserList(MutableSequence):
647 """A more or less complete user-defined wrapper around list objects."""
648 def __init__(self, initlist=None):
649 self.data = []
650 if initlist is not None:
651 # XXX should this accept an arbitrary sequence?
652 if type(initlist) == type(self.data):
653 self.data[:] = initlist
654 elif isinstance(initlist, UserList):
655 self.data[:] = initlist.data[:]
656 else:
657 self.data = list(initlist)
658 def __repr__(self): return repr(self.data)
659 def __lt__(self, other): return self.data < self.__cast(other)
660 def __le__(self, other): return self.data <= self.__cast(other)
661 def __eq__(self, other): return self.data == self.__cast(other)
662 def __ne__(self, other): return self.data != self.__cast(other)
663 def __gt__(self, other): return self.data > self.__cast(other)
664 def __ge__(self, other): return self.data >= self.__cast(other)
665 def __cast(self, other):
666 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000667 def __contains__(self, item): return item in self.data
668 def __len__(self): return len(self.data)
669 def __getitem__(self, i): return self.data[i]
670 def __setitem__(self, i, item): self.data[i] = item
671 def __delitem__(self, i): del self.data[i]
672 def __add__(self, other):
673 if isinstance(other, UserList):
674 return self.__class__(self.data + other.data)
675 elif isinstance(other, type(self.data)):
676 return self.__class__(self.data + other)
677 return self.__class__(self.data + list(other))
678 def __radd__(self, other):
679 if isinstance(other, UserList):
680 return self.__class__(other.data + self.data)
681 elif isinstance(other, type(self.data)):
682 return self.__class__(other + self.data)
683 return self.__class__(list(other) + self.data)
684 def __iadd__(self, other):
685 if isinstance(other, UserList):
686 self.data += other.data
687 elif isinstance(other, type(self.data)):
688 self.data += other
689 else:
690 self.data += list(other)
691 return self
692 def __mul__(self, n):
693 return self.__class__(self.data*n)
694 __rmul__ = __mul__
695 def __imul__(self, n):
696 self.data *= n
697 return self
698 def append(self, item): self.data.append(item)
699 def insert(self, i, item): self.data.insert(i, item)
700 def pop(self, i=-1): return self.data.pop(i)
701 def remove(self, item): self.data.remove(item)
702 def count(self, item): return self.data.count(item)
703 def index(self, item, *args): return self.data.index(item, *args)
704 def reverse(self): self.data.reverse()
705 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
706 def extend(self, other):
707 if isinstance(other, UserList):
708 self.data.extend(other.data)
709 else:
710 self.data.extend(other)
711
712
713
714################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000715### UserString
716################################################################################
717
718class UserString(Sequence):
719 def __init__(self, seq):
720 if isinstance(seq, str):
721 self.data = seq
722 elif isinstance(seq, UserString):
723 self.data = seq.data[:]
724 else:
725 self.data = str(seq)
726 def __str__(self): return str(self.data)
727 def __repr__(self): return repr(self.data)
728 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000729 def __float__(self): return float(self.data)
730 def __complex__(self): return complex(self.data)
731 def __hash__(self): return hash(self.data)
732
733 def __eq__(self, string):
734 if isinstance(string, UserString):
735 return self.data == string.data
736 return self.data == string
737 def __ne__(self, string):
738 if isinstance(string, UserString):
739 return self.data != string.data
740 return self.data != string
741 def __lt__(self, string):
742 if isinstance(string, UserString):
743 return self.data < string.data
744 return self.data < string
745 def __le__(self, string):
746 if isinstance(string, UserString):
747 return self.data <= string.data
748 return self.data <= string
749 def __gt__(self, string):
750 if isinstance(string, UserString):
751 return self.data > string.data
752 return self.data > string
753 def __ge__(self, string):
754 if isinstance(string, UserString):
755 return self.data >= string.data
756 return self.data >= string
757
758 def __contains__(self, char):
759 if isinstance(char, UserString):
760 char = char.data
761 return char in self.data
762
763 def __len__(self): return len(self.data)
764 def __getitem__(self, index): return self.__class__(self.data[index])
765 def __add__(self, other):
766 if isinstance(other, UserString):
767 return self.__class__(self.data + other.data)
768 elif isinstance(other, str):
769 return self.__class__(self.data + other)
770 return self.__class__(self.data + str(other))
771 def __radd__(self, other):
772 if isinstance(other, str):
773 return self.__class__(other + self.data)
774 return self.__class__(str(other) + self.data)
775 def __mul__(self, n):
776 return self.__class__(self.data*n)
777 __rmul__ = __mul__
778 def __mod__(self, args):
779 return self.__class__(self.data % args)
780
781 # the following methods are defined in alphabetical order:
782 def capitalize(self): return self.__class__(self.data.capitalize())
783 def center(self, width, *args):
784 return self.__class__(self.data.center(width, *args))
785 def count(self, sub, start=0, end=_sys.maxsize):
786 if isinstance(sub, UserString):
787 sub = sub.data
788 return self.data.count(sub, start, end)
789 def encode(self, encoding=None, errors=None): # XXX improve this?
790 if encoding:
791 if errors:
792 return self.__class__(self.data.encode(encoding, errors))
793 return self.__class__(self.data.encode(encoding))
794 return self.__class__(self.data.encode())
795 def endswith(self, suffix, start=0, end=_sys.maxsize):
796 return self.data.endswith(suffix, start, end)
797 def expandtabs(self, tabsize=8):
798 return self.__class__(self.data.expandtabs(tabsize))
799 def find(self, sub, start=0, end=_sys.maxsize):
800 if isinstance(sub, UserString):
801 sub = sub.data
802 return self.data.find(sub, start, end)
803 def format(self, *args, **kwds):
804 return self.data.format(*args, **kwds)
805 def index(self, sub, start=0, end=_sys.maxsize):
806 return self.data.index(sub, start, end)
807 def isalpha(self): return self.data.isalpha()
808 def isalnum(self): return self.data.isalnum()
809 def isdecimal(self): return self.data.isdecimal()
810 def isdigit(self): return self.data.isdigit()
811 def isidentifier(self): return self.data.isidentifier()
812 def islower(self): return self.data.islower()
813 def isnumeric(self): return self.data.isnumeric()
814 def isspace(self): return self.data.isspace()
815 def istitle(self): return self.data.istitle()
816 def isupper(self): return self.data.isupper()
817 def join(self, seq): return self.data.join(seq)
818 def ljust(self, width, *args):
819 return self.__class__(self.data.ljust(width, *args))
820 def lower(self): return self.__class__(self.data.lower())
821 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
822 def partition(self, sep):
823 return self.data.partition(sep)
824 def replace(self, old, new, maxsplit=-1):
825 if isinstance(old, UserString):
826 old = old.data
827 if isinstance(new, UserString):
828 new = new.data
829 return self.__class__(self.data.replace(old, new, maxsplit))
830 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000831 if isinstance(sub, UserString):
832 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000833 return self.data.rfind(sub, start, end)
834 def rindex(self, sub, start=0, end=_sys.maxsize):
835 return self.data.rindex(sub, start, end)
836 def rjust(self, width, *args):
837 return self.__class__(self.data.rjust(width, *args))
838 def rpartition(self, sep):
839 return self.data.rpartition(sep)
840 def rstrip(self, chars=None):
841 return self.__class__(self.data.rstrip(chars))
842 def split(self, sep=None, maxsplit=-1):
843 return self.data.split(sep, maxsplit)
844 def rsplit(self, sep=None, maxsplit=-1):
845 return self.data.rsplit(sep, maxsplit)
846 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
847 def startswith(self, prefix, start=0, end=_sys.maxsize):
848 return self.data.startswith(prefix, start, end)
849 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
850 def swapcase(self): return self.__class__(self.data.swapcase())
851 def title(self): return self.__class__(self.data.title())
852 def translate(self, *args):
853 return self.__class__(self.data.translate(*args))
854 def upper(self): return self.__class__(self.data.upper())
855 def zfill(self, width): return self.__class__(self.data.zfill(width))
856
857
858
859################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000860### Simple tests
861################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000862
863if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000864 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000865 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000866 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000867 p = Point(x=10, y=20)
868 assert p == loads(dumps(p))
869
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000870 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000871 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000872 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000873 @property
874 def hypot(self):
875 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000876 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000877 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000878
Christian Heimes25bb7832008-01-11 16:17:00 +0000879 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000880 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000881
882 class Point(namedtuple('Point', 'x y')):
883 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000884 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000885 _make = classmethod(tuple.__new__)
886 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000887 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000888
889 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000890
Christian Heimes25bb7832008-01-11 16:17:00 +0000891 Point3D = namedtuple('Point3D', Point._fields + ('z',))
892 print(Point3D.__doc__)
893
Guido van Rossumd8faa362007-04-27 19:54:29 +0000894 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000895 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000896 print(TestResults(*doctest.testmod()))