blob: 6daa5967c1205f789210d7e956058f5825bb0f37 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000014from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000015
16################################################################################
17### OrderedDict
18################################################################################
19
20class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000021 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000022 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000023 # The inherited dict provides __getitem__, __len__, __contains__, and get.
24 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000025 # Big-O running times for all methods are the same as for regular dictionaries.
26
27 # The internal self.__map dictionary maps keys to links in a doubly linked list.
28 # The circular doubly linked list starts and ends with a sentinel element.
29 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettinger5be21b72010-08-01 22:10:57 +000030 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettinger2d32f632009-03-02 21:24:57 +000031
32 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000033 '''Initialize an ordered dictionary. Signature is the same as for
34 regular dictionaries, but keyword arguments are not recommended
35 because their insertion order is arbitrary.
36
37 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000038 if len(args) > 1:
39 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000040 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000041 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000042 except AttributeError:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000043 self.__root = root = [None, None, None] # sentinel node
44 PREV = 0
45 NEXT = 1
46 root[PREV] = root[NEXT] = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000047 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000048 self.update(*args, **kwds)
49
Raymond Hettinger5be21b72010-08-01 22:10:57 +000050 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000051 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000052 # Setting a new item creates a new link which goes at the end of the linked
53 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000054 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000055 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000056 last = root[PREV]
57 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
58 dict_setitem(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000059
Raymond Hettinger5be21b72010-08-01 22:10:57 +000060 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000061 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000062 # Deleting an existing item uses self.__map to find the link which is
63 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000064 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000065 link = self.__map.pop(key)
Raymond Hettinger5be21b72010-08-01 22:10:57 +000066 link_prev = link[PREV]
67 link_next = link[NEXT]
68 link_prev[NEXT] = link_next
69 link_next[PREV] = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000070
Raymond Hettinger5be21b72010-08-01 22:10:57 +000071 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000072 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000073 # Traverse the linked list in order.
74 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000075 curr = root[NEXT]
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 while curr is not root:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000077 yield curr[KEY]
78 curr = curr[NEXT]
Raymond Hettinger2d32f632009-03-02 21:24:57 +000079
Raymond Hettinger5be21b72010-08-01 22:10:57 +000080 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000081 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000082 # Traverse the linked list in reverse order.
83 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000084 curr = root[PREV]
Raymond Hettingerf1736542009-03-23 05:19:21 +000085 while curr is not root:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000086 yield curr[KEY]
87 curr = curr[PREV]
Raymond Hettinger2d32f632009-03-02 21:24:57 +000088
89 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000090 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +000091 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +000092 tmp = self.__map, self.__root
93 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +000094 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +000095 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +000096 if inst_dict:
97 return (self.__class__, (items,), inst_dict)
98 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000100 def clear(self):
101 'od.clear() -> None. Remove all items from od.'
102 try:
103 for node in self.__map.values():
104 del node[:]
105 self.__root[:] = [self.__root, self.__root, None]
106 self.__map.clear()
107 except AttributeError:
108 pass
109 dict.clear(self)
110
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000117 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000118
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000119 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000120 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
121 Pairs are returned in LIFO order if last is true or FIFO order if false.
122
123 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000124 if not self:
125 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000126 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000127 value = self.pop(key)
128 return key, value
129
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000130 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000131 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000132 if not self:
133 return '%s()' % (self.__class__.__name__,)
134 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
135
136 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000137 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000138 return self.__class__(self)
139
140 @classmethod
141 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000142 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
143 and values equal to v (which defaults to None).
144
145 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000146 d = cls()
147 for key in iterable:
148 d[key] = value
149 return d
150
151 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000152 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
153 while comparison to a regular mapping is order-insensitive.
154
155 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000156 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000157 return len(self)==len(other) and \
158 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000159 return dict.__eq__(self, other)
160
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000161 def __del__(self):
162 self.clear() # eliminate cyclical references
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000163
Raymond Hettinger38d17e32010-09-02 09:44:28 +0000164 def _move_to_end(self, key, PREV=0, NEXT=1):
165 'Fast version of self[key]=self.pop(key). Private method for internal use.'
166 link = self.__map[key]
167 link_prev = link[PREV]
168 link_next = link[NEXT]
169 link_prev[NEXT] = link_next
170 link_next[PREV] = link_prev
171 root = self.__root
172 last = root[PREV]
173 link[PREV] = last
174 link[NEXT] = root
175 last[NEXT] = root[PREV] = link
176
Christian Heimes99170a52007-12-19 02:07:34 +0000177
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000178################################################################################
179### namedtuple
180################################################################################
181
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000182def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000183 """Returns a new subclass of tuple with named fields.
184
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000185 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000186 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000187 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000188 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000189 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000190 33
Christian Heimes99170a52007-12-19 02:07:34 +0000191 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000192 >>> x, y
193 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000194 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000195 33
Christian Heimes0449f632007-12-15 01:27:15 +0000196 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000197 >>> d['x']
198 11
199 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000200 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000201 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000202 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000203
204 """
205
Christian Heimes2380ac72008-01-09 00:17:24 +0000206 # Parse and validate the field names. Validation serves two purposes,
207 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000208 if isinstance(field_names, str):
209 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000210 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000211 if rename:
212 names = list(field_names)
213 seen = set()
214 for i, name in enumerate(names):
215 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
216 or not name or name[0].isdigit() or name.startswith('_')
217 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000218 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000219 seen.add(name)
220 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000221 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000222 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000223 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
224 if _iskeyword(name):
225 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
226 if name[0].isdigit():
227 raise ValueError('Type names and field names cannot start with a number: %r' % name)
228 seen_names = set()
229 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000230 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000231 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000232 if name in seen_names:
233 raise ValueError('Encountered duplicate field name: %r' % name)
234 seen_names.add(name)
235
236 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000237 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000238 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000239 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
240 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000241 '%(typename)s(%(argtxt)s)' \n
242 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000243 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000244 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000245 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000246 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000247 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000248 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000249 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000250 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000251 if len(result) != %(numfields)d:
252 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
253 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000254 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000255 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000256 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000257 def _asdict(self):
258 'Return a new OrderedDict which maps field names to their values'
259 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000260 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000261 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000262 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000263 if kwds:
264 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000265 return result \n
266 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000267 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000268 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000269 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000270 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000271 if verbose:
272 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000273
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000274 # Execute the template string in a temporary namespace and
275 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000276 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
277 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000278 try:
279 exec(template, namespace)
280 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000281 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000282 result = namespace[typename]
283
284 # For pickling to work, the __module__ variable needs to be set to the frame
285 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000286 # sys._getframe is not defined (Jython for example) or sys._getframe is not
287 # defined for arguments greater than 0 (IronPython).
288 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000289 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000290 except (AttributeError, ValueError):
291 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000292
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000293 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000294
Guido van Rossumd8faa362007-04-27 19:54:29 +0000295
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000296########################################################################
297### Counter
298########################################################################
299
300class Counter(dict):
301 '''Dict subclass for counting hashable items. Sometimes called a bag
302 or multiset. Elements are stored as dictionary keys and their counts
303 are stored as dictionary values.
304
305 >>> c = Counter('abracadabra') # count elements from a string
306
307 >>> c.most_common(3) # three most common elements
308 [('a', 5), ('r', 2), ('b', 2)]
309 >>> sorted(c) # list all unique elements
310 ['a', 'b', 'c', 'd', 'r']
311 >>> ''.join(sorted(c.elements())) # list elements with repetitions
312 'aaaaabbcdrr'
313 >>> sum(c.values()) # total of all counts
314 11
315
316 >>> c['a'] # count of letter 'a'
317 5
318 >>> for elem in 'shazam': # update counts from an iterable
319 ... c[elem] += 1 # by adding 1 to each element's count
320 >>> c['a'] # now there are seven 'a'
321 7
322 >>> del c['r'] # remove all 'r'
323 >>> c['r'] # now there are zero 'r'
324 0
325
326 >>> d = Counter('simsalabim') # make another counter
327 >>> c.update(d) # add in the second counter
328 >>> c['a'] # now there are nine 'a'
329 9
330
331 >>> c.clear() # empty the counter
332 >>> c
333 Counter()
334
335 Note: If a count is set to zero or reduced to zero, it will remain
336 in the counter until the entry is deleted or the counter is cleared:
337
338 >>> c = Counter('aaabbc')
339 >>> c['b'] -= 2 # reduce the count of 'b' by two
340 >>> c.most_common() # 'b' is still in, but its count is zero
341 [('a', 3), ('c', 1), ('b', 0)]
342
343 '''
344 # References:
345 # http://en.wikipedia.org/wiki/Multiset
346 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
347 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
348 # http://code.activestate.com/recipes/259174/
349 # Knuth, TAOCP Vol. II section 4.6.3
350
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000351 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000352 '''Create a new, empty Counter object. And if given, count elements
353 from an input iterable. Or, initialize the count from another mapping
354 of elements to their counts.
355
356 >>> c = Counter() # a new, empty counter
357 >>> c = Counter('gallahad') # a new counter from an iterable
358 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000359 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000360
361 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000362 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000363
364 def __missing__(self, key):
365 'The count of elements not in the Counter is zero.'
366 # Needed so that self[missing_item] does not raise KeyError
367 return 0
368
369 def most_common(self, n=None):
370 '''List the n most common elements and their counts from the most
371 common to the least. If n is None, then list all element counts.
372
373 >>> Counter('abracadabra').most_common(3)
374 [('a', 5), ('r', 2), ('b', 2)]
375
376 '''
377 # Emulate Bag.sortedByCount from Smalltalk
378 if n is None:
379 return sorted(self.items(), key=_itemgetter(1), reverse=True)
380 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
381
382 def elements(self):
383 '''Iterator over elements repeating each as many times as its count.
384
385 >>> c = Counter('ABCABC')
386 >>> sorted(c.elements())
387 ['A', 'A', 'B', 'B', 'C', 'C']
388
389 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
390 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
391 >>> product = 1
392 >>> for factor in prime_factors.elements(): # loop over factors
393 ... product *= factor # and multiply them
394 >>> product
395 1836
396
397 Note, if an element's count has been set to zero or is a negative
398 number, elements() will ignore it.
399
400 '''
401 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
402 return _chain.from_iterable(_starmap(_repeat, self.items()))
403
404 # Override dict methods where necessary
405
406 @classmethod
407 def fromkeys(cls, iterable, v=None):
408 # There is no equivalent method for counters because setting v=1
409 # means that no element can have a count greater than one.
410 raise NotImplementedError(
411 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
412
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000413 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000414 '''Like dict.update() but add counts instead of replacing them.
415
416 Source can be an iterable, a dictionary, or another Counter instance.
417
418 >>> c = Counter('which')
419 >>> c.update('witch') # add elements from another iterable
420 >>> d = Counter('watch')
421 >>> c.update(d) # add elements from another counter
422 >>> c['h'] # four 'h' in which, witch, and watch
423 4
424
425 '''
426 # The regular dict.update() operation makes no sense here because the
427 # replace behavior results in the some of original untouched counts
428 # being mixed-in with all of the other counts for a mismash that
429 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000430 # contexts. Instead, we implement straight-addition. Both the inputs
431 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000432
433 if iterable is not None:
434 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000435 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000436 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000437 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000438 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000439 else:
440 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000441 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000442 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000443 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000444 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000445 if kwds:
446 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000447
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000448 def subtract(self, iterable=None, **kwds):
449 '''Like dict.update() but subtracts counts instead of replacing them.
450 Counts can be reduced below zero. Both the inputs and outputs are
451 allowed to contain zero and negative counts.
452
453 Source can be an iterable, a dictionary, or another Counter instance.
454
455 >>> c = Counter('which')
456 >>> c.subtract('witch') # subtract elements from another iterable
457 >>> c.subtract(Counter('watch')) # subtract elements from another counter
458 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
459 0
460 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
461 -1
462
463 '''
464 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000465 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000466 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000467 for elem, count in iterable.items():
468 self[elem] = self_get(elem, 0) - count
469 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000470 for elem in iterable:
471 self[elem] = self_get(elem, 0) - 1
472 if kwds:
473 self.subtract(kwds)
474
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000475 def copy(self):
476 'Like dict.copy() but returns a Counter instance instead of a dict.'
477 return Counter(self)
478
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000479 def __delitem__(self, elem):
480 'Like dict.__delitem__() but does not raise KeyError for missing values.'
481 if elem in self:
482 dict.__delitem__(self, elem)
483
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000484 def __repr__(self):
485 if not self:
486 return '%s()' % self.__class__.__name__
487 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
488 return '%s({%s})' % (self.__class__.__name__, items)
489
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000490 # Multiset-style mathematical operations discussed in:
491 # Knuth TAOCP Volume II section 4.6.3 exercise 19
492 # and at http://en.wikipedia.org/wiki/Multiset
493 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000494 # Outputs guaranteed to only include positive counts.
495 #
496 # To strip negative and zero counts, add-in an empty counter:
497 # c += Counter()
498
499 def __add__(self, other):
500 '''Add counts from two counters.
501
502 >>> Counter('abbb') + Counter('bcc')
503 Counter({'b': 4, 'c': 2, 'a': 1})
504
505 '''
506 if not isinstance(other, Counter):
507 return NotImplemented
508 result = Counter()
509 for elem in set(self) | set(other):
510 newcount = self[elem] + other[elem]
511 if newcount > 0:
512 result[elem] = newcount
513 return result
514
515 def __sub__(self, other):
516 ''' Subtract count, but keep only results with positive counts.
517
518 >>> Counter('abbbc') - Counter('bccd')
519 Counter({'b': 2, 'a': 1})
520
521 '''
522 if not isinstance(other, Counter):
523 return NotImplemented
524 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000525 for elem in set(self) | set(other):
526 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000527 if newcount > 0:
528 result[elem] = newcount
529 return result
530
531 def __or__(self, other):
532 '''Union is the maximum of value in either of the input counters.
533
534 >>> Counter('abbb') | Counter('bcc')
535 Counter({'b': 3, 'c': 2, 'a': 1})
536
537 '''
538 if not isinstance(other, Counter):
539 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000540 result = Counter()
541 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000542 p, q = self[elem], other[elem]
543 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000544 if newcount > 0:
545 result[elem] = newcount
546 return result
547
548 def __and__(self, other):
549 ''' Intersection is the minimum of corresponding counts.
550
551 >>> Counter('abbb') & Counter('bcc')
552 Counter({'b': 1})
553
554 '''
555 if not isinstance(other, Counter):
556 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000557 result = Counter()
558 if len(self) < len(other):
559 self, other = other, self
560 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000561 p, q = self[elem], other[elem]
562 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000563 if newcount > 0:
564 result[elem] = newcount
565 return result
566
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000568################################################################################
569### UserDict
570################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000571
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000572class UserDict(MutableMapping):
573
574 # Start by filling-out the abstract methods
575 def __init__(self, dict=None, **kwargs):
576 self.data = {}
577 if dict is not None:
578 self.update(dict)
579 if len(kwargs):
580 self.update(kwargs)
581 def __len__(self): return len(self.data)
582 def __getitem__(self, key):
583 if key in self.data:
584 return self.data[key]
585 if hasattr(self.__class__, "__missing__"):
586 return self.__class__.__missing__(self, key)
587 raise KeyError(key)
588 def __setitem__(self, key, item): self.data[key] = item
589 def __delitem__(self, key): del self.data[key]
590 def __iter__(self):
591 return iter(self.data)
592
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000593 # Modify __contains__ to work correctly when __missing__ is present
594 def __contains__(self, key):
595 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000596
597 # Now, add the methods in dicts but not in MutableMapping
598 def __repr__(self): return repr(self.data)
599 def copy(self):
600 if self.__class__ is UserDict:
601 return UserDict(self.data.copy())
602 import copy
603 data = self.data
604 try:
605 self.data = {}
606 c = copy.copy(self)
607 finally:
608 self.data = data
609 c.update(self)
610 return c
611 @classmethod
612 def fromkeys(cls, iterable, value=None):
613 d = cls()
614 for key in iterable:
615 d[key] = value
616 return d
617
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000618
619
620################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000621### UserList
622################################################################################
623
624class UserList(MutableSequence):
625 """A more or less complete user-defined wrapper around list objects."""
626 def __init__(self, initlist=None):
627 self.data = []
628 if initlist is not None:
629 # XXX should this accept an arbitrary sequence?
630 if type(initlist) == type(self.data):
631 self.data[:] = initlist
632 elif isinstance(initlist, UserList):
633 self.data[:] = initlist.data[:]
634 else:
635 self.data = list(initlist)
636 def __repr__(self): return repr(self.data)
637 def __lt__(self, other): return self.data < self.__cast(other)
638 def __le__(self, other): return self.data <= self.__cast(other)
639 def __eq__(self, other): return self.data == self.__cast(other)
640 def __ne__(self, other): return self.data != self.__cast(other)
641 def __gt__(self, other): return self.data > self.__cast(other)
642 def __ge__(self, other): return self.data >= self.__cast(other)
643 def __cast(self, other):
644 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000645 def __contains__(self, item): return item in self.data
646 def __len__(self): return len(self.data)
647 def __getitem__(self, i): return self.data[i]
648 def __setitem__(self, i, item): self.data[i] = item
649 def __delitem__(self, i): del self.data[i]
650 def __add__(self, other):
651 if isinstance(other, UserList):
652 return self.__class__(self.data + other.data)
653 elif isinstance(other, type(self.data)):
654 return self.__class__(self.data + other)
655 return self.__class__(self.data + list(other))
656 def __radd__(self, other):
657 if isinstance(other, UserList):
658 return self.__class__(other.data + self.data)
659 elif isinstance(other, type(self.data)):
660 return self.__class__(other + self.data)
661 return self.__class__(list(other) + self.data)
662 def __iadd__(self, other):
663 if isinstance(other, UserList):
664 self.data += other.data
665 elif isinstance(other, type(self.data)):
666 self.data += other
667 else:
668 self.data += list(other)
669 return self
670 def __mul__(self, n):
671 return self.__class__(self.data*n)
672 __rmul__ = __mul__
673 def __imul__(self, n):
674 self.data *= n
675 return self
676 def append(self, item): self.data.append(item)
677 def insert(self, i, item): self.data.insert(i, item)
678 def pop(self, i=-1): return self.data.pop(i)
679 def remove(self, item): self.data.remove(item)
680 def count(self, item): return self.data.count(item)
681 def index(self, item, *args): return self.data.index(item, *args)
682 def reverse(self): self.data.reverse()
683 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
684 def extend(self, other):
685 if isinstance(other, UserList):
686 self.data.extend(other.data)
687 else:
688 self.data.extend(other)
689
690
691
692################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000693### UserString
694################################################################################
695
696class UserString(Sequence):
697 def __init__(self, seq):
698 if isinstance(seq, str):
699 self.data = seq
700 elif isinstance(seq, UserString):
701 self.data = seq.data[:]
702 else:
703 self.data = str(seq)
704 def __str__(self): return str(self.data)
705 def __repr__(self): return repr(self.data)
706 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000707 def __float__(self): return float(self.data)
708 def __complex__(self): return complex(self.data)
709 def __hash__(self): return hash(self.data)
710
711 def __eq__(self, string):
712 if isinstance(string, UserString):
713 return self.data == string.data
714 return self.data == string
715 def __ne__(self, string):
716 if isinstance(string, UserString):
717 return self.data != string.data
718 return self.data != string
719 def __lt__(self, string):
720 if isinstance(string, UserString):
721 return self.data < string.data
722 return self.data < string
723 def __le__(self, string):
724 if isinstance(string, UserString):
725 return self.data <= string.data
726 return self.data <= string
727 def __gt__(self, string):
728 if isinstance(string, UserString):
729 return self.data > string.data
730 return self.data > string
731 def __ge__(self, string):
732 if isinstance(string, UserString):
733 return self.data >= string.data
734 return self.data >= string
735
736 def __contains__(self, char):
737 if isinstance(char, UserString):
738 char = char.data
739 return char in self.data
740
741 def __len__(self): return len(self.data)
742 def __getitem__(self, index): return self.__class__(self.data[index])
743 def __add__(self, other):
744 if isinstance(other, UserString):
745 return self.__class__(self.data + other.data)
746 elif isinstance(other, str):
747 return self.__class__(self.data + other)
748 return self.__class__(self.data + str(other))
749 def __radd__(self, other):
750 if isinstance(other, str):
751 return self.__class__(other + self.data)
752 return self.__class__(str(other) + self.data)
753 def __mul__(self, n):
754 return self.__class__(self.data*n)
755 __rmul__ = __mul__
756 def __mod__(self, args):
757 return self.__class__(self.data % args)
758
759 # the following methods are defined in alphabetical order:
760 def capitalize(self): return self.__class__(self.data.capitalize())
761 def center(self, width, *args):
762 return self.__class__(self.data.center(width, *args))
763 def count(self, sub, start=0, end=_sys.maxsize):
764 if isinstance(sub, UserString):
765 sub = sub.data
766 return self.data.count(sub, start, end)
767 def encode(self, encoding=None, errors=None): # XXX improve this?
768 if encoding:
769 if errors:
770 return self.__class__(self.data.encode(encoding, errors))
771 return self.__class__(self.data.encode(encoding))
772 return self.__class__(self.data.encode())
773 def endswith(self, suffix, start=0, end=_sys.maxsize):
774 return self.data.endswith(suffix, start, end)
775 def expandtabs(self, tabsize=8):
776 return self.__class__(self.data.expandtabs(tabsize))
777 def find(self, sub, start=0, end=_sys.maxsize):
778 if isinstance(sub, UserString):
779 sub = sub.data
780 return self.data.find(sub, start, end)
781 def format(self, *args, **kwds):
782 return self.data.format(*args, **kwds)
783 def index(self, sub, start=0, end=_sys.maxsize):
784 return self.data.index(sub, start, end)
785 def isalpha(self): return self.data.isalpha()
786 def isalnum(self): return self.data.isalnum()
787 def isdecimal(self): return self.data.isdecimal()
788 def isdigit(self): return self.data.isdigit()
789 def isidentifier(self): return self.data.isidentifier()
790 def islower(self): return self.data.islower()
791 def isnumeric(self): return self.data.isnumeric()
792 def isspace(self): return self.data.isspace()
793 def istitle(self): return self.data.istitle()
794 def isupper(self): return self.data.isupper()
795 def join(self, seq): return self.data.join(seq)
796 def ljust(self, width, *args):
797 return self.__class__(self.data.ljust(width, *args))
798 def lower(self): return self.__class__(self.data.lower())
799 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
800 def partition(self, sep):
801 return self.data.partition(sep)
802 def replace(self, old, new, maxsplit=-1):
803 if isinstance(old, UserString):
804 old = old.data
805 if isinstance(new, UserString):
806 new = new.data
807 return self.__class__(self.data.replace(old, new, maxsplit))
808 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000809 if isinstance(sub, UserString):
810 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000811 return self.data.rfind(sub, start, end)
812 def rindex(self, sub, start=0, end=_sys.maxsize):
813 return self.data.rindex(sub, start, end)
814 def rjust(self, width, *args):
815 return self.__class__(self.data.rjust(width, *args))
816 def rpartition(self, sep):
817 return self.data.rpartition(sep)
818 def rstrip(self, chars=None):
819 return self.__class__(self.data.rstrip(chars))
820 def split(self, sep=None, maxsplit=-1):
821 return self.data.split(sep, maxsplit)
822 def rsplit(self, sep=None, maxsplit=-1):
823 return self.data.rsplit(sep, maxsplit)
824 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
825 def startswith(self, prefix, start=0, end=_sys.maxsize):
826 return self.data.startswith(prefix, start, end)
827 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
828 def swapcase(self): return self.__class__(self.data.swapcase())
829 def title(self): return self.__class__(self.data.title())
830 def translate(self, *args):
831 return self.__class__(self.data.translate(*args))
832 def upper(self): return self.__class__(self.data.upper())
833 def zfill(self, width): return self.__class__(self.data.zfill(width))
834
835
836
837################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000838### Simple tests
839################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000840
841if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000842 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000843 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000844 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000845 p = Point(x=10, y=20)
846 assert p == loads(dumps(p))
847
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000848 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000849 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000850 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000851 @property
852 def hypot(self):
853 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000854 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000855 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000856
Christian Heimes25bb7832008-01-11 16:17:00 +0000857 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000858 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000859
860 class Point(namedtuple('Point', 'x y')):
861 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000862 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000863 _make = classmethod(tuple.__new__)
864 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000865 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000866
867 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000868
Christian Heimes25bb7832008-01-11 16:17:00 +0000869 Point3D = namedtuple('Point3D', Point._fields + ('z',))
870 print(Point3D.__doc__)
871
Guido van Rossumd8faa362007-04-27 19:54:29 +0000872 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000873 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000874 print(TestResults(*doctest.testmod()))