blob: 00886ef06ebf5b7b601a9e723b81edf92d656ae8 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000014from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000015
16################################################################################
17### OrderedDict
18################################################################################
19
20class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000021 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000022 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000023 # The inherited dict provides __getitem__, __len__, __contains__, and get.
24 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000025 # Big-O running times for all methods are the same as for regular dictionaries.
26
27 # The internal self.__map dictionary maps keys to links in a doubly linked list.
28 # The circular doubly linked list starts and ends with a sentinel element.
29 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettinger5be21b72010-08-01 22:10:57 +000030 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettinger2d32f632009-03-02 21:24:57 +000031
32 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000033 '''Initialize an ordered dictionary. Signature is the same as for
34 regular dictionaries, but keyword arguments are not recommended
35 because their insertion order is arbitrary.
36
37 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000038 if len(args) > 1:
39 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000040 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000041 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000042 except AttributeError:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000043 self.__root = root = [None, None, None] # sentinel node
44 PREV = 0
45 NEXT = 1
46 root[PREV] = root[NEXT] = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000047 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000048 self.update(*args, **kwds)
49
Raymond Hettinger5be21b72010-08-01 22:10:57 +000050 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000051 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000052 # Setting a new item creates a new link which goes at the end of the linked
53 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000054 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000055 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000056 last = root[PREV]
57 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
58 dict_setitem(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000059
Raymond Hettinger5be21b72010-08-01 22:10:57 +000060 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000061 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000062 # Deleting an existing item uses self.__map to find the link which is
63 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000064 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000065 link = self.__map.pop(key)
Raymond Hettinger5be21b72010-08-01 22:10:57 +000066 link_prev = link[PREV]
67 link_next = link[NEXT]
68 link_prev[NEXT] = link_next
69 link_next[PREV] = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000070
Raymond Hettinger5be21b72010-08-01 22:10:57 +000071 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000072 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000073 # Traverse the linked list in order.
74 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000075 curr = root[NEXT]
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 while curr is not root:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000077 yield curr[KEY]
78 curr = curr[NEXT]
Raymond Hettinger2d32f632009-03-02 21:24:57 +000079
Raymond Hettinger5be21b72010-08-01 22:10:57 +000080 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000081 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000082 # Traverse the linked list in reverse order.
83 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000084 curr = root[PREV]
Raymond Hettingerf1736542009-03-23 05:19:21 +000085 while curr is not root:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000086 yield curr[KEY]
87 curr = curr[PREV]
Raymond Hettinger2d32f632009-03-02 21:24:57 +000088
89 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000090 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +000091 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +000092 tmp = self.__map, self.__root
93 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +000094 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +000095 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +000096 if inst_dict:
97 return (self.__class__, (items,), inst_dict)
98 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000100 def clear(self):
101 'od.clear() -> None. Remove all items from od.'
102 try:
103 for node in self.__map.values():
104 del node[:]
105 self.__root[:] = [self.__root, self.__root, None]
106 self.__map.clear()
107 except AttributeError:
108 pass
109 dict.clear(self)
110
Raymond Hettinger331722d2010-09-02 18:44:16 +0000111 def popitem(self, last=True, PREV=0, NEXT=1, KEY=2, dict_pop=dict.pop):
112 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
113 Pairs are returned in LIFO order if last is true or FIFO order if false.
114
115 '''
116 if not self:
117 raise KeyError('dictionary is empty')
118 root = self.__root
119 if last: # link_prev <--> link <--> root
120 link = root[PREV]
121 link_prev = link[PREV]
122 link_prev[NEXT] = root
123 root[PREV] = link_prev
124 else: # root <--> link <--> link_next
125 link = root[NEXT]
126 link_next = link[NEXT]
127 root[NEXT] = link_next
128 link_next[PREV] = root
129 key = link[KEY]
130 del self.__map[key]
131 value = dict_pop(self, key)
132 return key, value
133
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000134 setdefault = MutableMapping.setdefault
135 update = MutableMapping.update
136 pop = MutableMapping.pop
137 keys = MutableMapping.keys
138 values = MutableMapping.values
139 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000140 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000141
142 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000143 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000144 if not self:
145 return '%s()' % (self.__class__.__name__,)
146 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
147
148 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000149 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000150 return self.__class__(self)
151
152 @classmethod
153 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000154 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
155 and values equal to v (which defaults to None).
156
157 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000158 d = cls()
159 for key in iterable:
160 d[key] = value
161 return d
162
163 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000164 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
165 while comparison to a regular mapping is order-insensitive.
166
167 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000168 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000169 return len(self)==len(other) and \
170 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000171 return dict.__eq__(self, other)
172
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000173 def __del__(self):
174 self.clear() # eliminate cyclical references
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000175
Raymond Hettingerf45abc92010-09-06 21:26:09 +0000176 def move_to_end(self, key, last=True, PREV=0, NEXT=1):
177 '''Move an existing element to the end (or beginning if last==False).
178
179 Raises KeyError if the element does not exist.
180 When last=True, acts like a fast version of self[key]=self.pop(key).
181
182 '''
Raymond Hettinger38d17e32010-09-02 09:44:28 +0000183 link = self.__map[key]
184 link_prev = link[PREV]
185 link_next = link[NEXT]
186 link_prev[NEXT] = link_next
187 link_next[PREV] = link_prev
188 root = self.__root
Raymond Hettingerf45abc92010-09-06 21:26:09 +0000189 if last:
190 last = root[PREV]
191 link[PREV] = last
192 link[NEXT] = root
193 last[NEXT] = root[PREV] = link
194 else:
195 first = root[NEXT]
196 link[PREV] = root
197 link[NEXT] = first
198 root[NEXT] = first[PREV] = link
Raymond Hettinger38d17e32010-09-02 09:44:28 +0000199
Christian Heimes99170a52007-12-19 02:07:34 +0000200
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000201################################################################################
202### namedtuple
203################################################################################
204
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000205def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000206 """Returns a new subclass of tuple with named fields.
207
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000208 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000209 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000210 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000211 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000212 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000213 33
Christian Heimes99170a52007-12-19 02:07:34 +0000214 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000215 >>> x, y
216 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000217 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000218 33
Christian Heimes0449f632007-12-15 01:27:15 +0000219 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000220 >>> d['x']
221 11
222 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000223 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000224 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000225 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000226
227 """
228
Christian Heimes2380ac72008-01-09 00:17:24 +0000229 # Parse and validate the field names. Validation serves two purposes,
230 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000231 if isinstance(field_names, str):
232 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000233 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000234 if rename:
235 names = list(field_names)
236 seen = set()
237 for i, name in enumerate(names):
238 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
239 or not name or name[0].isdigit() or name.startswith('_')
240 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000241 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000242 seen.add(name)
243 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000244 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000245 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000246 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
247 if _iskeyword(name):
248 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
249 if name[0].isdigit():
250 raise ValueError('Type names and field names cannot start with a number: %r' % name)
251 seen_names = set()
252 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000253 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000254 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000255 if name in seen_names:
256 raise ValueError('Encountered duplicate field name: %r' % name)
257 seen_names.add(name)
258
259 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000260 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000261 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000262 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
263 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000264 '%(typename)s(%(argtxt)s)' \n
265 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000266 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000267 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000268 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000269 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000270 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000271 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000272 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000273 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000274 if len(result) != %(numfields)d:
275 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
276 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000277 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000278 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000279 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000280 def _asdict(self):
281 'Return a new OrderedDict which maps field names to their values'
282 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000283 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000284 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000285 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000286 if kwds:
287 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000288 return result \n
289 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000290 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000291 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000292 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000293 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000294 if verbose:
295 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000296
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000297 # Execute the template string in a temporary namespace and
298 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000299 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
300 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000301 try:
302 exec(template, namespace)
303 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000304 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000305 result = namespace[typename]
306
307 # For pickling to work, the __module__ variable needs to be set to the frame
308 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000309 # sys._getframe is not defined (Jython for example) or sys._getframe is not
310 # defined for arguments greater than 0 (IronPython).
311 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000312 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000313 except (AttributeError, ValueError):
314 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000315
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000316 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000317
Guido van Rossumd8faa362007-04-27 19:54:29 +0000318
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000319########################################################################
320### Counter
321########################################################################
322
323class Counter(dict):
324 '''Dict subclass for counting hashable items. Sometimes called a bag
325 or multiset. Elements are stored as dictionary keys and their counts
326 are stored as dictionary values.
327
328 >>> c = Counter('abracadabra') # count elements from a string
329
330 >>> c.most_common(3) # three most common elements
331 [('a', 5), ('r', 2), ('b', 2)]
332 >>> sorted(c) # list all unique elements
333 ['a', 'b', 'c', 'd', 'r']
334 >>> ''.join(sorted(c.elements())) # list elements with repetitions
335 'aaaaabbcdrr'
336 >>> sum(c.values()) # total of all counts
337 11
338
339 >>> c['a'] # count of letter 'a'
340 5
341 >>> for elem in 'shazam': # update counts from an iterable
342 ... c[elem] += 1 # by adding 1 to each element's count
343 >>> c['a'] # now there are seven 'a'
344 7
345 >>> del c['r'] # remove all 'r'
346 >>> c['r'] # now there are zero 'r'
347 0
348
349 >>> d = Counter('simsalabim') # make another counter
350 >>> c.update(d) # add in the second counter
351 >>> c['a'] # now there are nine 'a'
352 9
353
354 >>> c.clear() # empty the counter
355 >>> c
356 Counter()
357
358 Note: If a count is set to zero or reduced to zero, it will remain
359 in the counter until the entry is deleted or the counter is cleared:
360
361 >>> c = Counter('aaabbc')
362 >>> c['b'] -= 2 # reduce the count of 'b' by two
363 >>> c.most_common() # 'b' is still in, but its count is zero
364 [('a', 3), ('c', 1), ('b', 0)]
365
366 '''
367 # References:
368 # http://en.wikipedia.org/wiki/Multiset
369 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
370 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
371 # http://code.activestate.com/recipes/259174/
372 # Knuth, TAOCP Vol. II section 4.6.3
373
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000374 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000375 '''Create a new, empty Counter object. And if given, count elements
376 from an input iterable. Or, initialize the count from another mapping
377 of elements to their counts.
378
379 >>> c = Counter() # a new, empty counter
380 >>> c = Counter('gallahad') # a new counter from an iterable
381 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000382 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000383
384 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000385 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000386
387 def __missing__(self, key):
388 'The count of elements not in the Counter is zero.'
389 # Needed so that self[missing_item] does not raise KeyError
390 return 0
391
392 def most_common(self, n=None):
393 '''List the n most common elements and their counts from the most
394 common to the least. If n is None, then list all element counts.
395
396 >>> Counter('abracadabra').most_common(3)
397 [('a', 5), ('r', 2), ('b', 2)]
398
399 '''
400 # Emulate Bag.sortedByCount from Smalltalk
401 if n is None:
402 return sorted(self.items(), key=_itemgetter(1), reverse=True)
403 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
404
405 def elements(self):
406 '''Iterator over elements repeating each as many times as its count.
407
408 >>> c = Counter('ABCABC')
409 >>> sorted(c.elements())
410 ['A', 'A', 'B', 'B', 'C', 'C']
411
412 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
413 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
414 >>> product = 1
415 >>> for factor in prime_factors.elements(): # loop over factors
416 ... product *= factor # and multiply them
417 >>> product
418 1836
419
420 Note, if an element's count has been set to zero or is a negative
421 number, elements() will ignore it.
422
423 '''
424 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
425 return _chain.from_iterable(_starmap(_repeat, self.items()))
426
427 # Override dict methods where necessary
428
429 @classmethod
430 def fromkeys(cls, iterable, v=None):
431 # There is no equivalent method for counters because setting v=1
432 # means that no element can have a count greater than one.
433 raise NotImplementedError(
434 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
435
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000436 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000437 '''Like dict.update() but add counts instead of replacing them.
438
439 Source can be an iterable, a dictionary, or another Counter instance.
440
441 >>> c = Counter('which')
442 >>> c.update('witch') # add elements from another iterable
443 >>> d = Counter('watch')
444 >>> c.update(d) # add elements from another counter
445 >>> c['h'] # four 'h' in which, witch, and watch
446 4
447
448 '''
449 # The regular dict.update() operation makes no sense here because the
450 # replace behavior results in the some of original untouched counts
451 # being mixed-in with all of the other counts for a mismash that
452 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000453 # contexts. Instead, we implement straight-addition. Both the inputs
454 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000455
456 if iterable is not None:
457 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000458 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000459 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000460 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000461 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000462 else:
463 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000464 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000465 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000466 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000467 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000468 if kwds:
469 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000470
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000471 def subtract(self, iterable=None, **kwds):
472 '''Like dict.update() but subtracts counts instead of replacing them.
473 Counts can be reduced below zero. Both the inputs and outputs are
474 allowed to contain zero and negative counts.
475
476 Source can be an iterable, a dictionary, or another Counter instance.
477
478 >>> c = Counter('which')
479 >>> c.subtract('witch') # subtract elements from another iterable
480 >>> c.subtract(Counter('watch')) # subtract elements from another counter
481 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
482 0
483 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
484 -1
485
486 '''
487 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000488 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000489 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000490 for elem, count in iterable.items():
491 self[elem] = self_get(elem, 0) - count
492 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000493 for elem in iterable:
494 self[elem] = self_get(elem, 0) - 1
495 if kwds:
496 self.subtract(kwds)
497
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000498 def copy(self):
499 'Like dict.copy() but returns a Counter instance instead of a dict.'
500 return Counter(self)
501
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000502 def __delitem__(self, elem):
503 'Like dict.__delitem__() but does not raise KeyError for missing values.'
504 if elem in self:
505 dict.__delitem__(self, elem)
506
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000507 def __repr__(self):
508 if not self:
509 return '%s()' % self.__class__.__name__
510 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
511 return '%s({%s})' % (self.__class__.__name__, items)
512
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000513 # Multiset-style mathematical operations discussed in:
514 # Knuth TAOCP Volume II section 4.6.3 exercise 19
515 # and at http://en.wikipedia.org/wiki/Multiset
516 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000517 # Outputs guaranteed to only include positive counts.
518 #
519 # To strip negative and zero counts, add-in an empty counter:
520 # c += Counter()
521
522 def __add__(self, other):
523 '''Add counts from two counters.
524
525 >>> Counter('abbb') + Counter('bcc')
526 Counter({'b': 4, 'c': 2, 'a': 1})
527
528 '''
529 if not isinstance(other, Counter):
530 return NotImplemented
531 result = Counter()
532 for elem in set(self) | set(other):
533 newcount = self[elem] + other[elem]
534 if newcount > 0:
535 result[elem] = newcount
536 return result
537
538 def __sub__(self, other):
539 ''' Subtract count, but keep only results with positive counts.
540
541 >>> Counter('abbbc') - Counter('bccd')
542 Counter({'b': 2, 'a': 1})
543
544 '''
545 if not isinstance(other, Counter):
546 return NotImplemented
547 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000548 for elem in set(self) | set(other):
549 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000550 if newcount > 0:
551 result[elem] = newcount
552 return result
553
554 def __or__(self, other):
555 '''Union is the maximum of value in either of the input counters.
556
557 >>> Counter('abbb') | Counter('bcc')
558 Counter({'b': 3, 'c': 2, 'a': 1})
559
560 '''
561 if not isinstance(other, Counter):
562 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000563 result = Counter()
564 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000565 p, q = self[elem], other[elem]
566 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000567 if newcount > 0:
568 result[elem] = newcount
569 return result
570
571 def __and__(self, other):
572 ''' Intersection is the minimum of corresponding counts.
573
574 >>> Counter('abbb') & Counter('bcc')
575 Counter({'b': 1})
576
577 '''
578 if not isinstance(other, Counter):
579 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000580 result = Counter()
581 if len(self) < len(other):
582 self, other = other, self
583 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000584 p, q = self[elem], other[elem]
585 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000586 if newcount > 0:
587 result[elem] = newcount
588 return result
589
Guido van Rossumd8faa362007-04-27 19:54:29 +0000590
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000591################################################################################
592### UserDict
593################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000594
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000595class UserDict(MutableMapping):
596
597 # Start by filling-out the abstract methods
598 def __init__(self, dict=None, **kwargs):
599 self.data = {}
600 if dict is not None:
601 self.update(dict)
602 if len(kwargs):
603 self.update(kwargs)
604 def __len__(self): return len(self.data)
605 def __getitem__(self, key):
606 if key in self.data:
607 return self.data[key]
608 if hasattr(self.__class__, "__missing__"):
609 return self.__class__.__missing__(self, key)
610 raise KeyError(key)
611 def __setitem__(self, key, item): self.data[key] = item
612 def __delitem__(self, key): del self.data[key]
613 def __iter__(self):
614 return iter(self.data)
615
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000616 # Modify __contains__ to work correctly when __missing__ is present
617 def __contains__(self, key):
618 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000619
620 # Now, add the methods in dicts but not in MutableMapping
621 def __repr__(self): return repr(self.data)
622 def copy(self):
623 if self.__class__ is UserDict:
624 return UserDict(self.data.copy())
625 import copy
626 data = self.data
627 try:
628 self.data = {}
629 c = copy.copy(self)
630 finally:
631 self.data = data
632 c.update(self)
633 return c
634 @classmethod
635 def fromkeys(cls, iterable, value=None):
636 d = cls()
637 for key in iterable:
638 d[key] = value
639 return d
640
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000641
642
643################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000644### UserList
645################################################################################
646
647class UserList(MutableSequence):
648 """A more or less complete user-defined wrapper around list objects."""
649 def __init__(self, initlist=None):
650 self.data = []
651 if initlist is not None:
652 # XXX should this accept an arbitrary sequence?
653 if type(initlist) == type(self.data):
654 self.data[:] = initlist
655 elif isinstance(initlist, UserList):
656 self.data[:] = initlist.data[:]
657 else:
658 self.data = list(initlist)
659 def __repr__(self): return repr(self.data)
660 def __lt__(self, other): return self.data < self.__cast(other)
661 def __le__(self, other): return self.data <= self.__cast(other)
662 def __eq__(self, other): return self.data == self.__cast(other)
663 def __ne__(self, other): return self.data != self.__cast(other)
664 def __gt__(self, other): return self.data > self.__cast(other)
665 def __ge__(self, other): return self.data >= self.__cast(other)
666 def __cast(self, other):
667 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000668 def __contains__(self, item): return item in self.data
669 def __len__(self): return len(self.data)
670 def __getitem__(self, i): return self.data[i]
671 def __setitem__(self, i, item): self.data[i] = item
672 def __delitem__(self, i): del self.data[i]
673 def __add__(self, other):
674 if isinstance(other, UserList):
675 return self.__class__(self.data + other.data)
676 elif isinstance(other, type(self.data)):
677 return self.__class__(self.data + other)
678 return self.__class__(self.data + list(other))
679 def __radd__(self, other):
680 if isinstance(other, UserList):
681 return self.__class__(other.data + self.data)
682 elif isinstance(other, type(self.data)):
683 return self.__class__(other + self.data)
684 return self.__class__(list(other) + self.data)
685 def __iadd__(self, other):
686 if isinstance(other, UserList):
687 self.data += other.data
688 elif isinstance(other, type(self.data)):
689 self.data += other
690 else:
691 self.data += list(other)
692 return self
693 def __mul__(self, n):
694 return self.__class__(self.data*n)
695 __rmul__ = __mul__
696 def __imul__(self, n):
697 self.data *= n
698 return self
699 def append(self, item): self.data.append(item)
700 def insert(self, i, item): self.data.insert(i, item)
701 def pop(self, i=-1): return self.data.pop(i)
702 def remove(self, item): self.data.remove(item)
703 def count(self, item): return self.data.count(item)
704 def index(self, item, *args): return self.data.index(item, *args)
705 def reverse(self): self.data.reverse()
706 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
707 def extend(self, other):
708 if isinstance(other, UserList):
709 self.data.extend(other.data)
710 else:
711 self.data.extend(other)
712
713
714
715################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000716### UserString
717################################################################################
718
719class UserString(Sequence):
720 def __init__(self, seq):
721 if isinstance(seq, str):
722 self.data = seq
723 elif isinstance(seq, UserString):
724 self.data = seq.data[:]
725 else:
726 self.data = str(seq)
727 def __str__(self): return str(self.data)
728 def __repr__(self): return repr(self.data)
729 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000730 def __float__(self): return float(self.data)
731 def __complex__(self): return complex(self.data)
732 def __hash__(self): return hash(self.data)
733
734 def __eq__(self, string):
735 if isinstance(string, UserString):
736 return self.data == string.data
737 return self.data == string
738 def __ne__(self, string):
739 if isinstance(string, UserString):
740 return self.data != string.data
741 return self.data != string
742 def __lt__(self, string):
743 if isinstance(string, UserString):
744 return self.data < string.data
745 return self.data < string
746 def __le__(self, string):
747 if isinstance(string, UserString):
748 return self.data <= string.data
749 return self.data <= string
750 def __gt__(self, string):
751 if isinstance(string, UserString):
752 return self.data > string.data
753 return self.data > string
754 def __ge__(self, string):
755 if isinstance(string, UserString):
756 return self.data >= string.data
757 return self.data >= string
758
759 def __contains__(self, char):
760 if isinstance(char, UserString):
761 char = char.data
762 return char in self.data
763
764 def __len__(self): return len(self.data)
765 def __getitem__(self, index): return self.__class__(self.data[index])
766 def __add__(self, other):
767 if isinstance(other, UserString):
768 return self.__class__(self.data + other.data)
769 elif isinstance(other, str):
770 return self.__class__(self.data + other)
771 return self.__class__(self.data + str(other))
772 def __radd__(self, other):
773 if isinstance(other, str):
774 return self.__class__(other + self.data)
775 return self.__class__(str(other) + self.data)
776 def __mul__(self, n):
777 return self.__class__(self.data*n)
778 __rmul__ = __mul__
779 def __mod__(self, args):
780 return self.__class__(self.data % args)
781
782 # the following methods are defined in alphabetical order:
783 def capitalize(self): return self.__class__(self.data.capitalize())
784 def center(self, width, *args):
785 return self.__class__(self.data.center(width, *args))
786 def count(self, sub, start=0, end=_sys.maxsize):
787 if isinstance(sub, UserString):
788 sub = sub.data
789 return self.data.count(sub, start, end)
790 def encode(self, encoding=None, errors=None): # XXX improve this?
791 if encoding:
792 if errors:
793 return self.__class__(self.data.encode(encoding, errors))
794 return self.__class__(self.data.encode(encoding))
795 return self.__class__(self.data.encode())
796 def endswith(self, suffix, start=0, end=_sys.maxsize):
797 return self.data.endswith(suffix, start, end)
798 def expandtabs(self, tabsize=8):
799 return self.__class__(self.data.expandtabs(tabsize))
800 def find(self, sub, start=0, end=_sys.maxsize):
801 if isinstance(sub, UserString):
802 sub = sub.data
803 return self.data.find(sub, start, end)
804 def format(self, *args, **kwds):
805 return self.data.format(*args, **kwds)
806 def index(self, sub, start=0, end=_sys.maxsize):
807 return self.data.index(sub, start, end)
808 def isalpha(self): return self.data.isalpha()
809 def isalnum(self): return self.data.isalnum()
810 def isdecimal(self): return self.data.isdecimal()
811 def isdigit(self): return self.data.isdigit()
812 def isidentifier(self): return self.data.isidentifier()
813 def islower(self): return self.data.islower()
814 def isnumeric(self): return self.data.isnumeric()
815 def isspace(self): return self.data.isspace()
816 def istitle(self): return self.data.istitle()
817 def isupper(self): return self.data.isupper()
818 def join(self, seq): return self.data.join(seq)
819 def ljust(self, width, *args):
820 return self.__class__(self.data.ljust(width, *args))
821 def lower(self): return self.__class__(self.data.lower())
822 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
823 def partition(self, sep):
824 return self.data.partition(sep)
825 def replace(self, old, new, maxsplit=-1):
826 if isinstance(old, UserString):
827 old = old.data
828 if isinstance(new, UserString):
829 new = new.data
830 return self.__class__(self.data.replace(old, new, maxsplit))
831 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000832 if isinstance(sub, UserString):
833 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000834 return self.data.rfind(sub, start, end)
835 def rindex(self, sub, start=0, end=_sys.maxsize):
836 return self.data.rindex(sub, start, end)
837 def rjust(self, width, *args):
838 return self.__class__(self.data.rjust(width, *args))
839 def rpartition(self, sep):
840 return self.data.rpartition(sep)
841 def rstrip(self, chars=None):
842 return self.__class__(self.data.rstrip(chars))
843 def split(self, sep=None, maxsplit=-1):
844 return self.data.split(sep, maxsplit)
845 def rsplit(self, sep=None, maxsplit=-1):
846 return self.data.rsplit(sep, maxsplit)
847 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
848 def startswith(self, prefix, start=0, end=_sys.maxsize):
849 return self.data.startswith(prefix, start, end)
850 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
851 def swapcase(self): return self.__class__(self.data.swapcase())
852 def title(self): return self.__class__(self.data.title())
853 def translate(self, *args):
854 return self.__class__(self.data.translate(*args))
855 def upper(self): return self.__class__(self.data.upper())
856 def zfill(self, width): return self.__class__(self.data.zfill(width))
857
858
859
860################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000861### Simple tests
862################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000863
864if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000865 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000866 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000867 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000868 p = Point(x=10, y=20)
869 assert p == loads(dumps(p))
870
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000871 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000872 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000873 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000874 @property
875 def hypot(self):
876 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000877 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000878 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000879
Christian Heimes25bb7832008-01-11 16:17:00 +0000880 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000881 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000882
883 class Point(namedtuple('Point', 'x y')):
884 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000885 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000886 _make = classmethod(tuple.__new__)
887 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000888 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000889
890 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000891
Christian Heimes25bb7832008-01-11 16:17:00 +0000892 Point3D = namedtuple('Point3D', Point._fields + ('z',))
893 print(Point3D.__doc__)
894
Guido van Rossumd8faa362007-04-27 19:54:29 +0000895 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000896 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000897 print(TestResults(*doctest.testmod()))