blob: fb6766e2a54ea385340f35ee006672daa782a6d5 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000014from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000015
16################################################################################
17### OrderedDict
18################################################################################
19
20class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000021 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000022 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000023 # The inherited dict provides __getitem__, __len__, __contains__, and get.
24 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000025 # Big-O running times for all methods are the same as for regular dictionaries.
26
27 # The internal self.__map dictionary maps keys to links in a doubly linked list.
28 # The circular doubly linked list starts and ends with a sentinel element.
29 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettinger5be21b72010-08-01 22:10:57 +000030 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettinger2d32f632009-03-02 21:24:57 +000031
32 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000033 '''Initialize an ordered dictionary. Signature is the same as for
34 regular dictionaries, but keyword arguments are not recommended
35 because their insertion order is arbitrary.
36
37 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000038 if len(args) > 1:
39 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000040 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000041 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000042 except AttributeError:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000043 self.__root = root = [None, None, None] # sentinel node
44 PREV = 0
45 NEXT = 1
46 root[PREV] = root[NEXT] = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000047 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000048 self.update(*args, **kwds)
49
Raymond Hettinger5be21b72010-08-01 22:10:57 +000050 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000051 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000052 # Setting a new item creates a new link which goes at the end of the linked
53 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000054 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000055 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000056 last = root[PREV]
57 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
58 dict_setitem(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000059
Raymond Hettinger5be21b72010-08-01 22:10:57 +000060 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000061 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000062 # Deleting an existing item uses self.__map to find the link which is
63 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000064 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000065 link = self.__map.pop(key)
Raymond Hettinger5be21b72010-08-01 22:10:57 +000066 link_prev = link[PREV]
67 link_next = link[NEXT]
68 link_prev[NEXT] = link_next
69 link_next[PREV] = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000070
Raymond Hettinger5be21b72010-08-01 22:10:57 +000071 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000072 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000073 # Traverse the linked list in order.
74 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000075 curr = root[NEXT]
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 while curr is not root:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000077 yield curr[KEY]
78 curr = curr[NEXT]
Raymond Hettinger2d32f632009-03-02 21:24:57 +000079
Raymond Hettinger5be21b72010-08-01 22:10:57 +000080 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000081 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000082 # Traverse the linked list in reverse order.
83 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000084 curr = root[PREV]
Raymond Hettingerf1736542009-03-23 05:19:21 +000085 while curr is not root:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000086 yield curr[KEY]
87 curr = curr[PREV]
Raymond Hettinger2d32f632009-03-02 21:24:57 +000088
89 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000090 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +000091 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +000092 tmp = self.__map, self.__root
93 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +000094 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +000095 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +000096 if inst_dict:
97 return (self.__class__, (items,), inst_dict)
98 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000100 def clear(self):
101 'od.clear() -> None. Remove all items from od.'
102 try:
103 for node in self.__map.values():
104 del node[:]
105 self.__root[:] = [self.__root, self.__root, None]
106 self.__map.clear()
107 except AttributeError:
108 pass
109 dict.clear(self)
110
Raymond Hettinger331722d2010-09-02 18:44:16 +0000111 def popitem(self, last=True, PREV=0, NEXT=1, KEY=2, dict_pop=dict.pop):
112 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
113 Pairs are returned in LIFO order if last is true or FIFO order if false.
114
115 '''
116 if not self:
117 raise KeyError('dictionary is empty')
118 root = self.__root
119 if last: # link_prev <--> link <--> root
120 link = root[PREV]
121 link_prev = link[PREV]
122 link_prev[NEXT] = root
123 root[PREV] = link_prev
124 else: # root <--> link <--> link_next
125 link = root[NEXT]
126 link_next = link[NEXT]
127 root[NEXT] = link_next
128 link_next[PREV] = root
129 key = link[KEY]
130 del self.__map[key]
131 value = dict_pop(self, key)
132 return key, value
133
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000134 setdefault = MutableMapping.setdefault
135 update = MutableMapping.update
136 pop = MutableMapping.pop
137 keys = MutableMapping.keys
138 values = MutableMapping.values
139 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000140 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000141
142 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000143 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000144 if not self:
145 return '%s()' % (self.__class__.__name__,)
146 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
147
148 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000149 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000150 return self.__class__(self)
151
152 @classmethod
153 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000154 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
155 and values equal to v (which defaults to None).
156
157 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000158 d = cls()
159 for key in iterable:
160 d[key] = value
161 return d
162
163 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000164 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
165 while comparison to a regular mapping is order-insensitive.
166
167 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000168 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000169 return len(self)==len(other) and \
170 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000171 return dict.__eq__(self, other)
172
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000173 def __del__(self):
174 self.clear() # eliminate cyclical references
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000175
Raymond Hettinger38d17e32010-09-02 09:44:28 +0000176 def _move_to_end(self, key, PREV=0, NEXT=1):
177 'Fast version of self[key]=self.pop(key). Private method for internal use.'
178 link = self.__map[key]
179 link_prev = link[PREV]
180 link_next = link[NEXT]
181 link_prev[NEXT] = link_next
182 link_next[PREV] = link_prev
183 root = self.__root
184 last = root[PREV]
185 link[PREV] = last
186 link[NEXT] = root
187 last[NEXT] = root[PREV] = link
188
Christian Heimes99170a52007-12-19 02:07:34 +0000189
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000190################################################################################
191### namedtuple
192################################################################################
193
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000194def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000195 """Returns a new subclass of tuple with named fields.
196
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000197 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000198 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000199 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000200 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000201 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000202 33
Christian Heimes99170a52007-12-19 02:07:34 +0000203 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000204 >>> x, y
205 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000206 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000207 33
Christian Heimes0449f632007-12-15 01:27:15 +0000208 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000209 >>> d['x']
210 11
211 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000212 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000213 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000214 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000215
216 """
217
Christian Heimes2380ac72008-01-09 00:17:24 +0000218 # Parse and validate the field names. Validation serves two purposes,
219 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000220 if isinstance(field_names, str):
221 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000222 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000223 if rename:
224 names = list(field_names)
225 seen = set()
226 for i, name in enumerate(names):
227 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
228 or not name or name[0].isdigit() or name.startswith('_')
229 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000230 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000231 seen.add(name)
232 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000233 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000234 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000235 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
236 if _iskeyword(name):
237 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
238 if name[0].isdigit():
239 raise ValueError('Type names and field names cannot start with a number: %r' % name)
240 seen_names = set()
241 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000242 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000243 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000244 if name in seen_names:
245 raise ValueError('Encountered duplicate field name: %r' % name)
246 seen_names.add(name)
247
248 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000249 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000250 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000251 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
252 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000253 '%(typename)s(%(argtxt)s)' \n
254 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000255 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000256 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000257 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000258 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000259 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000260 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000261 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000262 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000263 if len(result) != %(numfields)d:
264 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
265 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000266 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000267 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000268 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000269 def _asdict(self):
270 'Return a new OrderedDict which maps field names to their values'
271 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000272 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000273 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000274 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000275 if kwds:
276 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000277 return result \n
278 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000279 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000280 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000281 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000282 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000283 if verbose:
284 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000285
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000286 # Execute the template string in a temporary namespace and
287 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000288 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
289 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000290 try:
291 exec(template, namespace)
292 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000293 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000294 result = namespace[typename]
295
296 # For pickling to work, the __module__ variable needs to be set to the frame
297 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000298 # sys._getframe is not defined (Jython for example) or sys._getframe is not
299 # defined for arguments greater than 0 (IronPython).
300 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000301 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000302 except (AttributeError, ValueError):
303 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000304
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000305 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000306
Guido van Rossumd8faa362007-04-27 19:54:29 +0000307
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000308########################################################################
309### Counter
310########################################################################
311
312class Counter(dict):
313 '''Dict subclass for counting hashable items. Sometimes called a bag
314 or multiset. Elements are stored as dictionary keys and their counts
315 are stored as dictionary values.
316
317 >>> c = Counter('abracadabra') # count elements from a string
318
319 >>> c.most_common(3) # three most common elements
320 [('a', 5), ('r', 2), ('b', 2)]
321 >>> sorted(c) # list all unique elements
322 ['a', 'b', 'c', 'd', 'r']
323 >>> ''.join(sorted(c.elements())) # list elements with repetitions
324 'aaaaabbcdrr'
325 >>> sum(c.values()) # total of all counts
326 11
327
328 >>> c['a'] # count of letter 'a'
329 5
330 >>> for elem in 'shazam': # update counts from an iterable
331 ... c[elem] += 1 # by adding 1 to each element's count
332 >>> c['a'] # now there are seven 'a'
333 7
334 >>> del c['r'] # remove all 'r'
335 >>> c['r'] # now there are zero 'r'
336 0
337
338 >>> d = Counter('simsalabim') # make another counter
339 >>> c.update(d) # add in the second counter
340 >>> c['a'] # now there are nine 'a'
341 9
342
343 >>> c.clear() # empty the counter
344 >>> c
345 Counter()
346
347 Note: If a count is set to zero or reduced to zero, it will remain
348 in the counter until the entry is deleted or the counter is cleared:
349
350 >>> c = Counter('aaabbc')
351 >>> c['b'] -= 2 # reduce the count of 'b' by two
352 >>> c.most_common() # 'b' is still in, but its count is zero
353 [('a', 3), ('c', 1), ('b', 0)]
354
355 '''
356 # References:
357 # http://en.wikipedia.org/wiki/Multiset
358 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
359 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
360 # http://code.activestate.com/recipes/259174/
361 # Knuth, TAOCP Vol. II section 4.6.3
362
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000363 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000364 '''Create a new, empty Counter object. And if given, count elements
365 from an input iterable. Or, initialize the count from another mapping
366 of elements to their counts.
367
368 >>> c = Counter() # a new, empty counter
369 >>> c = Counter('gallahad') # a new counter from an iterable
370 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000371 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000372
373 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000374 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000375
376 def __missing__(self, key):
377 'The count of elements not in the Counter is zero.'
378 # Needed so that self[missing_item] does not raise KeyError
379 return 0
380
381 def most_common(self, n=None):
382 '''List the n most common elements and their counts from the most
383 common to the least. If n is None, then list all element counts.
384
385 >>> Counter('abracadabra').most_common(3)
386 [('a', 5), ('r', 2), ('b', 2)]
387
388 '''
389 # Emulate Bag.sortedByCount from Smalltalk
390 if n is None:
391 return sorted(self.items(), key=_itemgetter(1), reverse=True)
392 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
393
394 def elements(self):
395 '''Iterator over elements repeating each as many times as its count.
396
397 >>> c = Counter('ABCABC')
398 >>> sorted(c.elements())
399 ['A', 'A', 'B', 'B', 'C', 'C']
400
401 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
402 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
403 >>> product = 1
404 >>> for factor in prime_factors.elements(): # loop over factors
405 ... product *= factor # and multiply them
406 >>> product
407 1836
408
409 Note, if an element's count has been set to zero or is a negative
410 number, elements() will ignore it.
411
412 '''
413 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
414 return _chain.from_iterable(_starmap(_repeat, self.items()))
415
416 # Override dict methods where necessary
417
418 @classmethod
419 def fromkeys(cls, iterable, v=None):
420 # There is no equivalent method for counters because setting v=1
421 # means that no element can have a count greater than one.
422 raise NotImplementedError(
423 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
424
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000425 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000426 '''Like dict.update() but add counts instead of replacing them.
427
428 Source can be an iterable, a dictionary, or another Counter instance.
429
430 >>> c = Counter('which')
431 >>> c.update('witch') # add elements from another iterable
432 >>> d = Counter('watch')
433 >>> c.update(d) # add elements from another counter
434 >>> c['h'] # four 'h' in which, witch, and watch
435 4
436
437 '''
438 # The regular dict.update() operation makes no sense here because the
439 # replace behavior results in the some of original untouched counts
440 # being mixed-in with all of the other counts for a mismash that
441 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000442 # contexts. Instead, we implement straight-addition. Both the inputs
443 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000444
445 if iterable is not None:
446 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000447 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000448 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000449 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000450 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000451 else:
452 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000453 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000454 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000455 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000456 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000457 if kwds:
458 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000459
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000460 def subtract(self, iterable=None, **kwds):
461 '''Like dict.update() but subtracts counts instead of replacing them.
462 Counts can be reduced below zero. Both the inputs and outputs are
463 allowed to contain zero and negative counts.
464
465 Source can be an iterable, a dictionary, or another Counter instance.
466
467 >>> c = Counter('which')
468 >>> c.subtract('witch') # subtract elements from another iterable
469 >>> c.subtract(Counter('watch')) # subtract elements from another counter
470 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
471 0
472 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
473 -1
474
475 '''
476 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000477 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000478 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000479 for elem, count in iterable.items():
480 self[elem] = self_get(elem, 0) - count
481 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000482 for elem in iterable:
483 self[elem] = self_get(elem, 0) - 1
484 if kwds:
485 self.subtract(kwds)
486
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000487 def copy(self):
488 'Like dict.copy() but returns a Counter instance instead of a dict.'
489 return Counter(self)
490
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000491 def __delitem__(self, elem):
492 'Like dict.__delitem__() but does not raise KeyError for missing values.'
493 if elem in self:
494 dict.__delitem__(self, elem)
495
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000496 def __repr__(self):
497 if not self:
498 return '%s()' % self.__class__.__name__
499 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
500 return '%s({%s})' % (self.__class__.__name__, items)
501
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000502 # Multiset-style mathematical operations discussed in:
503 # Knuth TAOCP Volume II section 4.6.3 exercise 19
504 # and at http://en.wikipedia.org/wiki/Multiset
505 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000506 # Outputs guaranteed to only include positive counts.
507 #
508 # To strip negative and zero counts, add-in an empty counter:
509 # c += Counter()
510
511 def __add__(self, other):
512 '''Add counts from two counters.
513
514 >>> Counter('abbb') + Counter('bcc')
515 Counter({'b': 4, 'c': 2, 'a': 1})
516
517 '''
518 if not isinstance(other, Counter):
519 return NotImplemented
520 result = Counter()
521 for elem in set(self) | set(other):
522 newcount = self[elem] + other[elem]
523 if newcount > 0:
524 result[elem] = newcount
525 return result
526
527 def __sub__(self, other):
528 ''' Subtract count, but keep only results with positive counts.
529
530 >>> Counter('abbbc') - Counter('bccd')
531 Counter({'b': 2, 'a': 1})
532
533 '''
534 if not isinstance(other, Counter):
535 return NotImplemented
536 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000537 for elem in set(self) | set(other):
538 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000539 if newcount > 0:
540 result[elem] = newcount
541 return result
542
543 def __or__(self, other):
544 '''Union is the maximum of value in either of the input counters.
545
546 >>> Counter('abbb') | Counter('bcc')
547 Counter({'b': 3, 'c': 2, 'a': 1})
548
549 '''
550 if not isinstance(other, Counter):
551 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000552 result = Counter()
553 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000554 p, q = self[elem], other[elem]
555 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000556 if newcount > 0:
557 result[elem] = newcount
558 return result
559
560 def __and__(self, other):
561 ''' Intersection is the minimum of corresponding counts.
562
563 >>> Counter('abbb') & Counter('bcc')
564 Counter({'b': 1})
565
566 '''
567 if not isinstance(other, Counter):
568 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000569 result = Counter()
570 if len(self) < len(other):
571 self, other = other, self
572 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000573 p, q = self[elem], other[elem]
574 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000575 if newcount > 0:
576 result[elem] = newcount
577 return result
578
Guido van Rossumd8faa362007-04-27 19:54:29 +0000579
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000580################################################################################
581### UserDict
582################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000583
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000584class UserDict(MutableMapping):
585
586 # Start by filling-out the abstract methods
587 def __init__(self, dict=None, **kwargs):
588 self.data = {}
589 if dict is not None:
590 self.update(dict)
591 if len(kwargs):
592 self.update(kwargs)
593 def __len__(self): return len(self.data)
594 def __getitem__(self, key):
595 if key in self.data:
596 return self.data[key]
597 if hasattr(self.__class__, "__missing__"):
598 return self.__class__.__missing__(self, key)
599 raise KeyError(key)
600 def __setitem__(self, key, item): self.data[key] = item
601 def __delitem__(self, key): del self.data[key]
602 def __iter__(self):
603 return iter(self.data)
604
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000605 # Modify __contains__ to work correctly when __missing__ is present
606 def __contains__(self, key):
607 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000608
609 # Now, add the methods in dicts but not in MutableMapping
610 def __repr__(self): return repr(self.data)
611 def copy(self):
612 if self.__class__ is UserDict:
613 return UserDict(self.data.copy())
614 import copy
615 data = self.data
616 try:
617 self.data = {}
618 c = copy.copy(self)
619 finally:
620 self.data = data
621 c.update(self)
622 return c
623 @classmethod
624 def fromkeys(cls, iterable, value=None):
625 d = cls()
626 for key in iterable:
627 d[key] = value
628 return d
629
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000630
631
632################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000633### UserList
634################################################################################
635
636class UserList(MutableSequence):
637 """A more or less complete user-defined wrapper around list objects."""
638 def __init__(self, initlist=None):
639 self.data = []
640 if initlist is not None:
641 # XXX should this accept an arbitrary sequence?
642 if type(initlist) == type(self.data):
643 self.data[:] = initlist
644 elif isinstance(initlist, UserList):
645 self.data[:] = initlist.data[:]
646 else:
647 self.data = list(initlist)
648 def __repr__(self): return repr(self.data)
649 def __lt__(self, other): return self.data < self.__cast(other)
650 def __le__(self, other): return self.data <= self.__cast(other)
651 def __eq__(self, other): return self.data == self.__cast(other)
652 def __ne__(self, other): return self.data != self.__cast(other)
653 def __gt__(self, other): return self.data > self.__cast(other)
654 def __ge__(self, other): return self.data >= self.__cast(other)
655 def __cast(self, other):
656 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000657 def __contains__(self, item): return item in self.data
658 def __len__(self): return len(self.data)
659 def __getitem__(self, i): return self.data[i]
660 def __setitem__(self, i, item): self.data[i] = item
661 def __delitem__(self, i): del self.data[i]
662 def __add__(self, other):
663 if isinstance(other, UserList):
664 return self.__class__(self.data + other.data)
665 elif isinstance(other, type(self.data)):
666 return self.__class__(self.data + other)
667 return self.__class__(self.data + list(other))
668 def __radd__(self, other):
669 if isinstance(other, UserList):
670 return self.__class__(other.data + self.data)
671 elif isinstance(other, type(self.data)):
672 return self.__class__(other + self.data)
673 return self.__class__(list(other) + self.data)
674 def __iadd__(self, other):
675 if isinstance(other, UserList):
676 self.data += other.data
677 elif isinstance(other, type(self.data)):
678 self.data += other
679 else:
680 self.data += list(other)
681 return self
682 def __mul__(self, n):
683 return self.__class__(self.data*n)
684 __rmul__ = __mul__
685 def __imul__(self, n):
686 self.data *= n
687 return self
688 def append(self, item): self.data.append(item)
689 def insert(self, i, item): self.data.insert(i, item)
690 def pop(self, i=-1): return self.data.pop(i)
691 def remove(self, item): self.data.remove(item)
692 def count(self, item): return self.data.count(item)
693 def index(self, item, *args): return self.data.index(item, *args)
694 def reverse(self): self.data.reverse()
695 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
696 def extend(self, other):
697 if isinstance(other, UserList):
698 self.data.extend(other.data)
699 else:
700 self.data.extend(other)
701
702
703
704################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000705### UserString
706################################################################################
707
708class UserString(Sequence):
709 def __init__(self, seq):
710 if isinstance(seq, str):
711 self.data = seq
712 elif isinstance(seq, UserString):
713 self.data = seq.data[:]
714 else:
715 self.data = str(seq)
716 def __str__(self): return str(self.data)
717 def __repr__(self): return repr(self.data)
718 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000719 def __float__(self): return float(self.data)
720 def __complex__(self): return complex(self.data)
721 def __hash__(self): return hash(self.data)
722
723 def __eq__(self, string):
724 if isinstance(string, UserString):
725 return self.data == string.data
726 return self.data == string
727 def __ne__(self, string):
728 if isinstance(string, UserString):
729 return self.data != string.data
730 return self.data != string
731 def __lt__(self, string):
732 if isinstance(string, UserString):
733 return self.data < string.data
734 return self.data < string
735 def __le__(self, string):
736 if isinstance(string, UserString):
737 return self.data <= string.data
738 return self.data <= string
739 def __gt__(self, string):
740 if isinstance(string, UserString):
741 return self.data > string.data
742 return self.data > string
743 def __ge__(self, string):
744 if isinstance(string, UserString):
745 return self.data >= string.data
746 return self.data >= string
747
748 def __contains__(self, char):
749 if isinstance(char, UserString):
750 char = char.data
751 return char in self.data
752
753 def __len__(self): return len(self.data)
754 def __getitem__(self, index): return self.__class__(self.data[index])
755 def __add__(self, other):
756 if isinstance(other, UserString):
757 return self.__class__(self.data + other.data)
758 elif isinstance(other, str):
759 return self.__class__(self.data + other)
760 return self.__class__(self.data + str(other))
761 def __radd__(self, other):
762 if isinstance(other, str):
763 return self.__class__(other + self.data)
764 return self.__class__(str(other) + self.data)
765 def __mul__(self, n):
766 return self.__class__(self.data*n)
767 __rmul__ = __mul__
768 def __mod__(self, args):
769 return self.__class__(self.data % args)
770
771 # the following methods are defined in alphabetical order:
772 def capitalize(self): return self.__class__(self.data.capitalize())
773 def center(self, width, *args):
774 return self.__class__(self.data.center(width, *args))
775 def count(self, sub, start=0, end=_sys.maxsize):
776 if isinstance(sub, UserString):
777 sub = sub.data
778 return self.data.count(sub, start, end)
779 def encode(self, encoding=None, errors=None): # XXX improve this?
780 if encoding:
781 if errors:
782 return self.__class__(self.data.encode(encoding, errors))
783 return self.__class__(self.data.encode(encoding))
784 return self.__class__(self.data.encode())
785 def endswith(self, suffix, start=0, end=_sys.maxsize):
786 return self.data.endswith(suffix, start, end)
787 def expandtabs(self, tabsize=8):
788 return self.__class__(self.data.expandtabs(tabsize))
789 def find(self, sub, start=0, end=_sys.maxsize):
790 if isinstance(sub, UserString):
791 sub = sub.data
792 return self.data.find(sub, start, end)
793 def format(self, *args, **kwds):
794 return self.data.format(*args, **kwds)
795 def index(self, sub, start=0, end=_sys.maxsize):
796 return self.data.index(sub, start, end)
797 def isalpha(self): return self.data.isalpha()
798 def isalnum(self): return self.data.isalnum()
799 def isdecimal(self): return self.data.isdecimal()
800 def isdigit(self): return self.data.isdigit()
801 def isidentifier(self): return self.data.isidentifier()
802 def islower(self): return self.data.islower()
803 def isnumeric(self): return self.data.isnumeric()
804 def isspace(self): return self.data.isspace()
805 def istitle(self): return self.data.istitle()
806 def isupper(self): return self.data.isupper()
807 def join(self, seq): return self.data.join(seq)
808 def ljust(self, width, *args):
809 return self.__class__(self.data.ljust(width, *args))
810 def lower(self): return self.__class__(self.data.lower())
811 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
812 def partition(self, sep):
813 return self.data.partition(sep)
814 def replace(self, old, new, maxsplit=-1):
815 if isinstance(old, UserString):
816 old = old.data
817 if isinstance(new, UserString):
818 new = new.data
819 return self.__class__(self.data.replace(old, new, maxsplit))
820 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000821 if isinstance(sub, UserString):
822 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000823 return self.data.rfind(sub, start, end)
824 def rindex(self, sub, start=0, end=_sys.maxsize):
825 return self.data.rindex(sub, start, end)
826 def rjust(self, width, *args):
827 return self.__class__(self.data.rjust(width, *args))
828 def rpartition(self, sep):
829 return self.data.rpartition(sep)
830 def rstrip(self, chars=None):
831 return self.__class__(self.data.rstrip(chars))
832 def split(self, sep=None, maxsplit=-1):
833 return self.data.split(sep, maxsplit)
834 def rsplit(self, sep=None, maxsplit=-1):
835 return self.data.rsplit(sep, maxsplit)
836 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
837 def startswith(self, prefix, start=0, end=_sys.maxsize):
838 return self.data.startswith(prefix, start, end)
839 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
840 def swapcase(self): return self.__class__(self.data.swapcase())
841 def title(self): return self.__class__(self.data.title())
842 def translate(self, *args):
843 return self.__class__(self.data.translate(*args))
844 def upper(self): return self.__class__(self.data.upper())
845 def zfill(self, width): return self.__class__(self.data.zfill(width))
846
847
848
849################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000850### Simple tests
851################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000852
853if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000854 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000855 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000856 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000857 p = Point(x=10, y=20)
858 assert p == loads(dumps(p))
859
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000860 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000861 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000862 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000863 @property
864 def hypot(self):
865 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000866 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000867 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000868
Christian Heimes25bb7832008-01-11 16:17:00 +0000869 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000870 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000871
872 class Point(namedtuple('Point', 'x y')):
873 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000874 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000875 _make = classmethod(tuple.__new__)
876 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000877 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000878
879 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000880
Christian Heimes25bb7832008-01-11 16:17:00 +0000881 Point3D = namedtuple('Point3D', Point._fields + ('z',))
882 print(Point3D.__doc__)
883
Guido van Rossumd8faa362007-04-27 19:54:29 +0000884 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000885 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000886 print(TestResults(*doctest.testmod()))