blob: 2ce46de4883c2911fca0603f32aff8b9e72677da [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000014from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000015
16################################################################################
17### OrderedDict
18################################################################################
19
20class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000021 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000022 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000023 # The inherited dict provides __getitem__, __len__, __contains__, and get.
24 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000025 # Big-O running times for all methods are the same as for regular dictionaries.
26
27 # The internal self.__map dictionary maps keys to links in a doubly linked list.
28 # The circular doubly linked list starts and ends with a sentinel element.
29 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettinger5be21b72010-08-01 22:10:57 +000030 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettinger2d32f632009-03-02 21:24:57 +000031
32 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000033 '''Initialize an ordered dictionary. Signature is the same as for
34 regular dictionaries, but keyword arguments are not recommended
35 because their insertion order is arbitrary.
36
37 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000038 if len(args) > 1:
39 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000040 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000041 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000042 except AttributeError:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000043 self.__root = root = [None, None, None] # sentinel node
44 PREV = 0
45 NEXT = 1
46 root[PREV] = root[NEXT] = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000047 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000048 self.update(*args, **kwds)
49
Raymond Hettinger5be21b72010-08-01 22:10:57 +000050 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000051 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000052 # Setting a new item creates a new link which goes at the end of the linked
53 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000054 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000055 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000056 last = root[PREV]
57 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
58 dict_setitem(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000059
Raymond Hettinger5be21b72010-08-01 22:10:57 +000060 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000061 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000062 # Deleting an existing item uses self.__map to find the link which is
63 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000064 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000065 link = self.__map.pop(key)
Raymond Hettinger5be21b72010-08-01 22:10:57 +000066 link_prev = link[PREV]
67 link_next = link[NEXT]
68 link_prev[NEXT] = link_next
69 link_next[PREV] = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000070
Raymond Hettinger5be21b72010-08-01 22:10:57 +000071 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000072 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000073 # Traverse the linked list in order.
74 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000075 curr = root[NEXT]
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 while curr is not root:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000077 yield curr[KEY]
78 curr = curr[NEXT]
Raymond Hettinger2d32f632009-03-02 21:24:57 +000079
Raymond Hettinger5be21b72010-08-01 22:10:57 +000080 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000081 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000082 # Traverse the linked list in reverse order.
83 root = self.__root
Raymond Hettinger5be21b72010-08-01 22:10:57 +000084 curr = root[PREV]
Raymond Hettingerf1736542009-03-23 05:19:21 +000085 while curr is not root:
Raymond Hettinger5be21b72010-08-01 22:10:57 +000086 yield curr[KEY]
87 curr = curr[PREV]
Raymond Hettinger2d32f632009-03-02 21:24:57 +000088
89 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000090 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +000091 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +000092 tmp = self.__map, self.__root
93 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +000094 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +000095 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +000096 if inst_dict:
97 return (self.__class__, (items,), inst_dict)
98 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000100 def clear(self):
101 'od.clear() -> None. Remove all items from od.'
102 try:
103 for node in self.__map.values():
104 del node[:]
105 self.__root[:] = [self.__root, self.__root, None]
106 self.__map.clear()
107 except AttributeError:
108 pass
109 dict.clear(self)
110
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000117 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000118
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000119 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000120 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
121 Pairs are returned in LIFO order if last is true or FIFO order if false.
122
123 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000124 if not self:
125 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000126 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000127 value = self.pop(key)
128 return key, value
129
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000130 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000131 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000132 if not self:
133 return '%s()' % (self.__class__.__name__,)
134 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
135
136 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000137 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000138 return self.__class__(self)
139
140 @classmethod
141 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000142 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
143 and values equal to v (which defaults to None).
144
145 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000146 d = cls()
147 for key in iterable:
148 d[key] = value
149 return d
150
151 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000152 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
153 while comparison to a regular mapping is order-insensitive.
154
155 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000156 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000157 return len(self)==len(other) and \
158 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000159 return dict.__eq__(self, other)
160
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000161 def __del__(self):
162 self.clear() # eliminate cyclical references
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000163
Christian Heimes99170a52007-12-19 02:07:34 +0000164
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000165################################################################################
166### namedtuple
167################################################################################
168
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000169def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000170 """Returns a new subclass of tuple with named fields.
171
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000172 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000173 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000174 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000175 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000176 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000177 33
Christian Heimes99170a52007-12-19 02:07:34 +0000178 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000179 >>> x, y
180 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000181 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000182 33
Christian Heimes0449f632007-12-15 01:27:15 +0000183 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000184 >>> d['x']
185 11
186 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000187 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000188 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000189 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000190
191 """
192
Christian Heimes2380ac72008-01-09 00:17:24 +0000193 # Parse and validate the field names. Validation serves two purposes,
194 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000195 if isinstance(field_names, str):
196 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000197 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000198 if rename:
199 names = list(field_names)
200 seen = set()
201 for i, name in enumerate(names):
202 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
203 or not name or name[0].isdigit() or name.startswith('_')
204 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000205 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000206 seen.add(name)
207 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000208 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000209 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000210 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
211 if _iskeyword(name):
212 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
213 if name[0].isdigit():
214 raise ValueError('Type names and field names cannot start with a number: %r' % name)
215 seen_names = set()
216 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000217 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000218 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000219 if name in seen_names:
220 raise ValueError('Encountered duplicate field name: %r' % name)
221 seen_names.add(name)
222
223 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000224 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000225 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000226 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
227 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000228 '%(typename)s(%(argtxt)s)' \n
229 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000230 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000231 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000232 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000233 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000234 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000235 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000236 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000237 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000238 if len(result) != %(numfields)d:
239 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
240 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000241 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000242 'Return a nicely formatted representation string'
Christian Heimes0449f632007-12-15 01:27:15 +0000243 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000244 def _asdict(self):
245 'Return a new OrderedDict which maps field names to their values'
246 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000247 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000248 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000249 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000250 if kwds:
251 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000252 return result \n
253 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000254 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000255 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000256 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000257 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000258 if verbose:
259 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000260
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000261 # Execute the template string in a temporary namespace and
262 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000263 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
264 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000265 try:
266 exec(template, namespace)
267 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000268 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000269 result = namespace[typename]
270
271 # For pickling to work, the __module__ variable needs to be set to the frame
272 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000273 # sys._getframe is not defined (Jython for example) or sys._getframe is not
274 # defined for arguments greater than 0 (IronPython).
275 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000276 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000277 except (AttributeError, ValueError):
278 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000279
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000280 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000281
Guido van Rossumd8faa362007-04-27 19:54:29 +0000282
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000283########################################################################
284### Counter
285########################################################################
286
287class Counter(dict):
288 '''Dict subclass for counting hashable items. Sometimes called a bag
289 or multiset. Elements are stored as dictionary keys and their counts
290 are stored as dictionary values.
291
292 >>> c = Counter('abracadabra') # count elements from a string
293
294 >>> c.most_common(3) # three most common elements
295 [('a', 5), ('r', 2), ('b', 2)]
296 >>> sorted(c) # list all unique elements
297 ['a', 'b', 'c', 'd', 'r']
298 >>> ''.join(sorted(c.elements())) # list elements with repetitions
299 'aaaaabbcdrr'
300 >>> sum(c.values()) # total of all counts
301 11
302
303 >>> c['a'] # count of letter 'a'
304 5
305 >>> for elem in 'shazam': # update counts from an iterable
306 ... c[elem] += 1 # by adding 1 to each element's count
307 >>> c['a'] # now there are seven 'a'
308 7
309 >>> del c['r'] # remove all 'r'
310 >>> c['r'] # now there are zero 'r'
311 0
312
313 >>> d = Counter('simsalabim') # make another counter
314 >>> c.update(d) # add in the second counter
315 >>> c['a'] # now there are nine 'a'
316 9
317
318 >>> c.clear() # empty the counter
319 >>> c
320 Counter()
321
322 Note: If a count is set to zero or reduced to zero, it will remain
323 in the counter until the entry is deleted or the counter is cleared:
324
325 >>> c = Counter('aaabbc')
326 >>> c['b'] -= 2 # reduce the count of 'b' by two
327 >>> c.most_common() # 'b' is still in, but its count is zero
328 [('a', 3), ('c', 1), ('b', 0)]
329
330 '''
331 # References:
332 # http://en.wikipedia.org/wiki/Multiset
333 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
334 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
335 # http://code.activestate.com/recipes/259174/
336 # Knuth, TAOCP Vol. II section 4.6.3
337
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000338 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000339 '''Create a new, empty Counter object. And if given, count elements
340 from an input iterable. Or, initialize the count from another mapping
341 of elements to their counts.
342
343 >>> c = Counter() # a new, empty counter
344 >>> c = Counter('gallahad') # a new counter from an iterable
345 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000346 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000347
348 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000349 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000350
351 def __missing__(self, key):
352 'The count of elements not in the Counter is zero.'
353 # Needed so that self[missing_item] does not raise KeyError
354 return 0
355
356 def most_common(self, n=None):
357 '''List the n most common elements and their counts from the most
358 common to the least. If n is None, then list all element counts.
359
360 >>> Counter('abracadabra').most_common(3)
361 [('a', 5), ('r', 2), ('b', 2)]
362
363 '''
364 # Emulate Bag.sortedByCount from Smalltalk
365 if n is None:
366 return sorted(self.items(), key=_itemgetter(1), reverse=True)
367 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
368
369 def elements(self):
370 '''Iterator over elements repeating each as many times as its count.
371
372 >>> c = Counter('ABCABC')
373 >>> sorted(c.elements())
374 ['A', 'A', 'B', 'B', 'C', 'C']
375
376 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
377 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
378 >>> product = 1
379 >>> for factor in prime_factors.elements(): # loop over factors
380 ... product *= factor # and multiply them
381 >>> product
382 1836
383
384 Note, if an element's count has been set to zero or is a negative
385 number, elements() will ignore it.
386
387 '''
388 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
389 return _chain.from_iterable(_starmap(_repeat, self.items()))
390
391 # Override dict methods where necessary
392
393 @classmethod
394 def fromkeys(cls, iterable, v=None):
395 # There is no equivalent method for counters because setting v=1
396 # means that no element can have a count greater than one.
397 raise NotImplementedError(
398 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
399
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000400 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000401 '''Like dict.update() but add counts instead of replacing them.
402
403 Source can be an iterable, a dictionary, or another Counter instance.
404
405 >>> c = Counter('which')
406 >>> c.update('witch') # add elements from another iterable
407 >>> d = Counter('watch')
408 >>> c.update(d) # add elements from another counter
409 >>> c['h'] # four 'h' in which, witch, and watch
410 4
411
412 '''
413 # The regular dict.update() operation makes no sense here because the
414 # replace behavior results in the some of original untouched counts
415 # being mixed-in with all of the other counts for a mismash that
416 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000417 # contexts. Instead, we implement straight-addition. Both the inputs
418 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000419
420 if iterable is not None:
421 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000422 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000423 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000424 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000425 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000426 else:
427 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000428 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000429 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000430 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000431 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000432 if kwds:
433 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000434
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000435 def subtract(self, iterable=None, **kwds):
436 '''Like dict.update() but subtracts counts instead of replacing them.
437 Counts can be reduced below zero. Both the inputs and outputs are
438 allowed to contain zero and negative counts.
439
440 Source can be an iterable, a dictionary, or another Counter instance.
441
442 >>> c = Counter('which')
443 >>> c.subtract('witch') # subtract elements from another iterable
444 >>> c.subtract(Counter('watch')) # subtract elements from another counter
445 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
446 0
447 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
448 -1
449
450 '''
451 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000452 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000453 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000454 for elem, count in iterable.items():
455 self[elem] = self_get(elem, 0) - count
456 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000457 for elem in iterable:
458 self[elem] = self_get(elem, 0) - 1
459 if kwds:
460 self.subtract(kwds)
461
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000462 def copy(self):
463 'Like dict.copy() but returns a Counter instance instead of a dict.'
464 return Counter(self)
465
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000466 def __delitem__(self, elem):
467 'Like dict.__delitem__() but does not raise KeyError for missing values.'
468 if elem in self:
469 dict.__delitem__(self, elem)
470
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000471 def __repr__(self):
472 if not self:
473 return '%s()' % self.__class__.__name__
474 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
475 return '%s({%s})' % (self.__class__.__name__, items)
476
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000477 # Multiset-style mathematical operations discussed in:
478 # Knuth TAOCP Volume II section 4.6.3 exercise 19
479 # and at http://en.wikipedia.org/wiki/Multiset
480 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000481 # Outputs guaranteed to only include positive counts.
482 #
483 # To strip negative and zero counts, add-in an empty counter:
484 # c += Counter()
485
486 def __add__(self, other):
487 '''Add counts from two counters.
488
489 >>> Counter('abbb') + Counter('bcc')
490 Counter({'b': 4, 'c': 2, 'a': 1})
491
492 '''
493 if not isinstance(other, Counter):
494 return NotImplemented
495 result = Counter()
496 for elem in set(self) | set(other):
497 newcount = self[elem] + other[elem]
498 if newcount > 0:
499 result[elem] = newcount
500 return result
501
502 def __sub__(self, other):
503 ''' Subtract count, but keep only results with positive counts.
504
505 >>> Counter('abbbc') - Counter('bccd')
506 Counter({'b': 2, 'a': 1})
507
508 '''
509 if not isinstance(other, Counter):
510 return NotImplemented
511 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000512 for elem in set(self) | set(other):
513 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000514 if newcount > 0:
515 result[elem] = newcount
516 return result
517
518 def __or__(self, other):
519 '''Union is the maximum of value in either of the input counters.
520
521 >>> Counter('abbb') | Counter('bcc')
522 Counter({'b': 3, 'c': 2, 'a': 1})
523
524 '''
525 if not isinstance(other, Counter):
526 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000527 result = Counter()
528 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000529 p, q = self[elem], other[elem]
530 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000531 if newcount > 0:
532 result[elem] = newcount
533 return result
534
535 def __and__(self, other):
536 ''' Intersection is the minimum of corresponding counts.
537
538 >>> Counter('abbb') & Counter('bcc')
539 Counter({'b': 1})
540
541 '''
542 if not isinstance(other, Counter):
543 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000544 result = Counter()
545 if len(self) < len(other):
546 self, other = other, self
547 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000548 p, q = self[elem], other[elem]
549 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000550 if newcount > 0:
551 result[elem] = newcount
552 return result
553
Guido van Rossumd8faa362007-04-27 19:54:29 +0000554
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000555################################################################################
556### UserDict
557################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000558
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000559class UserDict(MutableMapping):
560
561 # Start by filling-out the abstract methods
562 def __init__(self, dict=None, **kwargs):
563 self.data = {}
564 if dict is not None:
565 self.update(dict)
566 if len(kwargs):
567 self.update(kwargs)
568 def __len__(self): return len(self.data)
569 def __getitem__(self, key):
570 if key in self.data:
571 return self.data[key]
572 if hasattr(self.__class__, "__missing__"):
573 return self.__class__.__missing__(self, key)
574 raise KeyError(key)
575 def __setitem__(self, key, item): self.data[key] = item
576 def __delitem__(self, key): del self.data[key]
577 def __iter__(self):
578 return iter(self.data)
579
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000580 # Modify __contains__ to work correctly when __missing__ is present
581 def __contains__(self, key):
582 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000583
584 # Now, add the methods in dicts but not in MutableMapping
585 def __repr__(self): return repr(self.data)
586 def copy(self):
587 if self.__class__ is UserDict:
588 return UserDict(self.data.copy())
589 import copy
590 data = self.data
591 try:
592 self.data = {}
593 c = copy.copy(self)
594 finally:
595 self.data = data
596 c.update(self)
597 return c
598 @classmethod
599 def fromkeys(cls, iterable, value=None):
600 d = cls()
601 for key in iterable:
602 d[key] = value
603 return d
604
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000605
606
607################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000608### UserList
609################################################################################
610
611class UserList(MutableSequence):
612 """A more or less complete user-defined wrapper around list objects."""
613 def __init__(self, initlist=None):
614 self.data = []
615 if initlist is not None:
616 # XXX should this accept an arbitrary sequence?
617 if type(initlist) == type(self.data):
618 self.data[:] = initlist
619 elif isinstance(initlist, UserList):
620 self.data[:] = initlist.data[:]
621 else:
622 self.data = list(initlist)
623 def __repr__(self): return repr(self.data)
624 def __lt__(self, other): return self.data < self.__cast(other)
625 def __le__(self, other): return self.data <= self.__cast(other)
626 def __eq__(self, other): return self.data == self.__cast(other)
627 def __ne__(self, other): return self.data != self.__cast(other)
628 def __gt__(self, other): return self.data > self.__cast(other)
629 def __ge__(self, other): return self.data >= self.__cast(other)
630 def __cast(self, other):
631 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000632 def __contains__(self, item): return item in self.data
633 def __len__(self): return len(self.data)
634 def __getitem__(self, i): return self.data[i]
635 def __setitem__(self, i, item): self.data[i] = item
636 def __delitem__(self, i): del self.data[i]
637 def __add__(self, other):
638 if isinstance(other, UserList):
639 return self.__class__(self.data + other.data)
640 elif isinstance(other, type(self.data)):
641 return self.__class__(self.data + other)
642 return self.__class__(self.data + list(other))
643 def __radd__(self, other):
644 if isinstance(other, UserList):
645 return self.__class__(other.data + self.data)
646 elif isinstance(other, type(self.data)):
647 return self.__class__(other + self.data)
648 return self.__class__(list(other) + self.data)
649 def __iadd__(self, other):
650 if isinstance(other, UserList):
651 self.data += other.data
652 elif isinstance(other, type(self.data)):
653 self.data += other
654 else:
655 self.data += list(other)
656 return self
657 def __mul__(self, n):
658 return self.__class__(self.data*n)
659 __rmul__ = __mul__
660 def __imul__(self, n):
661 self.data *= n
662 return self
663 def append(self, item): self.data.append(item)
664 def insert(self, i, item): self.data.insert(i, item)
665 def pop(self, i=-1): return self.data.pop(i)
666 def remove(self, item): self.data.remove(item)
667 def count(self, item): return self.data.count(item)
668 def index(self, item, *args): return self.data.index(item, *args)
669 def reverse(self): self.data.reverse()
670 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
671 def extend(self, other):
672 if isinstance(other, UserList):
673 self.data.extend(other.data)
674 else:
675 self.data.extend(other)
676
677
678
679################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000680### UserString
681################################################################################
682
683class UserString(Sequence):
684 def __init__(self, seq):
685 if isinstance(seq, str):
686 self.data = seq
687 elif isinstance(seq, UserString):
688 self.data = seq.data[:]
689 else:
690 self.data = str(seq)
691 def __str__(self): return str(self.data)
692 def __repr__(self): return repr(self.data)
693 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000694 def __float__(self): return float(self.data)
695 def __complex__(self): return complex(self.data)
696 def __hash__(self): return hash(self.data)
697
698 def __eq__(self, string):
699 if isinstance(string, UserString):
700 return self.data == string.data
701 return self.data == string
702 def __ne__(self, string):
703 if isinstance(string, UserString):
704 return self.data != string.data
705 return self.data != string
706 def __lt__(self, string):
707 if isinstance(string, UserString):
708 return self.data < string.data
709 return self.data < string
710 def __le__(self, string):
711 if isinstance(string, UserString):
712 return self.data <= string.data
713 return self.data <= string
714 def __gt__(self, string):
715 if isinstance(string, UserString):
716 return self.data > string.data
717 return self.data > string
718 def __ge__(self, string):
719 if isinstance(string, UserString):
720 return self.data >= string.data
721 return self.data >= string
722
723 def __contains__(self, char):
724 if isinstance(char, UserString):
725 char = char.data
726 return char in self.data
727
728 def __len__(self): return len(self.data)
729 def __getitem__(self, index): return self.__class__(self.data[index])
730 def __add__(self, other):
731 if isinstance(other, UserString):
732 return self.__class__(self.data + other.data)
733 elif isinstance(other, str):
734 return self.__class__(self.data + other)
735 return self.__class__(self.data + str(other))
736 def __radd__(self, other):
737 if isinstance(other, str):
738 return self.__class__(other + self.data)
739 return self.__class__(str(other) + self.data)
740 def __mul__(self, n):
741 return self.__class__(self.data*n)
742 __rmul__ = __mul__
743 def __mod__(self, args):
744 return self.__class__(self.data % args)
745
746 # the following methods are defined in alphabetical order:
747 def capitalize(self): return self.__class__(self.data.capitalize())
748 def center(self, width, *args):
749 return self.__class__(self.data.center(width, *args))
750 def count(self, sub, start=0, end=_sys.maxsize):
751 if isinstance(sub, UserString):
752 sub = sub.data
753 return self.data.count(sub, start, end)
754 def encode(self, encoding=None, errors=None): # XXX improve this?
755 if encoding:
756 if errors:
757 return self.__class__(self.data.encode(encoding, errors))
758 return self.__class__(self.data.encode(encoding))
759 return self.__class__(self.data.encode())
760 def endswith(self, suffix, start=0, end=_sys.maxsize):
761 return self.data.endswith(suffix, start, end)
762 def expandtabs(self, tabsize=8):
763 return self.__class__(self.data.expandtabs(tabsize))
764 def find(self, sub, start=0, end=_sys.maxsize):
765 if isinstance(sub, UserString):
766 sub = sub.data
767 return self.data.find(sub, start, end)
768 def format(self, *args, **kwds):
769 return self.data.format(*args, **kwds)
770 def index(self, sub, start=0, end=_sys.maxsize):
771 return self.data.index(sub, start, end)
772 def isalpha(self): return self.data.isalpha()
773 def isalnum(self): return self.data.isalnum()
774 def isdecimal(self): return self.data.isdecimal()
775 def isdigit(self): return self.data.isdigit()
776 def isidentifier(self): return self.data.isidentifier()
777 def islower(self): return self.data.islower()
778 def isnumeric(self): return self.data.isnumeric()
779 def isspace(self): return self.data.isspace()
780 def istitle(self): return self.data.istitle()
781 def isupper(self): return self.data.isupper()
782 def join(self, seq): return self.data.join(seq)
783 def ljust(self, width, *args):
784 return self.__class__(self.data.ljust(width, *args))
785 def lower(self): return self.__class__(self.data.lower())
786 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
787 def partition(self, sep):
788 return self.data.partition(sep)
789 def replace(self, old, new, maxsplit=-1):
790 if isinstance(old, UserString):
791 old = old.data
792 if isinstance(new, UserString):
793 new = new.data
794 return self.__class__(self.data.replace(old, new, maxsplit))
795 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000796 if isinstance(sub, UserString):
797 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000798 return self.data.rfind(sub, start, end)
799 def rindex(self, sub, start=0, end=_sys.maxsize):
800 return self.data.rindex(sub, start, end)
801 def rjust(self, width, *args):
802 return self.__class__(self.data.rjust(width, *args))
803 def rpartition(self, sep):
804 return self.data.rpartition(sep)
805 def rstrip(self, chars=None):
806 return self.__class__(self.data.rstrip(chars))
807 def split(self, sep=None, maxsplit=-1):
808 return self.data.split(sep, maxsplit)
809 def rsplit(self, sep=None, maxsplit=-1):
810 return self.data.rsplit(sep, maxsplit)
811 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
812 def startswith(self, prefix, start=0, end=_sys.maxsize):
813 return self.data.startswith(prefix, start, end)
814 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
815 def swapcase(self): return self.__class__(self.data.swapcase())
816 def title(self): return self.__class__(self.data.title())
817 def translate(self, *args):
818 return self.__class__(self.data.translate(*args))
819 def upper(self): return self.__class__(self.data.upper())
820 def zfill(self, width): return self.__class__(self.data.zfill(width))
821
822
823
824################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000825### Simple tests
826################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000827
828if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000829 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000830 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000831 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000832 p = Point(x=10, y=20)
833 assert p == loads(dumps(p))
834
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000835 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000836 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000837 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000838 @property
839 def hypot(self):
840 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000841 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000842 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000843
Christian Heimes25bb7832008-01-11 16:17:00 +0000844 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000845 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000846
847 class Point(namedtuple('Point', 'x y')):
848 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000849 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000850 _make = classmethod(tuple.__new__)
851 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000852 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000853
854 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000855
Christian Heimes25bb7832008-01-11 16:17:00 +0000856 Point3D = namedtuple('Point3D', Point._fields + ('z',))
857 print(Point3D.__doc__)
858
Guido van Rossumd8faa362007-04-27 19:54:29 +0000859 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000860 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000861 print(TestResults(*doctest.testmod()))