blob: fb9464fa87eb4bed72aa61d6b00497b0900f17da [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1736542009-03-23 05:19:21 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger1d879f62011-01-04 20:57:19 +000024class OrderedDict(dict):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger69976a72010-09-12 05:28:42 +000046 self.__in_repr = False # detects recursive repr
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000047 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000048 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000049 except AttributeError:
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000050 self.__root = root = _Link() # sentinel node for the doubly linked list
51 root.prev = root.next = root
52 self.__map = {}
Raymond Hettinger1d879f62011-01-04 20:57:19 +000053 self.__update(*args, **kwds)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000054
55 def clear(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000056 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerf1736542009-03-23 05:19:21 +000057 root = self.__root
58 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000059 self.__map.clear()
Raymond Hettinger2d32f632009-03-02 21:24:57 +000060 dict.clear(self)
61
62 def __setitem__(self, key, value):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000063 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000064 # Setting a new item creates a new link which goes at the end of the linked
65 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000066 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000067 self.__map[key] = link = _Link()
68 root = self.__root
69 last = root.prev
70 link.prev, link.next, link.key = last, root, key
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000071 last.next = root.prev = _proxy(link)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000072 dict.__setitem__(self, key, value)
73
74 def __delitem__(self, key):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000075 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 # Deleting an existing item uses self.__map to find the link which is
77 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000078 dict.__delitem__(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000079 link = self.__map.pop(key)
80 link.prev.next = link.next
81 link.next.prev = link.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000082
83 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000084 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000085 # Traverse the linked list in order.
86 root = self.__root
87 curr = root.next
88 while curr is not root:
89 yield curr.key
90 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000091
92 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000093 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000094 # Traverse the linked list in reverse order.
95 root = self.__root
96 curr = root.prev
97 while curr is not root:
98 yield curr.key
99 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000100
101 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000102 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000103 items = [[k, self[k]] for k in self]
Raymond Hettinger69976a72010-09-12 05:28:42 +0000104 tmp = self.__map, self.__root, self.__in_repr
105 del self.__map, self.__root, self.__in_repr
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000106 inst_dict = vars(self).copy()
Raymond Hettinger69976a72010-09-12 05:28:42 +0000107 self.__map, self.__root, self.__in_repr = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000108 if inst_dict:
109 return (self.__class__, (items,), inst_dict)
110 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000111
Raymond Hettinger1d879f62011-01-04 20:57:19 +0000112 update = __update = MutableMapping.update
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000113 keys = MutableMapping.keys
114 values = MutableMapping.values
115 items = MutableMapping.items
116
Raymond Hettinger1d879f62011-01-04 20:57:19 +0000117 __marker = object()
118
119 def pop(self, key, default=__marker):
120 if key in self:
121 result = self[key]
122 del self[key]
123 return result
124 if default is self.__marker:
125 raise KeyError(key)
126 return default
127
128 def setdefault(self, key, default=None):
129 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
130 if key in self:
131 return self[key]
132 self[key] = default
133 return default
134
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000135 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000136 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
137 Pairs are returned in LIFO order if last is true or FIFO order if false.
138
139 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000140 if not self:
141 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000142 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000143 value = self.pop(key)
144 return key, value
145
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000146 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000147 'od.__repr__() <==> repr(od)'
Raymond Hettinger69976a72010-09-12 05:28:42 +0000148 if self.__in_repr:
149 return '...'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000150 if not self:
151 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger69976a72010-09-12 05:28:42 +0000152 self.__in_repr = True
153 try:
154 result = '%s(%r)' % (self.__class__.__name__, list(self.items()))
155 finally:
156 self.__in_repr = False
157 return result
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000158
159 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000160 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000161 return self.__class__(self)
162
163 @classmethod
164 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000165 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
166 and values equal to v (which defaults to None).
167
168 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000169 d = cls()
170 for key in iterable:
171 d[key] = value
172 return d
173
174 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000175 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
176 while comparison to a regular mapping is order-insensitive.
177
178 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000179 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000180 return len(self)==len(other) and \
181 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000182 return dict.__eq__(self, other)
183
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000184 def __ne__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000185 '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
186 while comparison to a regular mapping is order-insensitive.
187
188 '''
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000189 return not self == other
190
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000191
Christian Heimes99170a52007-12-19 02:07:34 +0000192
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000193################################################################################
194### namedtuple
195################################################################################
196
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000197def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000198 """Returns a new subclass of tuple with named fields.
199
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000200 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000201 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000202 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000203 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000204 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000205 33
Christian Heimes99170a52007-12-19 02:07:34 +0000206 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000207 >>> x, y
208 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000209 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000210 33
Christian Heimes0449f632007-12-15 01:27:15 +0000211 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000212 >>> d['x']
213 11
214 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000215 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000216 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000217 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000218
219 """
220
Christian Heimes2380ac72008-01-09 00:17:24 +0000221 # Parse and validate the field names. Validation serves two purposes,
222 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000223 if isinstance(field_names, str):
224 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000225 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000226 if rename:
227 names = list(field_names)
228 seen = set()
229 for i, name in enumerate(names):
230 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
231 or not name or name[0].isdigit() or name.startswith('_')
232 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000233 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000234 seen.add(name)
235 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000236 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000237 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000238 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
239 if _iskeyword(name):
240 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
241 if name[0].isdigit():
242 raise ValueError('Type names and field names cannot start with a number: %r' % name)
243 seen_names = set()
244 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000245 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000246 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000247 if name in seen_names:
248 raise ValueError('Encountered duplicate field name: %r' % name)
249 seen_names.add(name)
250
251 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000252 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000253 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000254 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
255 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000256 '%(typename)s(%(argtxt)s)' \n
257 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000258 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000259 def __new__(_cls, %(argtxt)s):
260 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000261 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000262 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000263 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000264 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000265 if len(result) != %(numfields)d:
266 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
267 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000268 def __repr__(self):
Christian Heimes0449f632007-12-15 01:27:15 +0000269 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000270 def _asdict(self):
271 'Return a new OrderedDict which maps field names to their values'
272 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000273 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000274 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000275 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000276 if kwds:
277 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000278 return result \n
279 def __getnewargs__(self):
280 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000281 for i, name in enumerate(field_names):
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000282 template += ' %s = _property(_itemgetter(%d))\n' % (name, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000283 if verbose:
284 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000285
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000286 # Execute the template string in a temporary namespace and
287 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000288 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
289 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000290 try:
291 exec(template, namespace)
292 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000293 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000294 result = namespace[typename]
295
296 # For pickling to work, the __module__ variable needs to be set to the frame
297 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000298 # sys._getframe is not defined (Jython for example) or sys._getframe is not
299 # defined for arguments greater than 0 (IronPython).
300 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000301 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000302 except (AttributeError, ValueError):
303 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000304
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000305 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000306
Guido van Rossumd8faa362007-04-27 19:54:29 +0000307
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000308########################################################################
309### Counter
310########################################################################
311
312class Counter(dict):
313 '''Dict subclass for counting hashable items. Sometimes called a bag
314 or multiset. Elements are stored as dictionary keys and their counts
315 are stored as dictionary values.
316
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000317 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000318
319 >>> c.most_common(3) # three most common elements
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000320 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000321 >>> sorted(c) # list all unique elements
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000322 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000323 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000324 'aaaaabbbbcccdde'
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000325 >>> sum(c.values()) # total of all counts
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000326 15
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000327
328 >>> c['a'] # count of letter 'a'
329 5
330 >>> for elem in 'shazam': # update counts from an iterable
331 ... c[elem] += 1 # by adding 1 to each element's count
332 >>> c['a'] # now there are seven 'a'
333 7
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000334 >>> del c['b'] # remove all 'b'
335 >>> c['b'] # now there are zero 'b'
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000336 0
337
338 >>> d = Counter('simsalabim') # make another counter
339 >>> c.update(d) # add in the second counter
340 >>> c['a'] # now there are nine 'a'
341 9
342
343 >>> c.clear() # empty the counter
344 >>> c
345 Counter()
346
347 Note: If a count is set to zero or reduced to zero, it will remain
348 in the counter until the entry is deleted or the counter is cleared:
349
350 >>> c = Counter('aaabbc')
351 >>> c['b'] -= 2 # reduce the count of 'b' by two
352 >>> c.most_common() # 'b' is still in, but its count is zero
353 [('a', 3), ('c', 1), ('b', 0)]
354
355 '''
356 # References:
357 # http://en.wikipedia.org/wiki/Multiset
358 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
359 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
360 # http://code.activestate.com/recipes/259174/
361 # Knuth, TAOCP Vol. II section 4.6.3
362
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000363 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000364 '''Create a new, empty Counter object. And if given, count elements
365 from an input iterable. Or, initialize the count from another mapping
366 of elements to their counts.
367
368 >>> c = Counter() # a new, empty counter
369 >>> c = Counter('gallahad') # a new counter from an iterable
370 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000371 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000372
373 '''
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000374 super().__init__()
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000375 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000376
377 def __missing__(self, key):
378 'The count of elements not in the Counter is zero.'
379 # Needed so that self[missing_item] does not raise KeyError
380 return 0
381
382 def most_common(self, n=None):
383 '''List the n most common elements and their counts from the most
384 common to the least. If n is None, then list all element counts.
385
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000386 >>> Counter('abcdeabcdabcaba').most_common(3)
387 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000388
389 '''
390 # Emulate Bag.sortedByCount from Smalltalk
391 if n is None:
392 return sorted(self.items(), key=_itemgetter(1), reverse=True)
393 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
394
395 def elements(self):
396 '''Iterator over elements repeating each as many times as its count.
397
398 >>> c = Counter('ABCABC')
399 >>> sorted(c.elements())
400 ['A', 'A', 'B', 'B', 'C', 'C']
401
402 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
403 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
404 >>> product = 1
405 >>> for factor in prime_factors.elements(): # loop over factors
406 ... product *= factor # and multiply them
407 >>> product
408 1836
409
410 Note, if an element's count has been set to zero or is a negative
411 number, elements() will ignore it.
412
413 '''
414 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
415 return _chain.from_iterable(_starmap(_repeat, self.items()))
416
417 # Override dict methods where necessary
418
419 @classmethod
420 def fromkeys(cls, iterable, v=None):
421 # There is no equivalent method for counters because setting v=1
422 # means that no element can have a count greater than one.
423 raise NotImplementedError(
424 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
425
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000426 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000427 '''Like dict.update() but add counts instead of replacing them.
428
429 Source can be an iterable, a dictionary, or another Counter instance.
430
431 >>> c = Counter('which')
432 >>> c.update('witch') # add elements from another iterable
433 >>> d = Counter('watch')
434 >>> c.update(d) # add elements from another counter
435 >>> c['h'] # four 'h' in which, witch, and watch
436 4
437
438 '''
439 # The regular dict.update() operation makes no sense here because the
440 # replace behavior results in the some of original untouched counts
441 # being mixed-in with all of the other counts for a mismash that
442 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000443 # contexts. Instead, we implement straight-addition. Both the inputs
444 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000445
446 if iterable is not None:
447 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000448 if self:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000449 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000450 for elem, count in iterable.items():
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000451 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000452 else:
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000453 super().update(iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000454 else:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000455 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000456 for elem in iterable:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000457 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000458 if kwds:
459 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000460
461 def copy(self):
Raymond Hettinger1c746c22011-04-15 13:16:46 -0700462 'Return a shallow copy.'
463 return self.__class__(self)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000464
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000465 def __reduce__(self):
466 return self.__class__, (dict(self),)
467
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000468 def __delitem__(self, elem):
469 'Like dict.__delitem__() but does not raise KeyError for missing values.'
470 if elem in self:
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000471 super().__delitem__(elem)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000472
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000473 def __repr__(self):
474 if not self:
475 return '%s()' % self.__class__.__name__
476 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
477 return '%s({%s})' % (self.__class__.__name__, items)
478
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000479 # Multiset-style mathematical operations discussed in:
480 # Knuth TAOCP Volume II section 4.6.3 exercise 19
481 # and at http://en.wikipedia.org/wiki/Multiset
482 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000483 # Outputs guaranteed to only include positive counts.
484 #
485 # To strip negative and zero counts, add-in an empty counter:
486 # c += Counter()
487
488 def __add__(self, other):
489 '''Add counts from two counters.
490
491 >>> Counter('abbb') + Counter('bcc')
492 Counter({'b': 4, 'c': 2, 'a': 1})
493
494 '''
495 if not isinstance(other, Counter):
496 return NotImplemented
497 result = Counter()
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700498 for elem, count in self.items():
499 newcount = count + other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000500 if newcount > 0:
501 result[elem] = newcount
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700502 for elem, count in other.items():
503 if elem not in self and count > 0:
504 result[elem] = count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000505 return result
506
507 def __sub__(self, other):
508 ''' Subtract count, but keep only results with positive counts.
509
510 >>> Counter('abbbc') - Counter('bccd')
511 Counter({'b': 2, 'a': 1})
512
513 '''
514 if not isinstance(other, Counter):
515 return NotImplemented
516 result = Counter()
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700517 for elem, count in self.items():
518 newcount = count - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000519 if newcount > 0:
520 result[elem] = newcount
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700521 for elem, count in other.items():
522 if elem not in self and count < 0:
523 result[elem] = 0 - count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000524 return result
525
526 def __or__(self, other):
527 '''Union is the maximum of value in either of the input counters.
528
529 >>> Counter('abbb') | Counter('bcc')
530 Counter({'b': 3, 'c': 2, 'a': 1})
531
532 '''
533 if not isinstance(other, Counter):
534 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000535 result = Counter()
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700536 for elem, count in self.items():
537 other_count = other[elem]
538 newcount = other_count if count < other_count else count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000539 if newcount > 0:
540 result[elem] = newcount
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700541 for elem, count in other.items():
542 if elem not in self and count > 0:
543 result[elem] = count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000544 return result
545
546 def __and__(self, other):
547 ''' Intersection is the minimum of corresponding counts.
548
549 >>> Counter('abbb') & Counter('bcc')
550 Counter({'b': 1})
551
552 '''
553 if not isinstance(other, Counter):
554 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000555 result = Counter()
Raymond Hettinger2876a8c2011-04-17 19:46:46 -0700556 for elem, count in self.items():
557 other_count = other[elem]
558 newcount = count if count < other_count else other_count
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000559 if newcount > 0:
560 result[elem] = newcount
561 return result
562
Guido van Rossumd8faa362007-04-27 19:54:29 +0000563
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000564################################################################################
565### UserDict
566################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000568class UserDict(MutableMapping):
569
570 # Start by filling-out the abstract methods
571 def __init__(self, dict=None, **kwargs):
572 self.data = {}
573 if dict is not None:
574 self.update(dict)
575 if len(kwargs):
576 self.update(kwargs)
577 def __len__(self): return len(self.data)
578 def __getitem__(self, key):
579 if key in self.data:
580 return self.data[key]
581 if hasattr(self.__class__, "__missing__"):
582 return self.__class__.__missing__(self, key)
583 raise KeyError(key)
584 def __setitem__(self, key, item): self.data[key] = item
585 def __delitem__(self, key): del self.data[key]
586 def __iter__(self):
587 return iter(self.data)
588
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000589 # Modify __contains__ to work correctly when __missing__ is present
590 def __contains__(self, key):
591 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000592
593 # Now, add the methods in dicts but not in MutableMapping
594 def __repr__(self): return repr(self.data)
595 def copy(self):
596 if self.__class__ is UserDict:
597 return UserDict(self.data.copy())
598 import copy
599 data = self.data
600 try:
601 self.data = {}
602 c = copy.copy(self)
603 finally:
604 self.data = data
605 c.update(self)
606 return c
607 @classmethod
608 def fromkeys(cls, iterable, value=None):
609 d = cls()
610 for key in iterable:
611 d[key] = value
612 return d
613
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000614
615
616################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000617### UserList
618################################################################################
619
620class UserList(MutableSequence):
621 """A more or less complete user-defined wrapper around list objects."""
622 def __init__(self, initlist=None):
623 self.data = []
624 if initlist is not None:
625 # XXX should this accept an arbitrary sequence?
626 if type(initlist) == type(self.data):
627 self.data[:] = initlist
628 elif isinstance(initlist, UserList):
629 self.data[:] = initlist.data[:]
630 else:
631 self.data = list(initlist)
632 def __repr__(self): return repr(self.data)
633 def __lt__(self, other): return self.data < self.__cast(other)
634 def __le__(self, other): return self.data <= self.__cast(other)
635 def __eq__(self, other): return self.data == self.__cast(other)
636 def __ne__(self, other): return self.data != self.__cast(other)
637 def __gt__(self, other): return self.data > self.__cast(other)
638 def __ge__(self, other): return self.data >= self.__cast(other)
639 def __cast(self, other):
640 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000641 def __contains__(self, item): return item in self.data
642 def __len__(self): return len(self.data)
643 def __getitem__(self, i): return self.data[i]
644 def __setitem__(self, i, item): self.data[i] = item
645 def __delitem__(self, i): del self.data[i]
646 def __add__(self, other):
647 if isinstance(other, UserList):
648 return self.__class__(self.data + other.data)
649 elif isinstance(other, type(self.data)):
650 return self.__class__(self.data + other)
651 return self.__class__(self.data + list(other))
652 def __radd__(self, other):
653 if isinstance(other, UserList):
654 return self.__class__(other.data + self.data)
655 elif isinstance(other, type(self.data)):
656 return self.__class__(other + self.data)
657 return self.__class__(list(other) + self.data)
658 def __iadd__(self, other):
659 if isinstance(other, UserList):
660 self.data += other.data
661 elif isinstance(other, type(self.data)):
662 self.data += other
663 else:
664 self.data += list(other)
665 return self
666 def __mul__(self, n):
667 return self.__class__(self.data*n)
668 __rmul__ = __mul__
669 def __imul__(self, n):
670 self.data *= n
671 return self
672 def append(self, item): self.data.append(item)
673 def insert(self, i, item): self.data.insert(i, item)
674 def pop(self, i=-1): return self.data.pop(i)
675 def remove(self, item): self.data.remove(item)
676 def count(self, item): return self.data.count(item)
677 def index(self, item, *args): return self.data.index(item, *args)
678 def reverse(self): self.data.reverse()
679 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
680 def extend(self, other):
681 if isinstance(other, UserList):
682 self.data.extend(other.data)
683 else:
684 self.data.extend(other)
685
686
687
688################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000689### UserString
690################################################################################
691
692class UserString(Sequence):
693 def __init__(self, seq):
694 if isinstance(seq, str):
695 self.data = seq
696 elif isinstance(seq, UserString):
697 self.data = seq.data[:]
698 else:
699 self.data = str(seq)
700 def __str__(self): return str(self.data)
701 def __repr__(self): return repr(self.data)
702 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000703 def __float__(self): return float(self.data)
704 def __complex__(self): return complex(self.data)
705 def __hash__(self): return hash(self.data)
706
707 def __eq__(self, string):
708 if isinstance(string, UserString):
709 return self.data == string.data
710 return self.data == string
711 def __ne__(self, string):
712 if isinstance(string, UserString):
713 return self.data != string.data
714 return self.data != string
715 def __lt__(self, string):
716 if isinstance(string, UserString):
717 return self.data < string.data
718 return self.data < string
719 def __le__(self, string):
720 if isinstance(string, UserString):
721 return self.data <= string.data
722 return self.data <= string
723 def __gt__(self, string):
724 if isinstance(string, UserString):
725 return self.data > string.data
726 return self.data > string
727 def __ge__(self, string):
728 if isinstance(string, UserString):
729 return self.data >= string.data
730 return self.data >= string
731
732 def __contains__(self, char):
733 if isinstance(char, UserString):
734 char = char.data
735 return char in self.data
736
737 def __len__(self): return len(self.data)
738 def __getitem__(self, index): return self.__class__(self.data[index])
739 def __add__(self, other):
740 if isinstance(other, UserString):
741 return self.__class__(self.data + other.data)
742 elif isinstance(other, str):
743 return self.__class__(self.data + other)
744 return self.__class__(self.data + str(other))
745 def __radd__(self, other):
746 if isinstance(other, str):
747 return self.__class__(other + self.data)
748 return self.__class__(str(other) + self.data)
749 def __mul__(self, n):
750 return self.__class__(self.data*n)
751 __rmul__ = __mul__
752 def __mod__(self, args):
753 return self.__class__(self.data % args)
754
755 # the following methods are defined in alphabetical order:
756 def capitalize(self): return self.__class__(self.data.capitalize())
757 def center(self, width, *args):
758 return self.__class__(self.data.center(width, *args))
759 def count(self, sub, start=0, end=_sys.maxsize):
760 if isinstance(sub, UserString):
761 sub = sub.data
762 return self.data.count(sub, start, end)
763 def encode(self, encoding=None, errors=None): # XXX improve this?
764 if encoding:
765 if errors:
766 return self.__class__(self.data.encode(encoding, errors))
767 return self.__class__(self.data.encode(encoding))
768 return self.__class__(self.data.encode())
769 def endswith(self, suffix, start=0, end=_sys.maxsize):
770 return self.data.endswith(suffix, start, end)
771 def expandtabs(self, tabsize=8):
772 return self.__class__(self.data.expandtabs(tabsize))
773 def find(self, sub, start=0, end=_sys.maxsize):
774 if isinstance(sub, UserString):
775 sub = sub.data
776 return self.data.find(sub, start, end)
777 def format(self, *args, **kwds):
778 return self.data.format(*args, **kwds)
779 def index(self, sub, start=0, end=_sys.maxsize):
780 return self.data.index(sub, start, end)
781 def isalpha(self): return self.data.isalpha()
782 def isalnum(self): return self.data.isalnum()
783 def isdecimal(self): return self.data.isdecimal()
784 def isdigit(self): return self.data.isdigit()
785 def isidentifier(self): return self.data.isidentifier()
786 def islower(self): return self.data.islower()
787 def isnumeric(self): return self.data.isnumeric()
788 def isspace(self): return self.data.isspace()
789 def istitle(self): return self.data.istitle()
790 def isupper(self): return self.data.isupper()
791 def join(self, seq): return self.data.join(seq)
792 def ljust(self, width, *args):
793 return self.__class__(self.data.ljust(width, *args))
794 def lower(self): return self.__class__(self.data.lower())
795 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
796 def partition(self, sep):
797 return self.data.partition(sep)
798 def replace(self, old, new, maxsplit=-1):
799 if isinstance(old, UserString):
800 old = old.data
801 if isinstance(new, UserString):
802 new = new.data
803 return self.__class__(self.data.replace(old, new, maxsplit))
804 def rfind(self, sub, start=0, end=_sys.maxsize):
805 return self.data.rfind(sub, start, end)
806 def rindex(self, sub, start=0, end=_sys.maxsize):
807 return self.data.rindex(sub, start, end)
808 def rjust(self, width, *args):
809 return self.__class__(self.data.rjust(width, *args))
810 def rpartition(self, sep):
811 return self.data.rpartition(sep)
812 def rstrip(self, chars=None):
813 return self.__class__(self.data.rstrip(chars))
814 def split(self, sep=None, maxsplit=-1):
815 return self.data.split(sep, maxsplit)
816 def rsplit(self, sep=None, maxsplit=-1):
817 return self.data.rsplit(sep, maxsplit)
818 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
819 def startswith(self, prefix, start=0, end=_sys.maxsize):
820 return self.data.startswith(prefix, start, end)
821 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
822 def swapcase(self): return self.__class__(self.data.swapcase())
823 def title(self): return self.__class__(self.data.title())
824 def translate(self, *args):
825 return self.__class__(self.data.translate(*args))
826 def upper(self): return self.__class__(self.data.upper())
827 def zfill(self, width): return self.__class__(self.data.zfill(width))
828
829
830
831################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000832### Simple tests
833################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000834
835if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000836 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000837 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000838 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000839 p = Point(x=10, y=20)
840 assert p == loads(dumps(p))
841
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000842 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000843 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000844 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000845 @property
846 def hypot(self):
847 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000848 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000849 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000850
Christian Heimes25bb7832008-01-11 16:17:00 +0000851 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000852 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000853
854 class Point(namedtuple('Point', 'x y')):
855 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000856 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000857 _make = classmethod(tuple.__new__)
858 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000859 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000860
861 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000862
Christian Heimes25bb7832008-01-11 16:17:00 +0000863 Point3D = namedtuple('Point3D', Point._fields + ('z',))
864 print(Point3D.__doc__)
865
Guido van Rossumd8faa362007-04-27 19:54:29 +0000866 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000867 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000868 print(TestResults(*doctest.testmod()))