blob: 27bb5e126855dd695598ef11b8917d6433168262 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1736542009-03-23 05:19:21 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger1d879f62011-01-04 20:57:19 +000024class OrderedDict(dict):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger69976a72010-09-12 05:28:42 +000046 self.__in_repr = False # detects recursive repr
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000047 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000048 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000049 except AttributeError:
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000050 self.__root = root = _Link() # sentinel node for the doubly linked list
51 root.prev = root.next = root
52 self.__map = {}
Raymond Hettinger1d879f62011-01-04 20:57:19 +000053 self.__update(*args, **kwds)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000054
55 def clear(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000056 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerf1736542009-03-23 05:19:21 +000057 root = self.__root
58 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000059 self.__map.clear()
Raymond Hettinger2d32f632009-03-02 21:24:57 +000060 dict.clear(self)
61
62 def __setitem__(self, key, value):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000063 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000064 # Setting a new item creates a new link which goes at the end of the linked
65 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000066 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000067 self.__map[key] = link = _Link()
68 root = self.__root
69 last = root.prev
70 link.prev, link.next, link.key = last, root, key
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000071 last.next = root.prev = _proxy(link)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000072 dict.__setitem__(self, key, value)
73
74 def __delitem__(self, key):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000075 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 # Deleting an existing item uses self.__map to find the link which is
77 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000078 dict.__delitem__(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000079 link = self.__map.pop(key)
80 link.prev.next = link.next
81 link.next.prev = link.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000082
83 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000084 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000085 # Traverse the linked list in order.
86 root = self.__root
87 curr = root.next
88 while curr is not root:
89 yield curr.key
90 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000091
92 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000093 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000094 # Traverse the linked list in reverse order.
95 root = self.__root
96 curr = root.prev
97 while curr is not root:
98 yield curr.key
99 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000100
101 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000102 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000103 items = [[k, self[k]] for k in self]
Raymond Hettinger69976a72010-09-12 05:28:42 +0000104 tmp = self.__map, self.__root, self.__in_repr
105 del self.__map, self.__root, self.__in_repr
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000106 inst_dict = vars(self).copy()
Raymond Hettinger69976a72010-09-12 05:28:42 +0000107 self.__map, self.__root, self.__in_repr = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000108 if inst_dict:
109 return (self.__class__, (items,), inst_dict)
110 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000111
Raymond Hettinger1d879f62011-01-04 20:57:19 +0000112 update = __update = MutableMapping.update
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000113 keys = MutableMapping.keys
114 values = MutableMapping.values
115 items = MutableMapping.items
116
Raymond Hettinger1d879f62011-01-04 20:57:19 +0000117 __marker = object()
118
119 def pop(self, key, default=__marker):
120 if key in self:
121 result = self[key]
122 del self[key]
123 return result
124 if default is self.__marker:
125 raise KeyError(key)
126 return default
127
128 def setdefault(self, key, default=None):
129 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
130 if key in self:
131 return self[key]
132 self[key] = default
133 return default
134
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000135 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000136 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
137 Pairs are returned in LIFO order if last is true or FIFO order if false.
138
139 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000140 if not self:
141 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000142 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000143 value = self.pop(key)
144 return key, value
145
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000146 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000147 'od.__repr__() <==> repr(od)'
Raymond Hettinger69976a72010-09-12 05:28:42 +0000148 if self.__in_repr:
149 return '...'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000150 if not self:
151 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger69976a72010-09-12 05:28:42 +0000152 self.__in_repr = True
153 try:
154 result = '%s(%r)' % (self.__class__.__name__, list(self.items()))
155 finally:
156 self.__in_repr = False
157 return result
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000158
159 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000160 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000161 return self.__class__(self)
162
163 @classmethod
164 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000165 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
166 and values equal to v (which defaults to None).
167
168 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000169 d = cls()
170 for key in iterable:
171 d[key] = value
172 return d
173
174 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000175 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
176 while comparison to a regular mapping is order-insensitive.
177
178 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000179 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000180 return len(self)==len(other) and \
181 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000182 return dict.__eq__(self, other)
183
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000184 def __ne__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000185 '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
186 while comparison to a regular mapping is order-insensitive.
187
188 '''
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000189 return not self == other
190
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000191
Christian Heimes99170a52007-12-19 02:07:34 +0000192
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000193################################################################################
194### namedtuple
195################################################################################
196
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000197def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000198 """Returns a new subclass of tuple with named fields.
199
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000200 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000201 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000202 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000203 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000204 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000205 33
Christian Heimes99170a52007-12-19 02:07:34 +0000206 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000207 >>> x, y
208 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000209 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000210 33
Christian Heimes0449f632007-12-15 01:27:15 +0000211 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000212 >>> d['x']
213 11
214 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000215 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000216 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000217 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000218
219 """
220
Christian Heimes2380ac72008-01-09 00:17:24 +0000221 # Parse and validate the field names. Validation serves two purposes,
222 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000223 if isinstance(field_names, str):
224 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000225 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000226 if rename:
227 names = list(field_names)
228 seen = set()
229 for i, name in enumerate(names):
230 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
231 or not name or name[0].isdigit() or name.startswith('_')
232 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000233 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000234 seen.add(name)
235 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000236 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000237 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000238 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
239 if _iskeyword(name):
240 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
241 if name[0].isdigit():
242 raise ValueError('Type names and field names cannot start with a number: %r' % name)
243 seen_names = set()
244 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000245 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000246 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000247 if name in seen_names:
248 raise ValueError('Encountered duplicate field name: %r' % name)
249 seen_names.add(name)
250
251 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000252 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000253 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000254 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
255 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000256 '%(typename)s(%(argtxt)s)' \n
257 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000258 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000259 def __new__(_cls, %(argtxt)s):
260 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000261 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000262 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000263 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000264 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000265 if len(result) != %(numfields)d:
266 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
267 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000268 def __repr__(self):
Christian Heimes0449f632007-12-15 01:27:15 +0000269 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000270 def _asdict(self):
271 'Return a new OrderedDict which maps field names to their values'
272 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000273 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000274 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000275 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000276 if kwds:
277 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000278 return result \n
279 def __getnewargs__(self):
280 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000281 for i, name in enumerate(field_names):
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000282 template += ' %s = _property(_itemgetter(%d))\n' % (name, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000283 if verbose:
284 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000285
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000286 # Execute the template string in a temporary namespace and
287 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000288 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
289 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000290 try:
291 exec(template, namespace)
292 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000293 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000294 result = namespace[typename]
295
296 # For pickling to work, the __module__ variable needs to be set to the frame
297 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000298 # sys._getframe is not defined (Jython for example) or sys._getframe is not
299 # defined for arguments greater than 0 (IronPython).
300 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000301 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000302 except (AttributeError, ValueError):
303 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000304
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000305 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000306
Guido van Rossumd8faa362007-04-27 19:54:29 +0000307
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000308########################################################################
309### Counter
310########################################################################
311
312class Counter(dict):
313 '''Dict subclass for counting hashable items. Sometimes called a bag
314 or multiset. Elements are stored as dictionary keys and their counts
315 are stored as dictionary values.
316
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000317 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000318
319 >>> c.most_common(3) # three most common elements
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000320 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000321 >>> sorted(c) # list all unique elements
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000322 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000323 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000324 'aaaaabbbbcccdde'
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000325 >>> sum(c.values()) # total of all counts
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000326 15
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000327
328 >>> c['a'] # count of letter 'a'
329 5
330 >>> for elem in 'shazam': # update counts from an iterable
331 ... c[elem] += 1 # by adding 1 to each element's count
332 >>> c['a'] # now there are seven 'a'
333 7
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000334 >>> del c['b'] # remove all 'b'
335 >>> c['b'] # now there are zero 'b'
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000336 0
337
338 >>> d = Counter('simsalabim') # make another counter
339 >>> c.update(d) # add in the second counter
340 >>> c['a'] # now there are nine 'a'
341 9
342
343 >>> c.clear() # empty the counter
344 >>> c
345 Counter()
346
347 Note: If a count is set to zero or reduced to zero, it will remain
348 in the counter until the entry is deleted or the counter is cleared:
349
350 >>> c = Counter('aaabbc')
351 >>> c['b'] -= 2 # reduce the count of 'b' by two
352 >>> c.most_common() # 'b' is still in, but its count is zero
353 [('a', 3), ('c', 1), ('b', 0)]
354
355 '''
356 # References:
357 # http://en.wikipedia.org/wiki/Multiset
358 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
359 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
360 # http://code.activestate.com/recipes/259174/
361 # Knuth, TAOCP Vol. II section 4.6.3
362
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000363 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000364 '''Create a new, empty Counter object. And if given, count elements
365 from an input iterable. Or, initialize the count from another mapping
366 of elements to their counts.
367
368 >>> c = Counter() # a new, empty counter
369 >>> c = Counter('gallahad') # a new counter from an iterable
370 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000371 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000372
373 '''
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000374 super().__init__()
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000375 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000376
377 def __missing__(self, key):
378 'The count of elements not in the Counter is zero.'
379 # Needed so that self[missing_item] does not raise KeyError
380 return 0
381
382 def most_common(self, n=None):
383 '''List the n most common elements and their counts from the most
384 common to the least. If n is None, then list all element counts.
385
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000386 >>> Counter('abcdeabcdabcaba').most_common(3)
387 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000388
389 '''
390 # Emulate Bag.sortedByCount from Smalltalk
391 if n is None:
392 return sorted(self.items(), key=_itemgetter(1), reverse=True)
393 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
394
395 def elements(self):
396 '''Iterator over elements repeating each as many times as its count.
397
398 >>> c = Counter('ABCABC')
399 >>> sorted(c.elements())
400 ['A', 'A', 'B', 'B', 'C', 'C']
401
402 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
403 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
404 >>> product = 1
405 >>> for factor in prime_factors.elements(): # loop over factors
406 ... product *= factor # and multiply them
407 >>> product
408 1836
409
410 Note, if an element's count has been set to zero or is a negative
411 number, elements() will ignore it.
412
413 '''
414 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
415 return _chain.from_iterable(_starmap(_repeat, self.items()))
416
417 # Override dict methods where necessary
418
419 @classmethod
420 def fromkeys(cls, iterable, v=None):
421 # There is no equivalent method for counters because setting v=1
422 # means that no element can have a count greater than one.
423 raise NotImplementedError(
424 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
425
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000426 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000427 '''Like dict.update() but add counts instead of replacing them.
428
429 Source can be an iterable, a dictionary, or another Counter instance.
430
431 >>> c = Counter('which')
432 >>> c.update('witch') # add elements from another iterable
433 >>> d = Counter('watch')
434 >>> c.update(d) # add elements from another counter
435 >>> c['h'] # four 'h' in which, witch, and watch
436 4
437
438 '''
439 # The regular dict.update() operation makes no sense here because the
440 # replace behavior results in the some of original untouched counts
441 # being mixed-in with all of the other counts for a mismash that
442 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000443 # contexts. Instead, we implement straight-addition. Both the inputs
444 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000445
446 if iterable is not None:
447 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000448 if self:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000449 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000450 for elem, count in iterable.items():
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000451 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000452 else:
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000453 super().update(iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000454 else:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000455 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000456 for elem in iterable:
Raymond Hettinger77b31ef2009-06-29 18:34:46 +0000457 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000458 if kwds:
459 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000460
461 def copy(self):
462 'Like dict.copy() but returns a Counter instance instead of a dict.'
463 return Counter(self)
464
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000465 def __reduce__(self):
466 return self.__class__, (dict(self),)
467
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000468 def __delitem__(self, elem):
469 'Like dict.__delitem__() but does not raise KeyError for missing values.'
470 if elem in self:
Raymond Hettinger8dfb9282011-01-03 08:50:48 +0000471 super().__delitem__(elem)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000472
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000473 def __repr__(self):
474 if not self:
475 return '%s()' % self.__class__.__name__
476 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
477 return '%s({%s})' % (self.__class__.__name__, items)
478
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000479 # Multiset-style mathematical operations discussed in:
480 # Knuth TAOCP Volume II section 4.6.3 exercise 19
481 # and at http://en.wikipedia.org/wiki/Multiset
482 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000483 # Outputs guaranteed to only include positive counts.
484 #
485 # To strip negative and zero counts, add-in an empty counter:
486 # c += Counter()
487
488 def __add__(self, other):
489 '''Add counts from two counters.
490
491 >>> Counter('abbb') + Counter('bcc')
492 Counter({'b': 4, 'c': 2, 'a': 1})
493
494 '''
495 if not isinstance(other, Counter):
496 return NotImplemented
497 result = Counter()
498 for elem in set(self) | set(other):
499 newcount = self[elem] + other[elem]
500 if newcount > 0:
501 result[elem] = newcount
502 return result
503
504 def __sub__(self, other):
505 ''' Subtract count, but keep only results with positive counts.
506
507 >>> Counter('abbbc') - Counter('bccd')
508 Counter({'b': 2, 'a': 1})
509
510 '''
511 if not isinstance(other, Counter):
512 return NotImplemented
513 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000514 for elem in set(self) | set(other):
515 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000516 if newcount > 0:
517 result[elem] = newcount
518 return result
519
520 def __or__(self, other):
521 '''Union is the maximum of value in either of the input counters.
522
523 >>> Counter('abbb') | Counter('bcc')
524 Counter({'b': 3, 'c': 2, 'a': 1})
525
526 '''
527 if not isinstance(other, Counter):
528 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000529 result = Counter()
530 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000531 p, q = self[elem], other[elem]
532 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000533 if newcount > 0:
534 result[elem] = newcount
535 return result
536
537 def __and__(self, other):
538 ''' Intersection is the minimum of corresponding counts.
539
540 >>> Counter('abbb') & Counter('bcc')
541 Counter({'b': 1})
542
543 '''
544 if not isinstance(other, Counter):
545 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000546 result = Counter()
547 if len(self) < len(other):
548 self, other = other, self
549 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000550 p, q = self[elem], other[elem]
551 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000552 if newcount > 0:
553 result[elem] = newcount
554 return result
555
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000557################################################################################
558### UserDict
559################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000560
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000561class UserDict(MutableMapping):
562
563 # Start by filling-out the abstract methods
564 def __init__(self, dict=None, **kwargs):
565 self.data = {}
566 if dict is not None:
567 self.update(dict)
568 if len(kwargs):
569 self.update(kwargs)
570 def __len__(self): return len(self.data)
571 def __getitem__(self, key):
572 if key in self.data:
573 return self.data[key]
574 if hasattr(self.__class__, "__missing__"):
575 return self.__class__.__missing__(self, key)
576 raise KeyError(key)
577 def __setitem__(self, key, item): self.data[key] = item
578 def __delitem__(self, key): del self.data[key]
579 def __iter__(self):
580 return iter(self.data)
581
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000582 # Modify __contains__ to work correctly when __missing__ is present
583 def __contains__(self, key):
584 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000585
586 # Now, add the methods in dicts but not in MutableMapping
587 def __repr__(self): return repr(self.data)
588 def copy(self):
589 if self.__class__ is UserDict:
590 return UserDict(self.data.copy())
591 import copy
592 data = self.data
593 try:
594 self.data = {}
595 c = copy.copy(self)
596 finally:
597 self.data = data
598 c.update(self)
599 return c
600 @classmethod
601 def fromkeys(cls, iterable, value=None):
602 d = cls()
603 for key in iterable:
604 d[key] = value
605 return d
606
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000607
608
609################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000610### UserList
611################################################################################
612
613class UserList(MutableSequence):
614 """A more or less complete user-defined wrapper around list objects."""
615 def __init__(self, initlist=None):
616 self.data = []
617 if initlist is not None:
618 # XXX should this accept an arbitrary sequence?
619 if type(initlist) == type(self.data):
620 self.data[:] = initlist
621 elif isinstance(initlist, UserList):
622 self.data[:] = initlist.data[:]
623 else:
624 self.data = list(initlist)
625 def __repr__(self): return repr(self.data)
626 def __lt__(self, other): return self.data < self.__cast(other)
627 def __le__(self, other): return self.data <= self.__cast(other)
628 def __eq__(self, other): return self.data == self.__cast(other)
629 def __ne__(self, other): return self.data != self.__cast(other)
630 def __gt__(self, other): return self.data > self.__cast(other)
631 def __ge__(self, other): return self.data >= self.__cast(other)
632 def __cast(self, other):
633 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000634 def __contains__(self, item): return item in self.data
635 def __len__(self): return len(self.data)
636 def __getitem__(self, i): return self.data[i]
637 def __setitem__(self, i, item): self.data[i] = item
638 def __delitem__(self, i): del self.data[i]
639 def __add__(self, other):
640 if isinstance(other, UserList):
641 return self.__class__(self.data + other.data)
642 elif isinstance(other, type(self.data)):
643 return self.__class__(self.data + other)
644 return self.__class__(self.data + list(other))
645 def __radd__(self, other):
646 if isinstance(other, UserList):
647 return self.__class__(other.data + self.data)
648 elif isinstance(other, type(self.data)):
649 return self.__class__(other + self.data)
650 return self.__class__(list(other) + self.data)
651 def __iadd__(self, other):
652 if isinstance(other, UserList):
653 self.data += other.data
654 elif isinstance(other, type(self.data)):
655 self.data += other
656 else:
657 self.data += list(other)
658 return self
659 def __mul__(self, n):
660 return self.__class__(self.data*n)
661 __rmul__ = __mul__
662 def __imul__(self, n):
663 self.data *= n
664 return self
665 def append(self, item): self.data.append(item)
666 def insert(self, i, item): self.data.insert(i, item)
667 def pop(self, i=-1): return self.data.pop(i)
668 def remove(self, item): self.data.remove(item)
669 def count(self, item): return self.data.count(item)
670 def index(self, item, *args): return self.data.index(item, *args)
671 def reverse(self): self.data.reverse()
672 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
673 def extend(self, other):
674 if isinstance(other, UserList):
675 self.data.extend(other.data)
676 else:
677 self.data.extend(other)
678
679
680
681################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000682### UserString
683################################################################################
684
685class UserString(Sequence):
686 def __init__(self, seq):
687 if isinstance(seq, str):
688 self.data = seq
689 elif isinstance(seq, UserString):
690 self.data = seq.data[:]
691 else:
692 self.data = str(seq)
693 def __str__(self): return str(self.data)
694 def __repr__(self): return repr(self.data)
695 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000696 def __float__(self): return float(self.data)
697 def __complex__(self): return complex(self.data)
698 def __hash__(self): return hash(self.data)
699
700 def __eq__(self, string):
701 if isinstance(string, UserString):
702 return self.data == string.data
703 return self.data == string
704 def __ne__(self, string):
705 if isinstance(string, UserString):
706 return self.data != string.data
707 return self.data != string
708 def __lt__(self, string):
709 if isinstance(string, UserString):
710 return self.data < string.data
711 return self.data < string
712 def __le__(self, string):
713 if isinstance(string, UserString):
714 return self.data <= string.data
715 return self.data <= string
716 def __gt__(self, string):
717 if isinstance(string, UserString):
718 return self.data > string.data
719 return self.data > string
720 def __ge__(self, string):
721 if isinstance(string, UserString):
722 return self.data >= string.data
723 return self.data >= string
724
725 def __contains__(self, char):
726 if isinstance(char, UserString):
727 char = char.data
728 return char in self.data
729
730 def __len__(self): return len(self.data)
731 def __getitem__(self, index): return self.__class__(self.data[index])
732 def __add__(self, other):
733 if isinstance(other, UserString):
734 return self.__class__(self.data + other.data)
735 elif isinstance(other, str):
736 return self.__class__(self.data + other)
737 return self.__class__(self.data + str(other))
738 def __radd__(self, other):
739 if isinstance(other, str):
740 return self.__class__(other + self.data)
741 return self.__class__(str(other) + self.data)
742 def __mul__(self, n):
743 return self.__class__(self.data*n)
744 __rmul__ = __mul__
745 def __mod__(self, args):
746 return self.__class__(self.data % args)
747
748 # the following methods are defined in alphabetical order:
749 def capitalize(self): return self.__class__(self.data.capitalize())
750 def center(self, width, *args):
751 return self.__class__(self.data.center(width, *args))
752 def count(self, sub, start=0, end=_sys.maxsize):
753 if isinstance(sub, UserString):
754 sub = sub.data
755 return self.data.count(sub, start, end)
756 def encode(self, encoding=None, errors=None): # XXX improve this?
757 if encoding:
758 if errors:
759 return self.__class__(self.data.encode(encoding, errors))
760 return self.__class__(self.data.encode(encoding))
761 return self.__class__(self.data.encode())
762 def endswith(self, suffix, start=0, end=_sys.maxsize):
763 return self.data.endswith(suffix, start, end)
764 def expandtabs(self, tabsize=8):
765 return self.__class__(self.data.expandtabs(tabsize))
766 def find(self, sub, start=0, end=_sys.maxsize):
767 if isinstance(sub, UserString):
768 sub = sub.data
769 return self.data.find(sub, start, end)
770 def format(self, *args, **kwds):
771 return self.data.format(*args, **kwds)
772 def index(self, sub, start=0, end=_sys.maxsize):
773 return self.data.index(sub, start, end)
774 def isalpha(self): return self.data.isalpha()
775 def isalnum(self): return self.data.isalnum()
776 def isdecimal(self): return self.data.isdecimal()
777 def isdigit(self): return self.data.isdigit()
778 def isidentifier(self): return self.data.isidentifier()
779 def islower(self): return self.data.islower()
780 def isnumeric(self): return self.data.isnumeric()
781 def isspace(self): return self.data.isspace()
782 def istitle(self): return self.data.istitle()
783 def isupper(self): return self.data.isupper()
784 def join(self, seq): return self.data.join(seq)
785 def ljust(self, width, *args):
786 return self.__class__(self.data.ljust(width, *args))
787 def lower(self): return self.__class__(self.data.lower())
788 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
789 def partition(self, sep):
790 return self.data.partition(sep)
791 def replace(self, old, new, maxsplit=-1):
792 if isinstance(old, UserString):
793 old = old.data
794 if isinstance(new, UserString):
795 new = new.data
796 return self.__class__(self.data.replace(old, new, maxsplit))
797 def rfind(self, sub, start=0, end=_sys.maxsize):
798 return self.data.rfind(sub, start, end)
799 def rindex(self, sub, start=0, end=_sys.maxsize):
800 return self.data.rindex(sub, start, end)
801 def rjust(self, width, *args):
802 return self.__class__(self.data.rjust(width, *args))
803 def rpartition(self, sep):
804 return self.data.rpartition(sep)
805 def rstrip(self, chars=None):
806 return self.__class__(self.data.rstrip(chars))
807 def split(self, sep=None, maxsplit=-1):
808 return self.data.split(sep, maxsplit)
809 def rsplit(self, sep=None, maxsplit=-1):
810 return self.data.rsplit(sep, maxsplit)
811 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
812 def startswith(self, prefix, start=0, end=_sys.maxsize):
813 return self.data.startswith(prefix, start, end)
814 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
815 def swapcase(self): return self.__class__(self.data.swapcase())
816 def title(self): return self.__class__(self.data.title())
817 def translate(self, *args):
818 return self.__class__(self.data.translate(*args))
819 def upper(self): return self.__class__(self.data.upper())
820 def zfill(self, width): return self.__class__(self.data.zfill(width))
821
822
823
824################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000825### Simple tests
826################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000827
828if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000829 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000830 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000831 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000832 p = Point(x=10, y=20)
833 assert p == loads(dumps(p))
834
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000835 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000836 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000837 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000838 @property
839 def hypot(self):
840 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000841 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000842 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000843
Christian Heimes25bb7832008-01-11 16:17:00 +0000844 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000845 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000846
847 class Point(namedtuple('Point', 'x y')):
848 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000849 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000850 _make = classmethod(tuple.__new__)
851 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000852 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000853
854 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000855
Christian Heimes25bb7832008-01-11 16:17:00 +0000856 Point3D = namedtuple('Point3D', Point._fields + ('z',))
857 print(Point3D.__doc__)
858
Guido van Rossumd8faa362007-04-27 19:54:29 +0000859 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000860 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000861 print(TestResults(*doctest.testmod()))