blob: 68e9b2807acdbe1178f922b632f4352426917ca1 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerfa11db02010-09-12 04:12:42 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +000016from reprlib import recursive_repr as _recursive_repr
Raymond Hettinger2d32f632009-03-02 21:24:57 +000017
18################################################################################
19### OrderedDict
20################################################################################
21
Raymond Hettingerfa11db02010-09-12 04:12:42 +000022class _Link(object):
23 __slots__ = 'prev', 'next', 'key', '__weakref__'
24
Raymond Hettinger2d32f632009-03-02 21:24:57 +000025class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000027 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000030 # Big-O running times for all methods are the same as for regular dictionaries.
31
32 # The internal self.__map dictionary maps keys to links in a doubly linked list.
33 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingerf7328d02010-09-14 19:40:15 +000035 # The sentinel is stored in self.__hardroot with a weakref proxy in self.__root.
Raymond Hettingerc5c29c02010-09-12 18:13:46 +000036 # The prev/next links are weakref proxies (to prevent circular references).
37 # Individual links are kept alive by the hard reference in self.__map.
38 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000039
40 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000041 '''Initialize an ordered dictionary. Signature is the same as for
42 regular dictionaries, but keyword arguments are not recommended
43 because their insertion order is arbitrary.
44
45 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000046 if len(args) > 1:
47 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000048 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000049 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000050 except AttributeError:
Raymond Hettingerf7328d02010-09-14 19:40:15 +000051 self.__hardroot = _Link()
52 self.__root = root = _proxy(self.__hardroot)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000053 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000054 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000055 self.update(*args, **kwds)
56
Raymond Hettingerfa11db02010-09-12 04:12:42 +000057 def __setitem__(self, key, value,
58 dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000059 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000060 # Setting a new item creates a new link which goes at the end of the linked
61 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000062 if key not in self:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000063 self.__map[key] = link = Link()
Raymond Hettingerf1736542009-03-23 05:19:21 +000064 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000065 last = root.prev
66 link.prev, link.next, link.key = last, root, key
Raymond Hettingerf7328d02010-09-14 19:40:15 +000067 last.next = link
68 root.prev = proxy(link)
69 dict_setitem(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000070
Raymond Hettingerfa11db02010-09-12 04:12:42 +000071 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000072 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000073 # Deleting an existing item uses self.__map to find the link which is
74 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000075 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000076 link = self.__map.pop(key)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000077 link_prev = link.prev
78 link_next = link.next
79 link_prev.next = link_next
80 link_next.prev = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000081
Raymond Hettingerfa11db02010-09-12 04:12:42 +000082 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000083 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 # Traverse the linked list in order.
85 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000086 curr = root.next
Raymond Hettingerf1736542009-03-23 05:19:21 +000087 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000088 yield curr.key
89 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000090
Raymond Hettingerfa11db02010-09-12 04:12:42 +000091 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000092 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 # Traverse the linked list in reverse order.
94 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000095 curr = root.prev
Raymond Hettingerf1736542009-03-23 05:19:21 +000096 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000097 yield curr.key
98 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
100 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000101 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000102 items = [[k, self[k]] for k in self]
Raymond Hettingerf7328d02010-09-14 19:40:15 +0000103 tmp = self.__map, self.__root, self.__hardroot
104 del self.__map, self.__root, self.__hardroot
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000105 inst_dict = vars(self).copy()
Raymond Hettingerf7328d02010-09-14 19:40:15 +0000106 self.__map, self.__root, self.__hardroot = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000107 if inst_dict:
108 return (self.__class__, (items,), inst_dict)
109 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000110
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000111 def clear(self):
112 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000113 root = self.__root
114 root.prev = root.next = root
115 self.__map.clear()
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000116 dict.clear(self)
117
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000118 def popitem(self, last=True):
Raymond Hettinger331722d2010-09-02 18:44:16 +0000119 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
120 Pairs are returned in LIFO order if last is true or FIFO order if false.
121
122 '''
123 if not self:
124 raise KeyError('dictionary is empty')
125 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000126 if last:
127 link = root.prev
128 link_prev = link.prev
129 link_prev.next = root
130 root.prev = link_prev
131 else:
132 link = root.next
133 link_next = link.next
134 root.next = link_next
135 link_next.prev = root
136 key = link.key
Raymond Hettinger331722d2010-09-02 18:44:16 +0000137 del self.__map[key]
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000138 value = dict.pop(self, key)
Raymond Hettinger331722d2010-09-02 18:44:16 +0000139 return key, value
140
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000141 def move_to_end(self, key, last=True):
142 '''Move an existing element to the end (or beginning if last==False).
143
144 Raises KeyError if the element does not exist.
145 When last=True, acts like a fast version of self[key]=self.pop(key).
146
147 '''
148 link = self.__map[key]
149 link_prev = link.prev
150 link_next = link.next
151 link_prev.next = link_next
152 link_next.prev = link_prev
153 root = self.__root
154 if last:
155 last = root.prev
156 link.prev = last
157 link.next = root
158 last.next = root.prev = link
159 else:
160 first = root.next
161 link.prev = root
162 link.next = first
163 root.next = first.prev = link
164
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000165 setdefault = MutableMapping.setdefault
166 update = MutableMapping.update
167 pop = MutableMapping.pop
168 keys = MutableMapping.keys
169 values = MutableMapping.values
170 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000171 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000172
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000173 @_recursive_repr()
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000174 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000175 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000176 if not self:
177 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000178 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000179
180 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000181 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000182 return self.__class__(self)
183
184 @classmethod
185 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000186 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
187 and values equal to v (which defaults to None).
188
189 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000190 d = cls()
191 for key in iterable:
192 d[key] = value
193 return d
194
195 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000196 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
197 while comparison to a regular mapping is order-insensitive.
198
199 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000200 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000201 return len(self)==len(other) and \
202 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000203 return dict.__eq__(self, other)
204
Christian Heimes99170a52007-12-19 02:07:34 +0000205
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000206################################################################################
207### namedtuple
208################################################################################
209
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000210def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000211 """Returns a new subclass of tuple with named fields.
212
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000213 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000214 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000215 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000216 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000217 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000218 33
Christian Heimes99170a52007-12-19 02:07:34 +0000219 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000220 >>> x, y
221 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000222 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000223 33
Christian Heimes0449f632007-12-15 01:27:15 +0000224 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000225 >>> d['x']
226 11
227 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000228 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000229 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000230 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000231
232 """
233
Christian Heimes2380ac72008-01-09 00:17:24 +0000234 # Parse and validate the field names. Validation serves two purposes,
235 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000236 if isinstance(field_names, str):
237 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000238 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000239 if rename:
240 names = list(field_names)
241 seen = set()
242 for i, name in enumerate(names):
243 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
244 or not name or name[0].isdigit() or name.startswith('_')
245 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000246 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000247 seen.add(name)
248 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000249 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000250 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000251 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
252 if _iskeyword(name):
253 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
254 if name[0].isdigit():
255 raise ValueError('Type names and field names cannot start with a number: %r' % name)
256 seen_names = set()
257 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000258 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000259 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000260 if name in seen_names:
261 raise ValueError('Encountered duplicate field name: %r' % name)
262 seen_names.add(name)
263
264 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000265 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000266 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000267 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
268 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000269 '%(typename)s(%(argtxt)s)' \n
270 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000271 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000272 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000273 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000274 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000275 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000276 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000277 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000278 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000279 if len(result) != %(numfields)d:
280 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
281 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000282 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000283 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000284 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000285 def _asdict(self):
286 'Return a new OrderedDict which maps field names to their values'
287 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000288 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000289 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000290 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000291 if kwds:
292 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000293 return result \n
294 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000295 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000296 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000297 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000298 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000299 if verbose:
300 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000301
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000302 # Execute the template string in a temporary namespace and
303 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000304 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
305 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000306 try:
307 exec(template, namespace)
308 except SyntaxError as e:
Raymond Hettingerc1cc0d02010-09-16 08:06:05 +0000309 raise SyntaxError(e.msg + ':\n\n' + template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000310 result = namespace[typename]
311
312 # For pickling to work, the __module__ variable needs to be set to the frame
313 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000314 # sys._getframe is not defined (Jython for example) or sys._getframe is not
315 # defined for arguments greater than 0 (IronPython).
316 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000317 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000318 except (AttributeError, ValueError):
319 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000320
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000321 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000322
Guido van Rossumd8faa362007-04-27 19:54:29 +0000323
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000324########################################################################
325### Counter
326########################################################################
327
328class Counter(dict):
329 '''Dict subclass for counting hashable items. Sometimes called a bag
330 or multiset. Elements are stored as dictionary keys and their counts
331 are stored as dictionary values.
332
333 >>> c = Counter('abracadabra') # count elements from a string
334
335 >>> c.most_common(3) # three most common elements
336 [('a', 5), ('r', 2), ('b', 2)]
337 >>> sorted(c) # list all unique elements
338 ['a', 'b', 'c', 'd', 'r']
339 >>> ''.join(sorted(c.elements())) # list elements with repetitions
340 'aaaaabbcdrr'
341 >>> sum(c.values()) # total of all counts
342 11
343
344 >>> c['a'] # count of letter 'a'
345 5
346 >>> for elem in 'shazam': # update counts from an iterable
347 ... c[elem] += 1 # by adding 1 to each element's count
348 >>> c['a'] # now there are seven 'a'
349 7
350 >>> del c['r'] # remove all 'r'
351 >>> c['r'] # now there are zero 'r'
352 0
353
354 >>> d = Counter('simsalabim') # make another counter
355 >>> c.update(d) # add in the second counter
356 >>> c['a'] # now there are nine 'a'
357 9
358
359 >>> c.clear() # empty the counter
360 >>> c
361 Counter()
362
363 Note: If a count is set to zero or reduced to zero, it will remain
364 in the counter until the entry is deleted or the counter is cleared:
365
366 >>> c = Counter('aaabbc')
367 >>> c['b'] -= 2 # reduce the count of 'b' by two
368 >>> c.most_common() # 'b' is still in, but its count is zero
369 [('a', 3), ('c', 1), ('b', 0)]
370
371 '''
372 # References:
373 # http://en.wikipedia.org/wiki/Multiset
374 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
375 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
376 # http://code.activestate.com/recipes/259174/
377 # Knuth, TAOCP Vol. II section 4.6.3
378
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000379 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000380 '''Create a new, empty Counter object. And if given, count elements
381 from an input iterable. Or, initialize the count from another mapping
382 of elements to their counts.
383
384 >>> c = Counter() # a new, empty counter
385 >>> c = Counter('gallahad') # a new counter from an iterable
386 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000387 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000388
389 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000390 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000391
392 def __missing__(self, key):
393 'The count of elements not in the Counter is zero.'
394 # Needed so that self[missing_item] does not raise KeyError
395 return 0
396
397 def most_common(self, n=None):
398 '''List the n most common elements and their counts from the most
399 common to the least. If n is None, then list all element counts.
400
401 >>> Counter('abracadabra').most_common(3)
402 [('a', 5), ('r', 2), ('b', 2)]
403
404 '''
405 # Emulate Bag.sortedByCount from Smalltalk
406 if n is None:
407 return sorted(self.items(), key=_itemgetter(1), reverse=True)
408 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
409
410 def elements(self):
411 '''Iterator over elements repeating each as many times as its count.
412
413 >>> c = Counter('ABCABC')
414 >>> sorted(c.elements())
415 ['A', 'A', 'B', 'B', 'C', 'C']
416
417 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
418 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
419 >>> product = 1
420 >>> for factor in prime_factors.elements(): # loop over factors
421 ... product *= factor # and multiply them
422 >>> product
423 1836
424
425 Note, if an element's count has been set to zero or is a negative
426 number, elements() will ignore it.
427
428 '''
429 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
430 return _chain.from_iterable(_starmap(_repeat, self.items()))
431
432 # Override dict methods where necessary
433
434 @classmethod
435 def fromkeys(cls, iterable, v=None):
436 # There is no equivalent method for counters because setting v=1
437 # means that no element can have a count greater than one.
438 raise NotImplementedError(
439 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
440
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000441 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000442 '''Like dict.update() but add counts instead of replacing them.
443
444 Source can be an iterable, a dictionary, or another Counter instance.
445
446 >>> c = Counter('which')
447 >>> c.update('witch') # add elements from another iterable
448 >>> d = Counter('watch')
449 >>> c.update(d) # add elements from another counter
450 >>> c['h'] # four 'h' in which, witch, and watch
451 4
452
453 '''
454 # The regular dict.update() operation makes no sense here because the
455 # replace behavior results in the some of original untouched counts
456 # being mixed-in with all of the other counts for a mismash that
457 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000458 # contexts. Instead, we implement straight-addition. Both the inputs
459 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000460
461 if iterable is not None:
462 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000463 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000464 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000465 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000466 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000467 else:
468 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000469 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000470 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000471 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000472 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000473 if kwds:
474 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000475
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000476 def subtract(self, iterable=None, **kwds):
477 '''Like dict.update() but subtracts counts instead of replacing them.
478 Counts can be reduced below zero. Both the inputs and outputs are
479 allowed to contain zero and negative counts.
480
481 Source can be an iterable, a dictionary, or another Counter instance.
482
483 >>> c = Counter('which')
484 >>> c.subtract('witch') # subtract elements from another iterable
485 >>> c.subtract(Counter('watch')) # subtract elements from another counter
486 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
487 0
488 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
489 -1
490
491 '''
492 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000493 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000494 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000495 for elem, count in iterable.items():
496 self[elem] = self_get(elem, 0) - count
497 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000498 for elem in iterable:
499 self[elem] = self_get(elem, 0) - 1
500 if kwds:
501 self.subtract(kwds)
502
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000503 def copy(self):
504 'Like dict.copy() but returns a Counter instance instead of a dict.'
505 return Counter(self)
506
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000507 def __delitem__(self, elem):
508 'Like dict.__delitem__() but does not raise KeyError for missing values.'
509 if elem in self:
510 dict.__delitem__(self, elem)
511
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000512 def __repr__(self):
513 if not self:
514 return '%s()' % self.__class__.__name__
515 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
516 return '%s({%s})' % (self.__class__.__name__, items)
517
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000518 # Multiset-style mathematical operations discussed in:
519 # Knuth TAOCP Volume II section 4.6.3 exercise 19
520 # and at http://en.wikipedia.org/wiki/Multiset
521 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000522 # Outputs guaranteed to only include positive counts.
523 #
524 # To strip negative and zero counts, add-in an empty counter:
525 # c += Counter()
526
527 def __add__(self, other):
528 '''Add counts from two counters.
529
530 >>> Counter('abbb') + Counter('bcc')
531 Counter({'b': 4, 'c': 2, 'a': 1})
532
533 '''
534 if not isinstance(other, Counter):
535 return NotImplemented
536 result = Counter()
537 for elem in set(self) | set(other):
538 newcount = self[elem] + other[elem]
539 if newcount > 0:
540 result[elem] = newcount
541 return result
542
543 def __sub__(self, other):
544 ''' Subtract count, but keep only results with positive counts.
545
546 >>> Counter('abbbc') - Counter('bccd')
547 Counter({'b': 2, 'a': 1})
548
549 '''
550 if not isinstance(other, Counter):
551 return NotImplemented
552 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000553 for elem in set(self) | set(other):
554 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000555 if newcount > 0:
556 result[elem] = newcount
557 return result
558
559 def __or__(self, other):
560 '''Union is the maximum of value in either of the input counters.
561
562 >>> Counter('abbb') | Counter('bcc')
563 Counter({'b': 3, 'c': 2, 'a': 1})
564
565 '''
566 if not isinstance(other, Counter):
567 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000568 result = Counter()
569 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000570 p, q = self[elem], other[elem]
571 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000572 if newcount > 0:
573 result[elem] = newcount
574 return result
575
576 def __and__(self, other):
577 ''' Intersection is the minimum of corresponding counts.
578
579 >>> Counter('abbb') & Counter('bcc')
580 Counter({'b': 1})
581
582 '''
583 if not isinstance(other, Counter):
584 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000585 result = Counter()
586 if len(self) < len(other):
587 self, other = other, self
588 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000589 p, q = self[elem], other[elem]
590 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000591 if newcount > 0:
592 result[elem] = newcount
593 return result
594
Guido van Rossumd8faa362007-04-27 19:54:29 +0000595
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000596################################################################################
597### UserDict
598################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000599
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000600class UserDict(MutableMapping):
601
602 # Start by filling-out the abstract methods
603 def __init__(self, dict=None, **kwargs):
604 self.data = {}
605 if dict is not None:
606 self.update(dict)
607 if len(kwargs):
608 self.update(kwargs)
609 def __len__(self): return len(self.data)
610 def __getitem__(self, key):
611 if key in self.data:
612 return self.data[key]
613 if hasattr(self.__class__, "__missing__"):
614 return self.__class__.__missing__(self, key)
615 raise KeyError(key)
616 def __setitem__(self, key, item): self.data[key] = item
617 def __delitem__(self, key): del self.data[key]
618 def __iter__(self):
619 return iter(self.data)
620
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000621 # Modify __contains__ to work correctly when __missing__ is present
622 def __contains__(self, key):
623 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000624
625 # Now, add the methods in dicts but not in MutableMapping
626 def __repr__(self): return repr(self.data)
627 def copy(self):
628 if self.__class__ is UserDict:
629 return UserDict(self.data.copy())
630 import copy
631 data = self.data
632 try:
633 self.data = {}
634 c = copy.copy(self)
635 finally:
636 self.data = data
637 c.update(self)
638 return c
639 @classmethod
640 def fromkeys(cls, iterable, value=None):
641 d = cls()
642 for key in iterable:
643 d[key] = value
644 return d
645
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000646
647
648################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000649### UserList
650################################################################################
651
652class UserList(MutableSequence):
653 """A more or less complete user-defined wrapper around list objects."""
654 def __init__(self, initlist=None):
655 self.data = []
656 if initlist is not None:
657 # XXX should this accept an arbitrary sequence?
658 if type(initlist) == type(self.data):
659 self.data[:] = initlist
660 elif isinstance(initlist, UserList):
661 self.data[:] = initlist.data[:]
662 else:
663 self.data = list(initlist)
664 def __repr__(self): return repr(self.data)
665 def __lt__(self, other): return self.data < self.__cast(other)
666 def __le__(self, other): return self.data <= self.__cast(other)
667 def __eq__(self, other): return self.data == self.__cast(other)
668 def __ne__(self, other): return self.data != self.__cast(other)
669 def __gt__(self, other): return self.data > self.__cast(other)
670 def __ge__(self, other): return self.data >= self.__cast(other)
671 def __cast(self, other):
672 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000673 def __contains__(self, item): return item in self.data
674 def __len__(self): return len(self.data)
675 def __getitem__(self, i): return self.data[i]
676 def __setitem__(self, i, item): self.data[i] = item
677 def __delitem__(self, i): del self.data[i]
678 def __add__(self, other):
679 if isinstance(other, UserList):
680 return self.__class__(self.data + other.data)
681 elif isinstance(other, type(self.data)):
682 return self.__class__(self.data + other)
683 return self.__class__(self.data + list(other))
684 def __radd__(self, other):
685 if isinstance(other, UserList):
686 return self.__class__(other.data + self.data)
687 elif isinstance(other, type(self.data)):
688 return self.__class__(other + self.data)
689 return self.__class__(list(other) + self.data)
690 def __iadd__(self, other):
691 if isinstance(other, UserList):
692 self.data += other.data
693 elif isinstance(other, type(self.data)):
694 self.data += other
695 else:
696 self.data += list(other)
697 return self
698 def __mul__(self, n):
699 return self.__class__(self.data*n)
700 __rmul__ = __mul__
701 def __imul__(self, n):
702 self.data *= n
703 return self
704 def append(self, item): self.data.append(item)
705 def insert(self, i, item): self.data.insert(i, item)
706 def pop(self, i=-1): return self.data.pop(i)
707 def remove(self, item): self.data.remove(item)
708 def count(self, item): return self.data.count(item)
709 def index(self, item, *args): return self.data.index(item, *args)
710 def reverse(self): self.data.reverse()
711 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
712 def extend(self, other):
713 if isinstance(other, UserList):
714 self.data.extend(other.data)
715 else:
716 self.data.extend(other)
717
718
719
720################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000721### UserString
722################################################################################
723
724class UserString(Sequence):
725 def __init__(self, seq):
726 if isinstance(seq, str):
727 self.data = seq
728 elif isinstance(seq, UserString):
729 self.data = seq.data[:]
730 else:
731 self.data = str(seq)
732 def __str__(self): return str(self.data)
733 def __repr__(self): return repr(self.data)
734 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000735 def __float__(self): return float(self.data)
736 def __complex__(self): return complex(self.data)
737 def __hash__(self): return hash(self.data)
738
739 def __eq__(self, string):
740 if isinstance(string, UserString):
741 return self.data == string.data
742 return self.data == string
743 def __ne__(self, string):
744 if isinstance(string, UserString):
745 return self.data != string.data
746 return self.data != string
747 def __lt__(self, string):
748 if isinstance(string, UserString):
749 return self.data < string.data
750 return self.data < string
751 def __le__(self, string):
752 if isinstance(string, UserString):
753 return self.data <= string.data
754 return self.data <= string
755 def __gt__(self, string):
756 if isinstance(string, UserString):
757 return self.data > string.data
758 return self.data > string
759 def __ge__(self, string):
760 if isinstance(string, UserString):
761 return self.data >= string.data
762 return self.data >= string
763
764 def __contains__(self, char):
765 if isinstance(char, UserString):
766 char = char.data
767 return char in self.data
768
769 def __len__(self): return len(self.data)
770 def __getitem__(self, index): return self.__class__(self.data[index])
771 def __add__(self, other):
772 if isinstance(other, UserString):
773 return self.__class__(self.data + other.data)
774 elif isinstance(other, str):
775 return self.__class__(self.data + other)
776 return self.__class__(self.data + str(other))
777 def __radd__(self, other):
778 if isinstance(other, str):
779 return self.__class__(other + self.data)
780 return self.__class__(str(other) + self.data)
781 def __mul__(self, n):
782 return self.__class__(self.data*n)
783 __rmul__ = __mul__
784 def __mod__(self, args):
785 return self.__class__(self.data % args)
786
787 # the following methods are defined in alphabetical order:
788 def capitalize(self): return self.__class__(self.data.capitalize())
789 def center(self, width, *args):
790 return self.__class__(self.data.center(width, *args))
791 def count(self, sub, start=0, end=_sys.maxsize):
792 if isinstance(sub, UserString):
793 sub = sub.data
794 return self.data.count(sub, start, end)
795 def encode(self, encoding=None, errors=None): # XXX improve this?
796 if encoding:
797 if errors:
798 return self.__class__(self.data.encode(encoding, errors))
799 return self.__class__(self.data.encode(encoding))
800 return self.__class__(self.data.encode())
801 def endswith(self, suffix, start=0, end=_sys.maxsize):
802 return self.data.endswith(suffix, start, end)
803 def expandtabs(self, tabsize=8):
804 return self.__class__(self.data.expandtabs(tabsize))
805 def find(self, sub, start=0, end=_sys.maxsize):
806 if isinstance(sub, UserString):
807 sub = sub.data
808 return self.data.find(sub, start, end)
809 def format(self, *args, **kwds):
810 return self.data.format(*args, **kwds)
811 def index(self, sub, start=0, end=_sys.maxsize):
812 return self.data.index(sub, start, end)
813 def isalpha(self): return self.data.isalpha()
814 def isalnum(self): return self.data.isalnum()
815 def isdecimal(self): return self.data.isdecimal()
816 def isdigit(self): return self.data.isdigit()
817 def isidentifier(self): return self.data.isidentifier()
818 def islower(self): return self.data.islower()
819 def isnumeric(self): return self.data.isnumeric()
820 def isspace(self): return self.data.isspace()
821 def istitle(self): return self.data.istitle()
822 def isupper(self): return self.data.isupper()
823 def join(self, seq): return self.data.join(seq)
824 def ljust(self, width, *args):
825 return self.__class__(self.data.ljust(width, *args))
826 def lower(self): return self.__class__(self.data.lower())
827 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
828 def partition(self, sep):
829 return self.data.partition(sep)
830 def replace(self, old, new, maxsplit=-1):
831 if isinstance(old, UserString):
832 old = old.data
833 if isinstance(new, UserString):
834 new = new.data
835 return self.__class__(self.data.replace(old, new, maxsplit))
836 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000837 if isinstance(sub, UserString):
838 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000839 return self.data.rfind(sub, start, end)
840 def rindex(self, sub, start=0, end=_sys.maxsize):
841 return self.data.rindex(sub, start, end)
842 def rjust(self, width, *args):
843 return self.__class__(self.data.rjust(width, *args))
844 def rpartition(self, sep):
845 return self.data.rpartition(sep)
846 def rstrip(self, chars=None):
847 return self.__class__(self.data.rstrip(chars))
848 def split(self, sep=None, maxsplit=-1):
849 return self.data.split(sep, maxsplit)
850 def rsplit(self, sep=None, maxsplit=-1):
851 return self.data.rsplit(sep, maxsplit)
852 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
853 def startswith(self, prefix, start=0, end=_sys.maxsize):
854 return self.data.startswith(prefix, start, end)
855 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
856 def swapcase(self): return self.__class__(self.data.swapcase())
857 def title(self): return self.__class__(self.data.title())
858 def translate(self, *args):
859 return self.__class__(self.data.translate(*args))
860 def upper(self): return self.__class__(self.data.upper())
861 def zfill(self, width): return self.__class__(self.data.zfill(width))
862
863
864
865################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000866### Simple tests
867################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000868
869if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000870 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000871 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000872 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000873 p = Point(x=10, y=20)
874 assert p == loads(dumps(p))
875
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000876 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000877 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000878 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000879 @property
880 def hypot(self):
881 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000882 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000883 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000884
Christian Heimes25bb7832008-01-11 16:17:00 +0000885 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000886 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000887
888 class Point(namedtuple('Point', 'x y')):
889 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000890 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000891 _make = classmethod(tuple.__new__)
892 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000893 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000894
895 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000896
Christian Heimes25bb7832008-01-11 16:17:00 +0000897 Point3D = namedtuple('Point3D', Point._fields + ('z',))
898 print(Point3D.__doc__)
899
Guido van Rossumd8faa362007-04-27 19:54:29 +0000900 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000901 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000902 print(TestResults(*doctest.testmod()))