blob: 78b4115eb1d69b4cb5326cf15ee8e83a1f23b72f [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerfa11db02010-09-12 04:12:42 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +000016from reprlib import recursive_repr as _recursive_repr
Raymond Hettinger2d32f632009-03-02 21:24:57 +000017
18################################################################################
19### OrderedDict
20################################################################################
21
Raymond Hettingerfa11db02010-09-12 04:12:42 +000022class _Link(object):
23 __slots__ = 'prev', 'next', 'key', '__weakref__'
24
Raymond Hettinger2d32f632009-03-02 21:24:57 +000025class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000027 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000030 # Big-O running times for all methods are the same as for regular dictionaries.
31
32 # The internal self.__map dictionary maps keys to links in a doubly linked list.
33 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingerc5c29c02010-09-12 18:13:46 +000035 # The prev/next links are weakref proxies (to prevent circular references).
36 # Individual links are kept alive by the hard reference in self.__map.
37 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000038
39 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000040 '''Initialize an ordered dictionary. Signature is the same as for
41 regular dictionaries, but keyword arguments are not recommended
42 because their insertion order is arbitrary.
43
44 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000045 if len(args) > 1:
46 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000047 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000048 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000049 except AttributeError:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000050 self.__root = root = _Link() # sentinel node for the doubly linked list
51 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000052 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000053 self.update(*args, **kwds)
54
Raymond Hettingerfa11db02010-09-12 04:12:42 +000055 def __setitem__(self, key, value,
56 dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000057 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000058 # Setting a new item creates a new link which goes at the end of the linked
59 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000060 if key not in self:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000061 self.__map[key] = link = Link()
Raymond Hettingerf1736542009-03-23 05:19:21 +000062 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000063 last = root.prev
64 link.prev, link.next, link.key = last, root, key
Raymond Hettingerc5c29c02010-09-12 18:13:46 +000065 last.next = root.prev = proxy(link)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000066 dict.__setitem__(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000067
Raymond Hettingerfa11db02010-09-12 04:12:42 +000068 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000069 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000070 # Deleting an existing item uses self.__map to find the link which is
71 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000072 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000073 link = self.__map.pop(key)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000074 link_prev = link.prev
75 link_next = link.next
76 link_prev.next = link_next
77 link_next.prev = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000078
Raymond Hettingerfa11db02010-09-12 04:12:42 +000079 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000080 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000081 # Traverse the linked list in order.
82 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000083 curr = root.next
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000085 yield curr.key
86 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000087
Raymond Hettingerfa11db02010-09-12 04:12:42 +000088 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000089 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000090 # Traverse the linked list in reverse order.
91 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000092 curr = root.prev
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000094 yield curr.key
95 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000096
97 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000098 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099 items = [[k, self[k]] for k in self]
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000100 tmp = self.__map, self.__root
101 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000102 inst_dict = vars(self).copy()
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000103 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000104 if inst_dict:
105 return (self.__class__, (items,), inst_dict)
106 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000107
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000108 def clear(self):
109 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000110 root = self.__root
111 root.prev = root.next = root
112 self.__map.clear()
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000113 dict.clear(self)
114
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000115 def popitem(self, last=True):
Raymond Hettinger331722d2010-09-02 18:44:16 +0000116 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
117 Pairs are returned in LIFO order if last is true or FIFO order if false.
118
119 '''
120 if not self:
121 raise KeyError('dictionary is empty')
122 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000123 if last:
124 link = root.prev
125 link_prev = link.prev
126 link_prev.next = root
127 root.prev = link_prev
128 else:
129 link = root.next
130 link_next = link.next
131 root.next = link_next
132 link_next.prev = root
133 key = link.key
Raymond Hettinger331722d2010-09-02 18:44:16 +0000134 del self.__map[key]
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000135 value = dict.pop(self, key)
Raymond Hettinger331722d2010-09-02 18:44:16 +0000136 return key, value
137
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000138 def move_to_end(self, key, last=True):
139 '''Move an existing element to the end (or beginning if last==False).
140
141 Raises KeyError if the element does not exist.
142 When last=True, acts like a fast version of self[key]=self.pop(key).
143
144 '''
145 link = self.__map[key]
146 link_prev = link.prev
147 link_next = link.next
148 link_prev.next = link_next
149 link_next.prev = link_prev
150 root = self.__root
151 if last:
152 last = root.prev
153 link.prev = last
154 link.next = root
155 last.next = root.prev = link
156 else:
157 first = root.next
158 link.prev = root
159 link.next = first
160 root.next = first.prev = link
161
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000162 setdefault = MutableMapping.setdefault
163 update = MutableMapping.update
164 pop = MutableMapping.pop
165 keys = MutableMapping.keys
166 values = MutableMapping.values
167 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000168 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000169
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000170 @_recursive_repr()
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000171 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000172 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000173 if not self:
174 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger98a5f3f2010-09-13 21:36:00 +0000175 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000176
177 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000178 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000179 return self.__class__(self)
180
181 @classmethod
182 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000183 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
184 and values equal to v (which defaults to None).
185
186 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000187 d = cls()
188 for key in iterable:
189 d[key] = value
190 return d
191
192 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000193 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
194 while comparison to a regular mapping is order-insensitive.
195
196 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000197 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000198 return len(self)==len(other) and \
199 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000200 return dict.__eq__(self, other)
201
Christian Heimes99170a52007-12-19 02:07:34 +0000202
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000203################################################################################
204### namedtuple
205################################################################################
206
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000207def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000208 """Returns a new subclass of tuple with named fields.
209
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000210 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000211 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000212 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000213 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000214 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000215 33
Christian Heimes99170a52007-12-19 02:07:34 +0000216 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000217 >>> x, y
218 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000219 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000220 33
Christian Heimes0449f632007-12-15 01:27:15 +0000221 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000222 >>> d['x']
223 11
224 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000225 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000226 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000227 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000228
229 """
230
Christian Heimes2380ac72008-01-09 00:17:24 +0000231 # Parse and validate the field names. Validation serves two purposes,
232 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000233 if isinstance(field_names, str):
234 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000235 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000236 if rename:
237 names = list(field_names)
238 seen = set()
239 for i, name in enumerate(names):
240 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
241 or not name or name[0].isdigit() or name.startswith('_')
242 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000243 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000244 seen.add(name)
245 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000246 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000247 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000248 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
249 if _iskeyword(name):
250 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
251 if name[0].isdigit():
252 raise ValueError('Type names and field names cannot start with a number: %r' % name)
253 seen_names = set()
254 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000255 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000256 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000257 if name in seen_names:
258 raise ValueError('Encountered duplicate field name: %r' % name)
259 seen_names.add(name)
260
261 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000262 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000263 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000264 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
265 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000266 '%(typename)s(%(argtxt)s)' \n
267 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000268 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000269 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000270 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000271 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000272 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000273 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000274 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000275 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000276 if len(result) != %(numfields)d:
277 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
278 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000279 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000280 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000281 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000282 def _asdict(self):
283 'Return a new OrderedDict which maps field names to their values'
284 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000285 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000286 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000287 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000288 if kwds:
289 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000290 return result \n
291 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000292 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000293 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000294 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000295 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000296 if verbose:
297 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000298
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000299 # Execute the template string in a temporary namespace and
300 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000301 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
302 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000303 try:
304 exec(template, namespace)
305 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000306 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000307 result = namespace[typename]
308
309 # For pickling to work, the __module__ variable needs to be set to the frame
310 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000311 # sys._getframe is not defined (Jython for example) or sys._getframe is not
312 # defined for arguments greater than 0 (IronPython).
313 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000314 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000315 except (AttributeError, ValueError):
316 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000317
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000318 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000319
Guido van Rossumd8faa362007-04-27 19:54:29 +0000320
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000321########################################################################
322### Counter
323########################################################################
324
325class Counter(dict):
326 '''Dict subclass for counting hashable items. Sometimes called a bag
327 or multiset. Elements are stored as dictionary keys and their counts
328 are stored as dictionary values.
329
330 >>> c = Counter('abracadabra') # count elements from a string
331
332 >>> c.most_common(3) # three most common elements
333 [('a', 5), ('r', 2), ('b', 2)]
334 >>> sorted(c) # list all unique elements
335 ['a', 'b', 'c', 'd', 'r']
336 >>> ''.join(sorted(c.elements())) # list elements with repetitions
337 'aaaaabbcdrr'
338 >>> sum(c.values()) # total of all counts
339 11
340
341 >>> c['a'] # count of letter 'a'
342 5
343 >>> for elem in 'shazam': # update counts from an iterable
344 ... c[elem] += 1 # by adding 1 to each element's count
345 >>> c['a'] # now there are seven 'a'
346 7
347 >>> del c['r'] # remove all 'r'
348 >>> c['r'] # now there are zero 'r'
349 0
350
351 >>> d = Counter('simsalabim') # make another counter
352 >>> c.update(d) # add in the second counter
353 >>> c['a'] # now there are nine 'a'
354 9
355
356 >>> c.clear() # empty the counter
357 >>> c
358 Counter()
359
360 Note: If a count is set to zero or reduced to zero, it will remain
361 in the counter until the entry is deleted or the counter is cleared:
362
363 >>> c = Counter('aaabbc')
364 >>> c['b'] -= 2 # reduce the count of 'b' by two
365 >>> c.most_common() # 'b' is still in, but its count is zero
366 [('a', 3), ('c', 1), ('b', 0)]
367
368 '''
369 # References:
370 # http://en.wikipedia.org/wiki/Multiset
371 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
372 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
373 # http://code.activestate.com/recipes/259174/
374 # Knuth, TAOCP Vol. II section 4.6.3
375
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000376 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000377 '''Create a new, empty Counter object. And if given, count elements
378 from an input iterable. Or, initialize the count from another mapping
379 of elements to their counts.
380
381 >>> c = Counter() # a new, empty counter
382 >>> c = Counter('gallahad') # a new counter from an iterable
383 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000384 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000385
386 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000387 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000388
389 def __missing__(self, key):
390 'The count of elements not in the Counter is zero.'
391 # Needed so that self[missing_item] does not raise KeyError
392 return 0
393
394 def most_common(self, n=None):
395 '''List the n most common elements and their counts from the most
396 common to the least. If n is None, then list all element counts.
397
398 >>> Counter('abracadabra').most_common(3)
399 [('a', 5), ('r', 2), ('b', 2)]
400
401 '''
402 # Emulate Bag.sortedByCount from Smalltalk
403 if n is None:
404 return sorted(self.items(), key=_itemgetter(1), reverse=True)
405 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
406
407 def elements(self):
408 '''Iterator over elements repeating each as many times as its count.
409
410 >>> c = Counter('ABCABC')
411 >>> sorted(c.elements())
412 ['A', 'A', 'B', 'B', 'C', 'C']
413
414 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
415 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
416 >>> product = 1
417 >>> for factor in prime_factors.elements(): # loop over factors
418 ... product *= factor # and multiply them
419 >>> product
420 1836
421
422 Note, if an element's count has been set to zero or is a negative
423 number, elements() will ignore it.
424
425 '''
426 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
427 return _chain.from_iterable(_starmap(_repeat, self.items()))
428
429 # Override dict methods where necessary
430
431 @classmethod
432 def fromkeys(cls, iterable, v=None):
433 # There is no equivalent method for counters because setting v=1
434 # means that no element can have a count greater than one.
435 raise NotImplementedError(
436 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
437
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000438 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000439 '''Like dict.update() but add counts instead of replacing them.
440
441 Source can be an iterable, a dictionary, or another Counter instance.
442
443 >>> c = Counter('which')
444 >>> c.update('witch') # add elements from another iterable
445 >>> d = Counter('watch')
446 >>> c.update(d) # add elements from another counter
447 >>> c['h'] # four 'h' in which, witch, and watch
448 4
449
450 '''
451 # The regular dict.update() operation makes no sense here because the
452 # replace behavior results in the some of original untouched counts
453 # being mixed-in with all of the other counts for a mismash that
454 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000455 # contexts. Instead, we implement straight-addition. Both the inputs
456 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000457
458 if iterable is not None:
459 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000460 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000461 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000462 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000463 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000464 else:
465 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000466 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000467 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000468 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000469 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000470 if kwds:
471 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000472
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000473 def subtract(self, iterable=None, **kwds):
474 '''Like dict.update() but subtracts counts instead of replacing them.
475 Counts can be reduced below zero. Both the inputs and outputs are
476 allowed to contain zero and negative counts.
477
478 Source can be an iterable, a dictionary, or another Counter instance.
479
480 >>> c = Counter('which')
481 >>> c.subtract('witch') # subtract elements from another iterable
482 >>> c.subtract(Counter('watch')) # subtract elements from another counter
483 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
484 0
485 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
486 -1
487
488 '''
489 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000490 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000491 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000492 for elem, count in iterable.items():
493 self[elem] = self_get(elem, 0) - count
494 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000495 for elem in iterable:
496 self[elem] = self_get(elem, 0) - 1
497 if kwds:
498 self.subtract(kwds)
499
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000500 def copy(self):
501 'Like dict.copy() but returns a Counter instance instead of a dict.'
502 return Counter(self)
503
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000504 def __delitem__(self, elem):
505 'Like dict.__delitem__() but does not raise KeyError for missing values.'
506 if elem in self:
507 dict.__delitem__(self, elem)
508
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000509 def __repr__(self):
510 if not self:
511 return '%s()' % self.__class__.__name__
512 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
513 return '%s({%s})' % (self.__class__.__name__, items)
514
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000515 # Multiset-style mathematical operations discussed in:
516 # Knuth TAOCP Volume II section 4.6.3 exercise 19
517 # and at http://en.wikipedia.org/wiki/Multiset
518 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000519 # Outputs guaranteed to only include positive counts.
520 #
521 # To strip negative and zero counts, add-in an empty counter:
522 # c += Counter()
523
524 def __add__(self, other):
525 '''Add counts from two counters.
526
527 >>> Counter('abbb') + Counter('bcc')
528 Counter({'b': 4, 'c': 2, 'a': 1})
529
530 '''
531 if not isinstance(other, Counter):
532 return NotImplemented
533 result = Counter()
534 for elem in set(self) | set(other):
535 newcount = self[elem] + other[elem]
536 if newcount > 0:
537 result[elem] = newcount
538 return result
539
540 def __sub__(self, other):
541 ''' Subtract count, but keep only results with positive counts.
542
543 >>> Counter('abbbc') - Counter('bccd')
544 Counter({'b': 2, 'a': 1})
545
546 '''
547 if not isinstance(other, Counter):
548 return NotImplemented
549 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000550 for elem in set(self) | set(other):
551 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000552 if newcount > 0:
553 result[elem] = newcount
554 return result
555
556 def __or__(self, other):
557 '''Union is the maximum of value in either of the input counters.
558
559 >>> Counter('abbb') | Counter('bcc')
560 Counter({'b': 3, 'c': 2, 'a': 1})
561
562 '''
563 if not isinstance(other, Counter):
564 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000565 result = Counter()
566 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000567 p, q = self[elem], other[elem]
568 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000569 if newcount > 0:
570 result[elem] = newcount
571 return result
572
573 def __and__(self, other):
574 ''' Intersection is the minimum of corresponding counts.
575
576 >>> Counter('abbb') & Counter('bcc')
577 Counter({'b': 1})
578
579 '''
580 if not isinstance(other, Counter):
581 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000582 result = Counter()
583 if len(self) < len(other):
584 self, other = other, self
585 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000586 p, q = self[elem], other[elem]
587 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000588 if newcount > 0:
589 result[elem] = newcount
590 return result
591
Guido van Rossumd8faa362007-04-27 19:54:29 +0000592
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000593################################################################################
594### UserDict
595################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000596
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000597class UserDict(MutableMapping):
598
599 # Start by filling-out the abstract methods
600 def __init__(self, dict=None, **kwargs):
601 self.data = {}
602 if dict is not None:
603 self.update(dict)
604 if len(kwargs):
605 self.update(kwargs)
606 def __len__(self): return len(self.data)
607 def __getitem__(self, key):
608 if key in self.data:
609 return self.data[key]
610 if hasattr(self.__class__, "__missing__"):
611 return self.__class__.__missing__(self, key)
612 raise KeyError(key)
613 def __setitem__(self, key, item): self.data[key] = item
614 def __delitem__(self, key): del self.data[key]
615 def __iter__(self):
616 return iter(self.data)
617
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000618 # Modify __contains__ to work correctly when __missing__ is present
619 def __contains__(self, key):
620 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000621
622 # Now, add the methods in dicts but not in MutableMapping
623 def __repr__(self): return repr(self.data)
624 def copy(self):
625 if self.__class__ is UserDict:
626 return UserDict(self.data.copy())
627 import copy
628 data = self.data
629 try:
630 self.data = {}
631 c = copy.copy(self)
632 finally:
633 self.data = data
634 c.update(self)
635 return c
636 @classmethod
637 def fromkeys(cls, iterable, value=None):
638 d = cls()
639 for key in iterable:
640 d[key] = value
641 return d
642
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000643
644
645################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000646### UserList
647################################################################################
648
649class UserList(MutableSequence):
650 """A more or less complete user-defined wrapper around list objects."""
651 def __init__(self, initlist=None):
652 self.data = []
653 if initlist is not None:
654 # XXX should this accept an arbitrary sequence?
655 if type(initlist) == type(self.data):
656 self.data[:] = initlist
657 elif isinstance(initlist, UserList):
658 self.data[:] = initlist.data[:]
659 else:
660 self.data = list(initlist)
661 def __repr__(self): return repr(self.data)
662 def __lt__(self, other): return self.data < self.__cast(other)
663 def __le__(self, other): return self.data <= self.__cast(other)
664 def __eq__(self, other): return self.data == self.__cast(other)
665 def __ne__(self, other): return self.data != self.__cast(other)
666 def __gt__(self, other): return self.data > self.__cast(other)
667 def __ge__(self, other): return self.data >= self.__cast(other)
668 def __cast(self, other):
669 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000670 def __contains__(self, item): return item in self.data
671 def __len__(self): return len(self.data)
672 def __getitem__(self, i): return self.data[i]
673 def __setitem__(self, i, item): self.data[i] = item
674 def __delitem__(self, i): del self.data[i]
675 def __add__(self, other):
676 if isinstance(other, UserList):
677 return self.__class__(self.data + other.data)
678 elif isinstance(other, type(self.data)):
679 return self.__class__(self.data + other)
680 return self.__class__(self.data + list(other))
681 def __radd__(self, other):
682 if isinstance(other, UserList):
683 return self.__class__(other.data + self.data)
684 elif isinstance(other, type(self.data)):
685 return self.__class__(other + self.data)
686 return self.__class__(list(other) + self.data)
687 def __iadd__(self, other):
688 if isinstance(other, UserList):
689 self.data += other.data
690 elif isinstance(other, type(self.data)):
691 self.data += other
692 else:
693 self.data += list(other)
694 return self
695 def __mul__(self, n):
696 return self.__class__(self.data*n)
697 __rmul__ = __mul__
698 def __imul__(self, n):
699 self.data *= n
700 return self
701 def append(self, item): self.data.append(item)
702 def insert(self, i, item): self.data.insert(i, item)
703 def pop(self, i=-1): return self.data.pop(i)
704 def remove(self, item): self.data.remove(item)
705 def count(self, item): return self.data.count(item)
706 def index(self, item, *args): return self.data.index(item, *args)
707 def reverse(self): self.data.reverse()
708 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
709 def extend(self, other):
710 if isinstance(other, UserList):
711 self.data.extend(other.data)
712 else:
713 self.data.extend(other)
714
715
716
717################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000718### UserString
719################################################################################
720
721class UserString(Sequence):
722 def __init__(self, seq):
723 if isinstance(seq, str):
724 self.data = seq
725 elif isinstance(seq, UserString):
726 self.data = seq.data[:]
727 else:
728 self.data = str(seq)
729 def __str__(self): return str(self.data)
730 def __repr__(self): return repr(self.data)
731 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000732 def __float__(self): return float(self.data)
733 def __complex__(self): return complex(self.data)
734 def __hash__(self): return hash(self.data)
735
736 def __eq__(self, string):
737 if isinstance(string, UserString):
738 return self.data == string.data
739 return self.data == string
740 def __ne__(self, string):
741 if isinstance(string, UserString):
742 return self.data != string.data
743 return self.data != string
744 def __lt__(self, string):
745 if isinstance(string, UserString):
746 return self.data < string.data
747 return self.data < string
748 def __le__(self, string):
749 if isinstance(string, UserString):
750 return self.data <= string.data
751 return self.data <= string
752 def __gt__(self, string):
753 if isinstance(string, UserString):
754 return self.data > string.data
755 return self.data > string
756 def __ge__(self, string):
757 if isinstance(string, UserString):
758 return self.data >= string.data
759 return self.data >= string
760
761 def __contains__(self, char):
762 if isinstance(char, UserString):
763 char = char.data
764 return char in self.data
765
766 def __len__(self): return len(self.data)
767 def __getitem__(self, index): return self.__class__(self.data[index])
768 def __add__(self, other):
769 if isinstance(other, UserString):
770 return self.__class__(self.data + other.data)
771 elif isinstance(other, str):
772 return self.__class__(self.data + other)
773 return self.__class__(self.data + str(other))
774 def __radd__(self, other):
775 if isinstance(other, str):
776 return self.__class__(other + self.data)
777 return self.__class__(str(other) + self.data)
778 def __mul__(self, n):
779 return self.__class__(self.data*n)
780 __rmul__ = __mul__
781 def __mod__(self, args):
782 return self.__class__(self.data % args)
783
784 # the following methods are defined in alphabetical order:
785 def capitalize(self): return self.__class__(self.data.capitalize())
786 def center(self, width, *args):
787 return self.__class__(self.data.center(width, *args))
788 def count(self, sub, start=0, end=_sys.maxsize):
789 if isinstance(sub, UserString):
790 sub = sub.data
791 return self.data.count(sub, start, end)
792 def encode(self, encoding=None, errors=None): # XXX improve this?
793 if encoding:
794 if errors:
795 return self.__class__(self.data.encode(encoding, errors))
796 return self.__class__(self.data.encode(encoding))
797 return self.__class__(self.data.encode())
798 def endswith(self, suffix, start=0, end=_sys.maxsize):
799 return self.data.endswith(suffix, start, end)
800 def expandtabs(self, tabsize=8):
801 return self.__class__(self.data.expandtabs(tabsize))
802 def find(self, sub, start=0, end=_sys.maxsize):
803 if isinstance(sub, UserString):
804 sub = sub.data
805 return self.data.find(sub, start, end)
806 def format(self, *args, **kwds):
807 return self.data.format(*args, **kwds)
808 def index(self, sub, start=0, end=_sys.maxsize):
809 return self.data.index(sub, start, end)
810 def isalpha(self): return self.data.isalpha()
811 def isalnum(self): return self.data.isalnum()
812 def isdecimal(self): return self.data.isdecimal()
813 def isdigit(self): return self.data.isdigit()
814 def isidentifier(self): return self.data.isidentifier()
815 def islower(self): return self.data.islower()
816 def isnumeric(self): return self.data.isnumeric()
817 def isspace(self): return self.data.isspace()
818 def istitle(self): return self.data.istitle()
819 def isupper(self): return self.data.isupper()
820 def join(self, seq): return self.data.join(seq)
821 def ljust(self, width, *args):
822 return self.__class__(self.data.ljust(width, *args))
823 def lower(self): return self.__class__(self.data.lower())
824 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
825 def partition(self, sep):
826 return self.data.partition(sep)
827 def replace(self, old, new, maxsplit=-1):
828 if isinstance(old, UserString):
829 old = old.data
830 if isinstance(new, UserString):
831 new = new.data
832 return self.__class__(self.data.replace(old, new, maxsplit))
833 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000834 if isinstance(sub, UserString):
835 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000836 return self.data.rfind(sub, start, end)
837 def rindex(self, sub, start=0, end=_sys.maxsize):
838 return self.data.rindex(sub, start, end)
839 def rjust(self, width, *args):
840 return self.__class__(self.data.rjust(width, *args))
841 def rpartition(self, sep):
842 return self.data.rpartition(sep)
843 def rstrip(self, chars=None):
844 return self.__class__(self.data.rstrip(chars))
845 def split(self, sep=None, maxsplit=-1):
846 return self.data.split(sep, maxsplit)
847 def rsplit(self, sep=None, maxsplit=-1):
848 return self.data.rsplit(sep, maxsplit)
849 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
850 def startswith(self, prefix, start=0, end=_sys.maxsize):
851 return self.data.startswith(prefix, start, end)
852 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
853 def swapcase(self): return self.__class__(self.data.swapcase())
854 def title(self): return self.__class__(self.data.title())
855 def translate(self, *args):
856 return self.__class__(self.data.translate(*args))
857 def upper(self): return self.__class__(self.data.upper())
858 def zfill(self, width): return self.__class__(self.data.zfill(width))
859
860
861
862################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000863### Simple tests
864################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000865
866if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000867 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000868 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000869 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000870 p = Point(x=10, y=20)
871 assert p == loads(dumps(p))
872
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000873 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000874 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000875 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000876 @property
877 def hypot(self):
878 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000879 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000880 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000881
Christian Heimes25bb7832008-01-11 16:17:00 +0000882 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000883 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000884
885 class Point(namedtuple('Point', 'x y')):
886 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000887 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000888 _make = classmethod(tuple.__new__)
889 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000890 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000891
892 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000893
Christian Heimes25bb7832008-01-11 16:17:00 +0000894 Point3D = namedtuple('Point3D', Point._fields + ('z',))
895 print(Point3D.__doc__)
896
Guido van Rossumd8faa362007-04-27 19:54:29 +0000897 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000898 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000899 print(TestResults(*doctest.testmod()))