blob: 1126fb1c99282bbb681c5eb6c6dfc1a0b48411a4 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettingerfa11db02010-09-12 04:12:42 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerfa11db02010-09-12 04:12:42 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger2d32f632009-03-02 21:24:57 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingerfa11db02010-09-12 04:12:42 +000034 # The back links are weakref proxies (to prevent circular references).
Raymond Hettinger2d32f632009-03-02 21:24:57 +000035
36 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000037 '''Initialize an ordered dictionary. Signature is the same as for
38 regular dictionaries, but keyword arguments are not recommended
39 because their insertion order is arbitrary.
40
41 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000042 if len(args) > 1:
43 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettingerdc08a142010-09-12 05:15:22 +000044 self.__in_repr = False # detects recursive repr
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000045 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000046 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000047 except AttributeError:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000048 self.__root = root = _Link() # sentinel node for the doubly linked list
49 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000050 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000051 self.update(*args, **kwds)
52
Raymond Hettingerfa11db02010-09-12 04:12:42 +000053 def __setitem__(self, key, value,
54 dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000055 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000056 # Setting a new item creates a new link which goes at the end of the linked
57 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000058 if key not in self:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000059 self.__map[key] = link = Link()
Raymond Hettingerf1736542009-03-23 05:19:21 +000060 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000061 last = root.prev
62 link.prev, link.next, link.key = last, root, key
63 last.next = link
64 root.prev = proxy(link)
65 dict.__setitem__(self, key, value)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000066
Raymond Hettingerfa11db02010-09-12 04:12:42 +000067 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000068 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000069 # Deleting an existing item uses self.__map to find the link which is
70 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger5be21b72010-08-01 22:10:57 +000071 dict_delitem(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000072 link = self.__map.pop(key)
Raymond Hettingerfa11db02010-09-12 04:12:42 +000073 link_prev = link.prev
74 link_next = link.next
75 link_prev.next = link_next
76 link_next.prev = link_prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000077
Raymond Hettingerfa11db02010-09-12 04:12:42 +000078 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000079 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000080 # Traverse the linked list in order.
81 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000082 curr = root.next
Raymond Hettingerf1736542009-03-23 05:19:21 +000083 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000084 yield curr.key
85 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000086
Raymond Hettingerfa11db02010-09-12 04:12:42 +000087 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000088 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000089 # Traverse the linked list in reverse order.
90 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +000091 curr = root.prev
Raymond Hettingerf1736542009-03-23 05:19:21 +000092 while curr is not root:
Raymond Hettingerfa11db02010-09-12 04:12:42 +000093 yield curr.key
94 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000095
96 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000097 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +000098 items = [[k, self[k]] for k in self]
Raymond Hettingerdc08a142010-09-12 05:15:22 +000099 tmp = self.__map, self.__root, self.__in_repr
100 del self.__map, self.__root, self.__in_repr
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000101 inst_dict = vars(self).copy()
Raymond Hettingerdc08a142010-09-12 05:15:22 +0000102 self.__map, self.__root, self.__in_repr = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000103 if inst_dict:
104 return (self.__class__, (items,), inst_dict)
105 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000106
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000107 def clear(self):
108 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000109 root = self.__root
110 root.prev = root.next = root
111 self.__map.clear()
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000112 dict.clear(self)
113
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000114 def popitem(self, last=True):
Raymond Hettinger331722d2010-09-02 18:44:16 +0000115 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
116 Pairs are returned in LIFO order if last is true or FIFO order if false.
117
118 '''
119 if not self:
120 raise KeyError('dictionary is empty')
121 root = self.__root
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000122 if last:
123 link = root.prev
124 link_prev = link.prev
125 link_prev.next = root
126 root.prev = link_prev
127 else:
128 link = root.next
129 link_next = link.next
130 root.next = link_next
131 link_next.prev = root
132 key = link.key
Raymond Hettinger331722d2010-09-02 18:44:16 +0000133 del self.__map[key]
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000134 value = dict.pop(self, key)
Raymond Hettinger331722d2010-09-02 18:44:16 +0000135 return key, value
136
Raymond Hettingerfa11db02010-09-12 04:12:42 +0000137 def move_to_end(self, key, last=True):
138 '''Move an existing element to the end (or beginning if last==False).
139
140 Raises KeyError if the element does not exist.
141 When last=True, acts like a fast version of self[key]=self.pop(key).
142
143 '''
144 link = self.__map[key]
145 link_prev = link.prev
146 link_next = link.next
147 link_prev.next = link_next
148 link_next.prev = link_prev
149 root = self.__root
150 if last:
151 last = root.prev
152 link.prev = last
153 link.next = root
154 last.next = root.prev = link
155 else:
156 first = root.next
157 link.prev = root
158 link.next = first
159 root.next = first.prev = link
160
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000161 setdefault = MutableMapping.setdefault
162 update = MutableMapping.update
163 pop = MutableMapping.pop
164 keys = MutableMapping.keys
165 values = MutableMapping.values
166 items = MutableMapping.items
Raymond Hettinger5be21b72010-08-01 22:10:57 +0000167 __ne__ = MutableMapping.__ne__
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000168
169 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000170 'od.__repr__() <==> repr(od)'
Raymond Hettingerdc08a142010-09-12 05:15:22 +0000171 if self.__in_repr:
172 return '...'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000173 if not self:
174 return '%s()' % (self.__class__.__name__,)
Raymond Hettingerdc08a142010-09-12 05:15:22 +0000175 self.__in_repr = True
176 try:
177 result = '%s(%r)' % (self.__class__.__name__, list(self.items()))
178 finally:
179 self.__in_repr = False
180 return result
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000181
182 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000183 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000184 return self.__class__(self)
185
186 @classmethod
187 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000188 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
189 and values equal to v (which defaults to None).
190
191 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000192 d = cls()
193 for key in iterable:
194 d[key] = value
195 return d
196
197 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000198 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
199 while comparison to a regular mapping is order-insensitive.
200
201 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000202 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000203 return len(self)==len(other) and \
204 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000205 return dict.__eq__(self, other)
206
Christian Heimes99170a52007-12-19 02:07:34 +0000207
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000208################################################################################
209### namedtuple
210################################################################################
211
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000212def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000213 """Returns a new subclass of tuple with named fields.
214
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000215 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000216 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000217 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000218 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000219 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000220 33
Christian Heimes99170a52007-12-19 02:07:34 +0000221 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000222 >>> x, y
223 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000224 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000225 33
Christian Heimes0449f632007-12-15 01:27:15 +0000226 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000227 >>> d['x']
228 11
229 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000230 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000231 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000232 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000233
234 """
235
Christian Heimes2380ac72008-01-09 00:17:24 +0000236 # Parse and validate the field names. Validation serves two purposes,
237 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000238 if isinstance(field_names, str):
239 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000240 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000241 if rename:
242 names = list(field_names)
243 seen = set()
244 for i, name in enumerate(names):
245 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
246 or not name or name[0].isdigit() or name.startswith('_')
247 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000248 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000249 seen.add(name)
250 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000251 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000252 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000253 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
254 if _iskeyword(name):
255 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
256 if name[0].isdigit():
257 raise ValueError('Type names and field names cannot start with a number: %r' % name)
258 seen_names = set()
259 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000260 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000261 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000262 if name in seen_names:
263 raise ValueError('Encountered duplicate field name: %r' % name)
264 seen_names.add(name)
265
266 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000267 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000268 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000269 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
270 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000271 '%(typename)s(%(argtxt)s)' \n
272 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000273 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000274 def __new__(_cls, %(argtxt)s):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000275 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000276 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000277 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000278 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000279 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000280 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000281 if len(result) != %(numfields)d:
282 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
283 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000284 def __repr__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000285 'Return a nicely formatted representation string'
Raymond Hettingerd331ce92010-08-08 01:13:42 +0000286 return self.__class__.__name__ + '(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000287 def _asdict(self):
288 'Return a new OrderedDict which maps field names to their values'
289 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000290 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000291 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000292 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000293 if kwds:
294 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000295 return result \n
296 def __getnewargs__(self):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000297 'Return self as a plain tuple. Used by copy and pickle.'
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000298 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000299 for i, name in enumerate(field_names):
Raymond Hettinger7b0d3c62010-04-02 18:54:02 +0000300 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000301 if verbose:
302 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000303
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000304 # Execute the template string in a temporary namespace and
305 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000306 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
307 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000308 try:
309 exec(template, namespace)
310 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000311 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000312 result = namespace[typename]
313
314 # For pickling to work, the __module__ variable needs to be set to the frame
315 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000316 # sys._getframe is not defined (Jython for example) or sys._getframe is not
317 # defined for arguments greater than 0 (IronPython).
318 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000319 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000320 except (AttributeError, ValueError):
321 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000322
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000323 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000324
Guido van Rossumd8faa362007-04-27 19:54:29 +0000325
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000326########################################################################
327### Counter
328########################################################################
329
330class Counter(dict):
331 '''Dict subclass for counting hashable items. Sometimes called a bag
332 or multiset. Elements are stored as dictionary keys and their counts
333 are stored as dictionary values.
334
335 >>> c = Counter('abracadabra') # count elements from a string
336
337 >>> c.most_common(3) # three most common elements
338 [('a', 5), ('r', 2), ('b', 2)]
339 >>> sorted(c) # list all unique elements
340 ['a', 'b', 'c', 'd', 'r']
341 >>> ''.join(sorted(c.elements())) # list elements with repetitions
342 'aaaaabbcdrr'
343 >>> sum(c.values()) # total of all counts
344 11
345
346 >>> c['a'] # count of letter 'a'
347 5
348 >>> for elem in 'shazam': # update counts from an iterable
349 ... c[elem] += 1 # by adding 1 to each element's count
350 >>> c['a'] # now there are seven 'a'
351 7
352 >>> del c['r'] # remove all 'r'
353 >>> c['r'] # now there are zero 'r'
354 0
355
356 >>> d = Counter('simsalabim') # make another counter
357 >>> c.update(d) # add in the second counter
358 >>> c['a'] # now there are nine 'a'
359 9
360
361 >>> c.clear() # empty the counter
362 >>> c
363 Counter()
364
365 Note: If a count is set to zero or reduced to zero, it will remain
366 in the counter until the entry is deleted or the counter is cleared:
367
368 >>> c = Counter('aaabbc')
369 >>> c['b'] -= 2 # reduce the count of 'b' by two
370 >>> c.most_common() # 'b' is still in, but its count is zero
371 [('a', 3), ('c', 1), ('b', 0)]
372
373 '''
374 # References:
375 # http://en.wikipedia.org/wiki/Multiset
376 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
377 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
378 # http://code.activestate.com/recipes/259174/
379 # Knuth, TAOCP Vol. II section 4.6.3
380
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000381 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000382 '''Create a new, empty Counter object. And if given, count elements
383 from an input iterable. Or, initialize the count from another mapping
384 of elements to their counts.
385
386 >>> c = Counter() # a new, empty counter
387 >>> c = Counter('gallahad') # a new counter from an iterable
388 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000389 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000390
391 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000392 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000393
394 def __missing__(self, key):
395 'The count of elements not in the Counter is zero.'
396 # Needed so that self[missing_item] does not raise KeyError
397 return 0
398
399 def most_common(self, n=None):
400 '''List the n most common elements and their counts from the most
401 common to the least. If n is None, then list all element counts.
402
403 >>> Counter('abracadabra').most_common(3)
404 [('a', 5), ('r', 2), ('b', 2)]
405
406 '''
407 # Emulate Bag.sortedByCount from Smalltalk
408 if n is None:
409 return sorted(self.items(), key=_itemgetter(1), reverse=True)
410 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
411
412 def elements(self):
413 '''Iterator over elements repeating each as many times as its count.
414
415 >>> c = Counter('ABCABC')
416 >>> sorted(c.elements())
417 ['A', 'A', 'B', 'B', 'C', 'C']
418
419 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
420 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
421 >>> product = 1
422 >>> for factor in prime_factors.elements(): # loop over factors
423 ... product *= factor # and multiply them
424 >>> product
425 1836
426
427 Note, if an element's count has been set to zero or is a negative
428 number, elements() will ignore it.
429
430 '''
431 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
432 return _chain.from_iterable(_starmap(_repeat, self.items()))
433
434 # Override dict methods where necessary
435
436 @classmethod
437 def fromkeys(cls, iterable, v=None):
438 # There is no equivalent method for counters because setting v=1
439 # means that no element can have a count greater than one.
440 raise NotImplementedError(
441 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
442
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000443 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000444 '''Like dict.update() but add counts instead of replacing them.
445
446 Source can be an iterable, a dictionary, or another Counter instance.
447
448 >>> c = Counter('which')
449 >>> c.update('witch') # add elements from another iterable
450 >>> d = Counter('watch')
451 >>> c.update(d) # add elements from another counter
452 >>> c['h'] # four 'h' in which, witch, and watch
453 4
454
455 '''
456 # The regular dict.update() operation makes no sense here because the
457 # replace behavior results in the some of original untouched counts
458 # being mixed-in with all of the other counts for a mismash that
459 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000460 # contexts. Instead, we implement straight-addition. Both the inputs
461 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000462
463 if iterable is not None:
464 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000465 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000466 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000467 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000468 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000469 else:
470 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000471 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000472 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000473 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000474 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000475 if kwds:
476 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000477
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000478 def subtract(self, iterable=None, **kwds):
479 '''Like dict.update() but subtracts counts instead of replacing them.
480 Counts can be reduced below zero. Both the inputs and outputs are
481 allowed to contain zero and negative counts.
482
483 Source can be an iterable, a dictionary, or another Counter instance.
484
485 >>> c = Counter('which')
486 >>> c.subtract('witch') # subtract elements from another iterable
487 >>> c.subtract(Counter('watch')) # subtract elements from another counter
488 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
489 0
490 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
491 -1
492
493 '''
494 if iterable is not None:
Raymond Hettingerfc3c9cd2010-04-11 20:41:56 +0000495 self_get = self.get
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000496 if isinstance(iterable, Mapping):
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000497 for elem, count in iterable.items():
498 self[elem] = self_get(elem, 0) - count
499 else:
Raymond Hettinger9c01e442010-04-03 10:32:58 +0000500 for elem in iterable:
501 self[elem] = self_get(elem, 0) - 1
502 if kwds:
503 self.subtract(kwds)
504
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000505 def copy(self):
506 'Like dict.copy() but returns a Counter instance instead of a dict.'
507 return Counter(self)
508
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000509 def __delitem__(self, elem):
510 'Like dict.__delitem__() but does not raise KeyError for missing values.'
511 if elem in self:
512 dict.__delitem__(self, elem)
513
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000514 def __repr__(self):
515 if not self:
516 return '%s()' % self.__class__.__name__
517 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
518 return '%s({%s})' % (self.__class__.__name__, items)
519
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000520 # Multiset-style mathematical operations discussed in:
521 # Knuth TAOCP Volume II section 4.6.3 exercise 19
522 # and at http://en.wikipedia.org/wiki/Multiset
523 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000524 # Outputs guaranteed to only include positive counts.
525 #
526 # To strip negative and zero counts, add-in an empty counter:
527 # c += Counter()
528
529 def __add__(self, other):
530 '''Add counts from two counters.
531
532 >>> Counter('abbb') + Counter('bcc')
533 Counter({'b': 4, 'c': 2, 'a': 1})
534
535 '''
536 if not isinstance(other, Counter):
537 return NotImplemented
538 result = Counter()
539 for elem in set(self) | set(other):
540 newcount = self[elem] + other[elem]
541 if newcount > 0:
542 result[elem] = newcount
543 return result
544
545 def __sub__(self, other):
546 ''' Subtract count, but keep only results with positive counts.
547
548 >>> Counter('abbbc') - Counter('bccd')
549 Counter({'b': 2, 'a': 1})
550
551 '''
552 if not isinstance(other, Counter):
553 return NotImplemented
554 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000555 for elem in set(self) | set(other):
556 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000557 if newcount > 0:
558 result[elem] = newcount
559 return result
560
561 def __or__(self, other):
562 '''Union is the maximum of value in either of the input counters.
563
564 >>> Counter('abbb') | Counter('bcc')
565 Counter({'b': 3, 'c': 2, 'a': 1})
566
567 '''
568 if not isinstance(other, Counter):
569 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000570 result = Counter()
571 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000572 p, q = self[elem], other[elem]
573 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000574 if newcount > 0:
575 result[elem] = newcount
576 return result
577
578 def __and__(self, other):
579 ''' Intersection is the minimum of corresponding counts.
580
581 >>> Counter('abbb') & Counter('bcc')
582 Counter({'b': 1})
583
584 '''
585 if not isinstance(other, Counter):
586 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000587 result = Counter()
588 if len(self) < len(other):
589 self, other = other, self
590 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000591 p, q = self[elem], other[elem]
592 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000593 if newcount > 0:
594 result[elem] = newcount
595 return result
596
Guido van Rossumd8faa362007-04-27 19:54:29 +0000597
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000598################################################################################
599### UserDict
600################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000601
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000602class UserDict(MutableMapping):
603
604 # Start by filling-out the abstract methods
605 def __init__(self, dict=None, **kwargs):
606 self.data = {}
607 if dict is not None:
608 self.update(dict)
609 if len(kwargs):
610 self.update(kwargs)
611 def __len__(self): return len(self.data)
612 def __getitem__(self, key):
613 if key in self.data:
614 return self.data[key]
615 if hasattr(self.__class__, "__missing__"):
616 return self.__class__.__missing__(self, key)
617 raise KeyError(key)
618 def __setitem__(self, key, item): self.data[key] = item
619 def __delitem__(self, key): del self.data[key]
620 def __iter__(self):
621 return iter(self.data)
622
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000623 # Modify __contains__ to work correctly when __missing__ is present
624 def __contains__(self, key):
625 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000626
627 # Now, add the methods in dicts but not in MutableMapping
628 def __repr__(self): return repr(self.data)
629 def copy(self):
630 if self.__class__ is UserDict:
631 return UserDict(self.data.copy())
632 import copy
633 data = self.data
634 try:
635 self.data = {}
636 c = copy.copy(self)
637 finally:
638 self.data = data
639 c.update(self)
640 return c
641 @classmethod
642 def fromkeys(cls, iterable, value=None):
643 d = cls()
644 for key in iterable:
645 d[key] = value
646 return d
647
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000648
649
650################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000651### UserList
652################################################################################
653
654class UserList(MutableSequence):
655 """A more or less complete user-defined wrapper around list objects."""
656 def __init__(self, initlist=None):
657 self.data = []
658 if initlist is not None:
659 # XXX should this accept an arbitrary sequence?
660 if type(initlist) == type(self.data):
661 self.data[:] = initlist
662 elif isinstance(initlist, UserList):
663 self.data[:] = initlist.data[:]
664 else:
665 self.data = list(initlist)
666 def __repr__(self): return repr(self.data)
667 def __lt__(self, other): return self.data < self.__cast(other)
668 def __le__(self, other): return self.data <= self.__cast(other)
669 def __eq__(self, other): return self.data == self.__cast(other)
670 def __ne__(self, other): return self.data != self.__cast(other)
671 def __gt__(self, other): return self.data > self.__cast(other)
672 def __ge__(self, other): return self.data >= self.__cast(other)
673 def __cast(self, other):
674 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000675 def __contains__(self, item): return item in self.data
676 def __len__(self): return len(self.data)
677 def __getitem__(self, i): return self.data[i]
678 def __setitem__(self, i, item): self.data[i] = item
679 def __delitem__(self, i): del self.data[i]
680 def __add__(self, other):
681 if isinstance(other, UserList):
682 return self.__class__(self.data + other.data)
683 elif isinstance(other, type(self.data)):
684 return self.__class__(self.data + other)
685 return self.__class__(self.data + list(other))
686 def __radd__(self, other):
687 if isinstance(other, UserList):
688 return self.__class__(other.data + self.data)
689 elif isinstance(other, type(self.data)):
690 return self.__class__(other + self.data)
691 return self.__class__(list(other) + self.data)
692 def __iadd__(self, other):
693 if isinstance(other, UserList):
694 self.data += other.data
695 elif isinstance(other, type(self.data)):
696 self.data += other
697 else:
698 self.data += list(other)
699 return self
700 def __mul__(self, n):
701 return self.__class__(self.data*n)
702 __rmul__ = __mul__
703 def __imul__(self, n):
704 self.data *= n
705 return self
706 def append(self, item): self.data.append(item)
707 def insert(self, i, item): self.data.insert(i, item)
708 def pop(self, i=-1): return self.data.pop(i)
709 def remove(self, item): self.data.remove(item)
710 def count(self, item): return self.data.count(item)
711 def index(self, item, *args): return self.data.index(item, *args)
712 def reverse(self): self.data.reverse()
713 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
714 def extend(self, other):
715 if isinstance(other, UserList):
716 self.data.extend(other.data)
717 else:
718 self.data.extend(other)
719
720
721
722################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000723### UserString
724################################################################################
725
726class UserString(Sequence):
727 def __init__(self, seq):
728 if isinstance(seq, str):
729 self.data = seq
730 elif isinstance(seq, UserString):
731 self.data = seq.data[:]
732 else:
733 self.data = str(seq)
734 def __str__(self): return str(self.data)
735 def __repr__(self): return repr(self.data)
736 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000737 def __float__(self): return float(self.data)
738 def __complex__(self): return complex(self.data)
739 def __hash__(self): return hash(self.data)
740
741 def __eq__(self, string):
742 if isinstance(string, UserString):
743 return self.data == string.data
744 return self.data == string
745 def __ne__(self, string):
746 if isinstance(string, UserString):
747 return self.data != string.data
748 return self.data != string
749 def __lt__(self, string):
750 if isinstance(string, UserString):
751 return self.data < string.data
752 return self.data < string
753 def __le__(self, string):
754 if isinstance(string, UserString):
755 return self.data <= string.data
756 return self.data <= string
757 def __gt__(self, string):
758 if isinstance(string, UserString):
759 return self.data > string.data
760 return self.data > string
761 def __ge__(self, string):
762 if isinstance(string, UserString):
763 return self.data >= string.data
764 return self.data >= string
765
766 def __contains__(self, char):
767 if isinstance(char, UserString):
768 char = char.data
769 return char in self.data
770
771 def __len__(self): return len(self.data)
772 def __getitem__(self, index): return self.__class__(self.data[index])
773 def __add__(self, other):
774 if isinstance(other, UserString):
775 return self.__class__(self.data + other.data)
776 elif isinstance(other, str):
777 return self.__class__(self.data + other)
778 return self.__class__(self.data + str(other))
779 def __radd__(self, other):
780 if isinstance(other, str):
781 return self.__class__(other + self.data)
782 return self.__class__(str(other) + self.data)
783 def __mul__(self, n):
784 return self.__class__(self.data*n)
785 __rmul__ = __mul__
786 def __mod__(self, args):
787 return self.__class__(self.data % args)
788
789 # the following methods are defined in alphabetical order:
790 def capitalize(self): return self.__class__(self.data.capitalize())
791 def center(self, width, *args):
792 return self.__class__(self.data.center(width, *args))
793 def count(self, sub, start=0, end=_sys.maxsize):
794 if isinstance(sub, UserString):
795 sub = sub.data
796 return self.data.count(sub, start, end)
797 def encode(self, encoding=None, errors=None): # XXX improve this?
798 if encoding:
799 if errors:
800 return self.__class__(self.data.encode(encoding, errors))
801 return self.__class__(self.data.encode(encoding))
802 return self.__class__(self.data.encode())
803 def endswith(self, suffix, start=0, end=_sys.maxsize):
804 return self.data.endswith(suffix, start, end)
805 def expandtabs(self, tabsize=8):
806 return self.__class__(self.data.expandtabs(tabsize))
807 def find(self, sub, start=0, end=_sys.maxsize):
808 if isinstance(sub, UserString):
809 sub = sub.data
810 return self.data.find(sub, start, end)
811 def format(self, *args, **kwds):
812 return self.data.format(*args, **kwds)
813 def index(self, sub, start=0, end=_sys.maxsize):
814 return self.data.index(sub, start, end)
815 def isalpha(self): return self.data.isalpha()
816 def isalnum(self): return self.data.isalnum()
817 def isdecimal(self): return self.data.isdecimal()
818 def isdigit(self): return self.data.isdigit()
819 def isidentifier(self): return self.data.isidentifier()
820 def islower(self): return self.data.islower()
821 def isnumeric(self): return self.data.isnumeric()
822 def isspace(self): return self.data.isspace()
823 def istitle(self): return self.data.istitle()
824 def isupper(self): return self.data.isupper()
825 def join(self, seq): return self.data.join(seq)
826 def ljust(self, width, *args):
827 return self.__class__(self.data.ljust(width, *args))
828 def lower(self): return self.__class__(self.data.lower())
829 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
830 def partition(self, sep):
831 return self.data.partition(sep)
832 def replace(self, old, new, maxsplit=-1):
833 if isinstance(old, UserString):
834 old = old.data
835 if isinstance(new, UserString):
836 new = new.data
837 return self.__class__(self.data.replace(old, new, maxsplit))
838 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000839 if isinstance(sub, UserString):
840 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000841 return self.data.rfind(sub, start, end)
842 def rindex(self, sub, start=0, end=_sys.maxsize):
843 return self.data.rindex(sub, start, end)
844 def rjust(self, width, *args):
845 return self.__class__(self.data.rjust(width, *args))
846 def rpartition(self, sep):
847 return self.data.rpartition(sep)
848 def rstrip(self, chars=None):
849 return self.__class__(self.data.rstrip(chars))
850 def split(self, sep=None, maxsplit=-1):
851 return self.data.split(sep, maxsplit)
852 def rsplit(self, sep=None, maxsplit=-1):
853 return self.data.rsplit(sep, maxsplit)
854 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
855 def startswith(self, prefix, start=0, end=_sys.maxsize):
856 return self.data.startswith(prefix, start, end)
857 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
858 def swapcase(self): return self.__class__(self.data.swapcase())
859 def title(self): return self.__class__(self.data.title())
860 def translate(self, *args):
861 return self.__class__(self.data.translate(*args))
862 def upper(self): return self.__class__(self.data.upper())
863 def zfill(self, width): return self.__class__(self.data.zfill(width))
864
865
866
867################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000868### Simple tests
869################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000870
871if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000872 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000873 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000874 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000875 p = Point(x=10, y=20)
876 assert p == loads(dumps(p))
877
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000878 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000879 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000880 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000881 @property
882 def hypot(self):
883 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000884 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000885 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000886
Christian Heimes25bb7832008-01-11 16:17:00 +0000887 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000888 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000889
890 class Point(namedtuple('Point', 'x y')):
891 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000892 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000893 _make = classmethod(tuple.__new__)
894 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000895 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000896
897 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000898
Christian Heimes25bb7832008-01-11 16:17:00 +0000899 Point3D = namedtuple('Point3D', Point._fields + ('z',))
900 print(Point3D.__doc__)
901
Guido van Rossumd8faa362007-04-27 19:54:29 +0000902 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000903 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000904 print(TestResults(*doctest.testmod()))