blob: 3e010545239b77a4ba045419ccc6e00fd543ce72 [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1736542009-03-23 05:19:21 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger2d32f632009-03-02 21:24:57 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000046 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000047 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000048 except AttributeError:
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000049 self.__root = root = _Link() # sentinel node for the doubly linked list
50 root.prev = root.next = root
51 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000052 self.update(*args, **kwds)
53
54 def clear(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000055 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerf1736542009-03-23 05:19:21 +000056 root = self.__root
57 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000058 self.__map.clear()
Raymond Hettinger2d32f632009-03-02 21:24:57 +000059 dict.clear(self)
60
61 def __setitem__(self, key, value):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000062 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000063 # Setting a new item creates a new link which goes at the end of the linked
64 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000065 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000066 self.__map[key] = link = _Link()
67 root = self.__root
68 last = root.prev
69 link.prev, link.next, link.key = last, root, key
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000070 last.next = root.prev = _proxy(link)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000071 dict.__setitem__(self, key, value)
72
73 def __delitem__(self, key):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000074 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000075 # Deleting an existing item uses self.__map to find the link which is
76 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000077 dict.__delitem__(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000078 link = self.__map.pop(key)
79 link.prev.next = link.next
80 link.next.prev = link.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000081
82 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000083 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 # Traverse the linked list in order.
85 root = self.__root
86 curr = root.next
87 while curr is not root:
88 yield curr.key
89 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000090
91 def __reversed__(self):
Raymond Hettinger2352cf32009-04-08 01:16:27 +000092 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 # Traverse the linked list in reverse order.
94 root = self.__root
95 curr = root.prev
96 while curr is not root:
97 yield curr.key
98 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
100 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000101 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000102 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +0000103 tmp = self.__map, self.__root
104 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000105 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +0000106 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000107 if inst_dict:
108 return (self.__class__, (items,), inst_dict)
109 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000110
111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
117
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000118 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000119 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
120 Pairs are returned in LIFO order if last is true or FIFO order if false.
121
122 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000123 if not self:
124 raise KeyError('dictionary is empty')
Raymond Hettinger446a4f22009-04-08 08:28:28 +0000125 key = next(reversed(self) if last else iter(self))
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000126 value = self.pop(key)
127 return key, value
128
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000129 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000130 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000131 if not self:
132 return '%s()' % (self.__class__.__name__,)
133 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
134
135 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000136 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000137 return self.__class__(self)
138
139 @classmethod
140 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000141 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
142 and values equal to v (which defaults to None).
143
144 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000145 d = cls()
146 for key in iterable:
147 d[key] = value
148 return d
149
150 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000151 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
152 while comparison to a regular mapping is order-insensitive.
153
154 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000155 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000156 return len(self)==len(other) and \
157 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000158 return dict.__eq__(self, other)
159
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000160 def __ne__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000161 '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
162 while comparison to a regular mapping is order-insensitive.
163
164 '''
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000165 return not self == other
166
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000167
Christian Heimes99170a52007-12-19 02:07:34 +0000168
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000169################################################################################
170### namedtuple
171################################################################################
172
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000173def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000174 """Returns a new subclass of tuple with named fields.
175
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000176 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000177 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000178 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000179 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000180 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000181 33
Christian Heimes99170a52007-12-19 02:07:34 +0000182 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000183 >>> x, y
184 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000185 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000186 33
Christian Heimes0449f632007-12-15 01:27:15 +0000187 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000188 >>> d['x']
189 11
190 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000191 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000192 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000193 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000194
195 """
196
Christian Heimes2380ac72008-01-09 00:17:24 +0000197 # Parse and validate the field names. Validation serves two purposes,
198 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000199 if isinstance(field_names, str):
200 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000201 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000202 if rename:
203 names = list(field_names)
204 seen = set()
205 for i, name in enumerate(names):
206 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
207 or not name or name[0].isdigit() or name.startswith('_')
208 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000209 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000210 seen.add(name)
211 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000212 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000213 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000214 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
215 if _iskeyword(name):
216 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
217 if name[0].isdigit():
218 raise ValueError('Type names and field names cannot start with a number: %r' % name)
219 seen_names = set()
220 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000221 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000222 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000223 if name in seen_names:
224 raise ValueError('Encountered duplicate field name: %r' % name)
225 seen_names.add(name)
226
227 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000228 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000229 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000230 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
231 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000232 '%(typename)s(%(argtxt)s)' \n
233 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000234 _fields = %(field_names)r \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000235 def __new__(_cls, %(argtxt)s):
236 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000237 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000238 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000239 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000240 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000241 if len(result) != %(numfields)d:
242 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
243 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000244 def __repr__(self):
Christian Heimes0449f632007-12-15 01:27:15 +0000245 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000246 def _asdict(self):
247 'Return a new OrderedDict which maps field names to their values'
248 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000249 def _replace(_self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000250 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000251 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Christian Heimesfaf2f632008-01-06 16:59:19 +0000252 if kwds:
253 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000254 return result \n
255 def __getnewargs__(self):
256 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000257 for i, name in enumerate(field_names):
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000258 template += ' %s = _property(_itemgetter(%d))\n' % (name, i)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000259 if verbose:
260 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000261
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000262 # Execute the template string in a temporary namespace and
263 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettinger089ba7f2009-05-27 00:38:24 +0000264 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
265 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000266 try:
267 exec(template, namespace)
268 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000269 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000270 result = namespace[typename]
271
272 # For pickling to work, the __module__ variable needs to be set to the frame
273 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000274 # sys._getframe is not defined (Jython for example) or sys._getframe is not
275 # defined for arguments greater than 0 (IronPython).
276 try:
Raymond Hettinger0f055172009-01-27 10:06:09 +0000277 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson25c95f12009-05-08 20:42:26 +0000278 except (AttributeError, ValueError):
279 pass
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000280
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000281 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000282
Guido van Rossumd8faa362007-04-27 19:54:29 +0000283
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000284########################################################################
285### Counter
286########################################################################
287
288class Counter(dict):
289 '''Dict subclass for counting hashable items. Sometimes called a bag
290 or multiset. Elements are stored as dictionary keys and their counts
291 are stored as dictionary values.
292
293 >>> c = Counter('abracadabra') # count elements from a string
294
295 >>> c.most_common(3) # three most common elements
296 [('a', 5), ('r', 2), ('b', 2)]
297 >>> sorted(c) # list all unique elements
298 ['a', 'b', 'c', 'd', 'r']
299 >>> ''.join(sorted(c.elements())) # list elements with repetitions
300 'aaaaabbcdrr'
301 >>> sum(c.values()) # total of all counts
302 11
303
304 >>> c['a'] # count of letter 'a'
305 5
306 >>> for elem in 'shazam': # update counts from an iterable
307 ... c[elem] += 1 # by adding 1 to each element's count
308 >>> c['a'] # now there are seven 'a'
309 7
310 >>> del c['r'] # remove all 'r'
311 >>> c['r'] # now there are zero 'r'
312 0
313
314 >>> d = Counter('simsalabim') # make another counter
315 >>> c.update(d) # add in the second counter
316 >>> c['a'] # now there are nine 'a'
317 9
318
319 >>> c.clear() # empty the counter
320 >>> c
321 Counter()
322
323 Note: If a count is set to zero or reduced to zero, it will remain
324 in the counter until the entry is deleted or the counter is cleared:
325
326 >>> c = Counter('aaabbc')
327 >>> c['b'] -= 2 # reduce the count of 'b' by two
328 >>> c.most_common() # 'b' is still in, but its count is zero
329 [('a', 3), ('c', 1), ('b', 0)]
330
331 '''
332 # References:
333 # http://en.wikipedia.org/wiki/Multiset
334 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
335 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
336 # http://code.activestate.com/recipes/259174/
337 # Knuth, TAOCP Vol. II section 4.6.3
338
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000339 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000340 '''Create a new, empty Counter object. And if given, count elements
341 from an input iterable. Or, initialize the count from another mapping
342 of elements to their counts.
343
344 >>> c = Counter() # a new, empty counter
345 >>> c = Counter('gallahad') # a new counter from an iterable
346 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000347 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000348
349 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000350 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000351
352 def __missing__(self, key):
353 'The count of elements not in the Counter is zero.'
354 # Needed so that self[missing_item] does not raise KeyError
355 return 0
356
357 def most_common(self, n=None):
358 '''List the n most common elements and their counts from the most
359 common to the least. If n is None, then list all element counts.
360
361 >>> Counter('abracadabra').most_common(3)
362 [('a', 5), ('r', 2), ('b', 2)]
363
364 '''
365 # Emulate Bag.sortedByCount from Smalltalk
366 if n is None:
367 return sorted(self.items(), key=_itemgetter(1), reverse=True)
368 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
369
370 def elements(self):
371 '''Iterator over elements repeating each as many times as its count.
372
373 >>> c = Counter('ABCABC')
374 >>> sorted(c.elements())
375 ['A', 'A', 'B', 'B', 'C', 'C']
376
377 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
378 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
379 >>> product = 1
380 >>> for factor in prime_factors.elements(): # loop over factors
381 ... product *= factor # and multiply them
382 >>> product
383 1836
384
385 Note, if an element's count has been set to zero or is a negative
386 number, elements() will ignore it.
387
388 '''
389 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
390 return _chain.from_iterable(_starmap(_repeat, self.items()))
391
392 # Override dict methods where necessary
393
394 @classmethod
395 def fromkeys(cls, iterable, v=None):
396 # There is no equivalent method for counters because setting v=1
397 # means that no element can have a count greater than one.
398 raise NotImplementedError(
399 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
400
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000401 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000402 '''Like dict.update() but add counts instead of replacing them.
403
404 Source can be an iterable, a dictionary, or another Counter instance.
405
406 >>> c = Counter('which')
407 >>> c.update('witch') # add elements from another iterable
408 >>> d = Counter('watch')
409 >>> c.update(d) # add elements from another counter
410 >>> c['h'] # four 'h' in which, witch, and watch
411 4
412
413 '''
414 # The regular dict.update() operation makes no sense here because the
415 # replace behavior results in the some of original untouched counts
416 # being mixed-in with all of the other counts for a mismash that
417 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000418 # contexts. Instead, we implement straight-addition. Both the inputs
419 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000420
421 if iterable is not None:
422 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000423 if self:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000424 self_get = self.get
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000425 for elem, count in iterable.items():
Raymond Hettingerf9092022009-06-29 18:30:43 +0000426 self[elem] = count + self_get(elem, 0)
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000427 else:
428 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000429 else:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000430 self_get = self.get
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000431 for elem in iterable:
Raymond Hettingerf9092022009-06-29 18:30:43 +0000432 self[elem] = 1 + self_get(elem, 0)
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000433 if kwds:
434 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000435
436 def copy(self):
437 'Like dict.copy() but returns a Counter instance instead of a dict.'
438 return Counter(self)
439
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000440 def __delitem__(self, elem):
441 'Like dict.__delitem__() but does not raise KeyError for missing values.'
442 if elem in self:
443 dict.__delitem__(self, elem)
444
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000445 def __repr__(self):
446 if not self:
447 return '%s()' % self.__class__.__name__
448 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
449 return '%s({%s})' % (self.__class__.__name__, items)
450
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000451 # Multiset-style mathematical operations discussed in:
452 # Knuth TAOCP Volume II section 4.6.3 exercise 19
453 # and at http://en.wikipedia.org/wiki/Multiset
454 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000455 # Outputs guaranteed to only include positive counts.
456 #
457 # To strip negative and zero counts, add-in an empty counter:
458 # c += Counter()
459
460 def __add__(self, other):
461 '''Add counts from two counters.
462
463 >>> Counter('abbb') + Counter('bcc')
464 Counter({'b': 4, 'c': 2, 'a': 1})
465
466 '''
467 if not isinstance(other, Counter):
468 return NotImplemented
469 result = Counter()
470 for elem in set(self) | set(other):
471 newcount = self[elem] + other[elem]
472 if newcount > 0:
473 result[elem] = newcount
474 return result
475
476 def __sub__(self, other):
477 ''' Subtract count, but keep only results with positive counts.
478
479 >>> Counter('abbbc') - Counter('bccd')
480 Counter({'b': 2, 'a': 1})
481
482 '''
483 if not isinstance(other, Counter):
484 return NotImplemented
485 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000486 for elem in set(self) | set(other):
487 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000488 if newcount > 0:
489 result[elem] = newcount
490 return result
491
492 def __or__(self, other):
493 '''Union is the maximum of value in either of the input counters.
494
495 >>> Counter('abbb') | Counter('bcc')
496 Counter({'b': 3, 'c': 2, 'a': 1})
497
498 '''
499 if not isinstance(other, Counter):
500 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000501 result = Counter()
502 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000503 p, q = self[elem], other[elem]
504 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000505 if newcount > 0:
506 result[elem] = newcount
507 return result
508
509 def __and__(self, other):
510 ''' Intersection is the minimum of corresponding counts.
511
512 >>> Counter('abbb') & Counter('bcc')
513 Counter({'b': 1})
514
515 '''
516 if not isinstance(other, Counter):
517 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000518 result = Counter()
519 if len(self) < len(other):
520 self, other = other, self
521 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000522 p, q = self[elem], other[elem]
523 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000524 if newcount > 0:
525 result[elem] = newcount
526 return result
527
Guido van Rossumd8faa362007-04-27 19:54:29 +0000528
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000529################################################################################
530### UserDict
531################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000532
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000533class UserDict(MutableMapping):
534
535 # Start by filling-out the abstract methods
536 def __init__(self, dict=None, **kwargs):
537 self.data = {}
538 if dict is not None:
539 self.update(dict)
540 if len(kwargs):
541 self.update(kwargs)
542 def __len__(self): return len(self.data)
543 def __getitem__(self, key):
544 if key in self.data:
545 return self.data[key]
546 if hasattr(self.__class__, "__missing__"):
547 return self.__class__.__missing__(self, key)
548 raise KeyError(key)
549 def __setitem__(self, key, item): self.data[key] = item
550 def __delitem__(self, key): del self.data[key]
551 def __iter__(self):
552 return iter(self.data)
553
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000554 # Modify __contains__ to work correctly when __missing__ is present
555 def __contains__(self, key):
556 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000557
558 # Now, add the methods in dicts but not in MutableMapping
559 def __repr__(self): return repr(self.data)
560 def copy(self):
561 if self.__class__ is UserDict:
562 return UserDict(self.data.copy())
563 import copy
564 data = self.data
565 try:
566 self.data = {}
567 c = copy.copy(self)
568 finally:
569 self.data = data
570 c.update(self)
571 return c
572 @classmethod
573 def fromkeys(cls, iterable, value=None):
574 d = cls()
575 for key in iterable:
576 d[key] = value
577 return d
578
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000579
580
581################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000582### UserList
583################################################################################
584
585class UserList(MutableSequence):
586 """A more or less complete user-defined wrapper around list objects."""
587 def __init__(self, initlist=None):
588 self.data = []
589 if initlist is not None:
590 # XXX should this accept an arbitrary sequence?
591 if type(initlist) == type(self.data):
592 self.data[:] = initlist
593 elif isinstance(initlist, UserList):
594 self.data[:] = initlist.data[:]
595 else:
596 self.data = list(initlist)
597 def __repr__(self): return repr(self.data)
598 def __lt__(self, other): return self.data < self.__cast(other)
599 def __le__(self, other): return self.data <= self.__cast(other)
600 def __eq__(self, other): return self.data == self.__cast(other)
601 def __ne__(self, other): return self.data != self.__cast(other)
602 def __gt__(self, other): return self.data > self.__cast(other)
603 def __ge__(self, other): return self.data >= self.__cast(other)
604 def __cast(self, other):
605 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000606 def __contains__(self, item): return item in self.data
607 def __len__(self): return len(self.data)
608 def __getitem__(self, i): return self.data[i]
609 def __setitem__(self, i, item): self.data[i] = item
610 def __delitem__(self, i): del self.data[i]
611 def __add__(self, other):
612 if isinstance(other, UserList):
613 return self.__class__(self.data + other.data)
614 elif isinstance(other, type(self.data)):
615 return self.__class__(self.data + other)
616 return self.__class__(self.data + list(other))
617 def __radd__(self, other):
618 if isinstance(other, UserList):
619 return self.__class__(other.data + self.data)
620 elif isinstance(other, type(self.data)):
621 return self.__class__(other + self.data)
622 return self.__class__(list(other) + self.data)
623 def __iadd__(self, other):
624 if isinstance(other, UserList):
625 self.data += other.data
626 elif isinstance(other, type(self.data)):
627 self.data += other
628 else:
629 self.data += list(other)
630 return self
631 def __mul__(self, n):
632 return self.__class__(self.data*n)
633 __rmul__ = __mul__
634 def __imul__(self, n):
635 self.data *= n
636 return self
637 def append(self, item): self.data.append(item)
638 def insert(self, i, item): self.data.insert(i, item)
639 def pop(self, i=-1): return self.data.pop(i)
640 def remove(self, item): self.data.remove(item)
641 def count(self, item): return self.data.count(item)
642 def index(self, item, *args): return self.data.index(item, *args)
643 def reverse(self): self.data.reverse()
644 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
645 def extend(self, other):
646 if isinstance(other, UserList):
647 self.data.extend(other.data)
648 else:
649 self.data.extend(other)
650
651
652
653################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000654### UserString
655################################################################################
656
657class UserString(Sequence):
658 def __init__(self, seq):
659 if isinstance(seq, str):
660 self.data = seq
661 elif isinstance(seq, UserString):
662 self.data = seq.data[:]
663 else:
664 self.data = str(seq)
665 def __str__(self): return str(self.data)
666 def __repr__(self): return repr(self.data)
667 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000668 def __float__(self): return float(self.data)
669 def __complex__(self): return complex(self.data)
670 def __hash__(self): return hash(self.data)
671
672 def __eq__(self, string):
673 if isinstance(string, UserString):
674 return self.data == string.data
675 return self.data == string
676 def __ne__(self, string):
677 if isinstance(string, UserString):
678 return self.data != string.data
679 return self.data != string
680 def __lt__(self, string):
681 if isinstance(string, UserString):
682 return self.data < string.data
683 return self.data < string
684 def __le__(self, string):
685 if isinstance(string, UserString):
686 return self.data <= string.data
687 return self.data <= string
688 def __gt__(self, string):
689 if isinstance(string, UserString):
690 return self.data > string.data
691 return self.data > string
692 def __ge__(self, string):
693 if isinstance(string, UserString):
694 return self.data >= string.data
695 return self.data >= string
696
697 def __contains__(self, char):
698 if isinstance(char, UserString):
699 char = char.data
700 return char in self.data
701
702 def __len__(self): return len(self.data)
703 def __getitem__(self, index): return self.__class__(self.data[index])
704 def __add__(self, other):
705 if isinstance(other, UserString):
706 return self.__class__(self.data + other.data)
707 elif isinstance(other, str):
708 return self.__class__(self.data + other)
709 return self.__class__(self.data + str(other))
710 def __radd__(self, other):
711 if isinstance(other, str):
712 return self.__class__(other + self.data)
713 return self.__class__(str(other) + self.data)
714 def __mul__(self, n):
715 return self.__class__(self.data*n)
716 __rmul__ = __mul__
717 def __mod__(self, args):
718 return self.__class__(self.data % args)
719
720 # the following methods are defined in alphabetical order:
721 def capitalize(self): return self.__class__(self.data.capitalize())
722 def center(self, width, *args):
723 return self.__class__(self.data.center(width, *args))
724 def count(self, sub, start=0, end=_sys.maxsize):
725 if isinstance(sub, UserString):
726 sub = sub.data
727 return self.data.count(sub, start, end)
728 def encode(self, encoding=None, errors=None): # XXX improve this?
729 if encoding:
730 if errors:
731 return self.__class__(self.data.encode(encoding, errors))
732 return self.__class__(self.data.encode(encoding))
733 return self.__class__(self.data.encode())
734 def endswith(self, suffix, start=0, end=_sys.maxsize):
735 return self.data.endswith(suffix, start, end)
736 def expandtabs(self, tabsize=8):
737 return self.__class__(self.data.expandtabs(tabsize))
738 def find(self, sub, start=0, end=_sys.maxsize):
739 if isinstance(sub, UserString):
740 sub = sub.data
741 return self.data.find(sub, start, end)
742 def format(self, *args, **kwds):
743 return self.data.format(*args, **kwds)
744 def index(self, sub, start=0, end=_sys.maxsize):
745 return self.data.index(sub, start, end)
746 def isalpha(self): return self.data.isalpha()
747 def isalnum(self): return self.data.isalnum()
748 def isdecimal(self): return self.data.isdecimal()
749 def isdigit(self): return self.data.isdigit()
750 def isidentifier(self): return self.data.isidentifier()
751 def islower(self): return self.data.islower()
752 def isnumeric(self): return self.data.isnumeric()
753 def isspace(self): return self.data.isspace()
754 def istitle(self): return self.data.istitle()
755 def isupper(self): return self.data.isupper()
756 def join(self, seq): return self.data.join(seq)
757 def ljust(self, width, *args):
758 return self.__class__(self.data.ljust(width, *args))
759 def lower(self): return self.__class__(self.data.lower())
760 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
761 def partition(self, sep):
762 return self.data.partition(sep)
763 def replace(self, old, new, maxsplit=-1):
764 if isinstance(old, UserString):
765 old = old.data
766 if isinstance(new, UserString):
767 new = new.data
768 return self.__class__(self.data.replace(old, new, maxsplit))
769 def rfind(self, sub, start=0, end=_sys.maxsize):
Antoine Pitrouda2ecaf2010-01-02 21:40:36 +0000770 if isinstance(sub, UserString):
771 sub = sub.data
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000772 return self.data.rfind(sub, start, end)
773 def rindex(self, sub, start=0, end=_sys.maxsize):
774 return self.data.rindex(sub, start, end)
775 def rjust(self, width, *args):
776 return self.__class__(self.data.rjust(width, *args))
777 def rpartition(self, sep):
778 return self.data.rpartition(sep)
779 def rstrip(self, chars=None):
780 return self.__class__(self.data.rstrip(chars))
781 def split(self, sep=None, maxsplit=-1):
782 return self.data.split(sep, maxsplit)
783 def rsplit(self, sep=None, maxsplit=-1):
784 return self.data.rsplit(sep, maxsplit)
785 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
786 def startswith(self, prefix, start=0, end=_sys.maxsize):
787 return self.data.startswith(prefix, start, end)
788 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
789 def swapcase(self): return self.__class__(self.data.swapcase())
790 def title(self): return self.__class__(self.data.title())
791 def translate(self, *args):
792 return self.__class__(self.data.translate(*args))
793 def upper(self): return self.__class__(self.data.upper())
794 def zfill(self, width): return self.__class__(self.data.zfill(width))
795
796
797
798################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000799### Simple tests
800################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000801
802if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000803 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000804 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000805 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000806 p = Point(x=10, y=20)
807 assert p == loads(dumps(p))
808
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000809 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000810 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000811 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000812 @property
813 def hypot(self):
814 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000815 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000816 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000817
Christian Heimes25bb7832008-01-11 16:17:00 +0000818 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000819 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000820
821 class Point(namedtuple('Point', 'x y')):
822 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000823 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000824 _make = classmethod(tuple.__new__)
825 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000826 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000827
828 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000829
Christian Heimes25bb7832008-01-11 16:17:00 +0000830 Point3D = namedtuple('Point3D', Point._fields + ('z',))
831 print(Point3D.__doc__)
832
Guido van Rossumd8faa362007-04-27 19:54:29 +0000833 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000834 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000835 print(TestResults(*doctest.testmod()))