blob: 69a4b019cdee9d86ceb42eff736e30b3939b9a0b [file] [log] [blame]
Brett Cannon23a4a7b2008-05-12 00:56:28 +00001__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
Raymond Hettinger2d32f632009-03-02 21:24:57 +00002 'UserString', 'Counter', 'OrderedDict']
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
4# They should however be considered an integral part of collections.py.
5from _abcoll import *
6import _abcoll
7__all__ += _abcoll.__all__
8
Christian Heimes99170a52007-12-19 02:07:34 +00009from _collections import deque, defaultdict
10from operator import itemgetter as _itemgetter
11from keyword import iskeyword as _iskeyword
12import sys as _sys
Raymond Hettingerb8baf632009-01-14 02:20:07 +000013import heapq as _heapq
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000014from weakref import proxy as _proxy
Raymond Hettingerea9f8db2009-03-02 21:28:41 +000015from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger2d32f632009-03-02 21:24:57 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1736542009-03-23 05:19:21 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettinger2d32f632009-03-02 21:24:57 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1736542009-03-23 05:19:21 +000026 # An inherited dict maps keys to values.
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1736542009-03-23 05:19:21 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000046 try:
Raymond Hettingerf1736542009-03-23 05:19:21 +000047 self.__root
Raymond Hettinger08c70cf2009-03-03 20:47:29 +000048 except AttributeError:
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000049 self.__root = root = _Link() # sentinel node for the doubly linked list
50 root.prev = root.next = root
51 self.__map = {}
Raymond Hettinger2d32f632009-03-02 21:24:57 +000052 self.update(*args, **kwds)
53
54 def clear(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000055 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerf1736542009-03-23 05:19:21 +000056 root = self.__root
57 root.prev = root.next = root
Raymond Hettinger52dc06b2009-03-25 22:45:22 +000058 self.__map.clear()
Raymond Hettinger2d32f632009-03-02 21:24:57 +000059 dict.clear(self)
60
61 def __setitem__(self, key, value):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000062 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1736542009-03-23 05:19:21 +000063 # Setting a new item creates a new link which goes at the end of the linked
64 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000065 if key not in self:
Raymond Hettingerf1736542009-03-23 05:19:21 +000066 self.__map[key] = link = _Link()
67 root = self.__root
68 last = root.prev
69 link.prev, link.next, link.key = last, root, key
Raymond Hettinger798ee1a2009-03-23 18:29:11 +000070 last.next = root.prev = _proxy(link)
Raymond Hettinger2d32f632009-03-02 21:24:57 +000071 dict.__setitem__(self, key, value)
72
73 def __delitem__(self, key):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000074 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1736542009-03-23 05:19:21 +000075 # Deleting an existing item uses self.__map to find the link which is
76 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettinger2d32f632009-03-02 21:24:57 +000077 dict.__delitem__(self, key)
Raymond Hettingerf1736542009-03-23 05:19:21 +000078 link = self.__map.pop(key)
79 link.prev.next = link.next
80 link.next.prev = link.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000081
82 def __iter__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000083 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000084 # Traverse the linked list in order.
85 root = self.__root
86 curr = root.next
87 while curr is not root:
88 yield curr.key
89 curr = curr.next
Raymond Hettinger2d32f632009-03-02 21:24:57 +000090
91 def __reversed__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +000092 'od.__iter__() <==> reversed(od)'
Raymond Hettingerf1736542009-03-23 05:19:21 +000093 # Traverse the linked list in reverse order.
94 root = self.__root
95 curr = root.prev
96 while curr is not root:
97 yield curr.key
98 curr = curr.prev
Raymond Hettinger2d32f632009-03-02 21:24:57 +000099
100 def __reduce__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000101 'Return state information for pickling'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000102 items = [[k, self[k]] for k in self]
Raymond Hettingerf1736542009-03-23 05:19:21 +0000103 tmp = self.__map, self.__root
104 del self.__map, self.__root
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000105 inst_dict = vars(self).copy()
Raymond Hettingerf1736542009-03-23 05:19:21 +0000106 self.__map, self.__root = tmp
Raymond Hettinger14b89ff2009-03-03 22:20:56 +0000107 if inst_dict:
108 return (self.__class__, (items,), inst_dict)
109 return self.__class__, (items,)
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000110
111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
117
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000118 def popitem(self, last=True):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000119 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
120 Pairs are returned in LIFO order if last is true or FIFO order if false.
121
122 '''
Raymond Hettinger18ed2cb2009-03-19 23:14:39 +0000123 if not self:
124 raise KeyError('dictionary is empty')
125 key = next(reversed(self)) if last else next(iter(self))
126 value = self.pop(key)
127 return key, value
128
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000129 def __repr__(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000130 'od.__repr__() <==> repr(od)'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000131 if not self:
132 return '%s()' % (self.__class__.__name__,)
133 return '%s(%r)' % (self.__class__.__name__, list(self.items()))
134
135 def copy(self):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000136 'od.copy() -> a shallow copy of od'
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000137 return self.__class__(self)
138
139 @classmethod
140 def fromkeys(cls, iterable, value=None):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000141 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
142 and values equal to v (which defaults to None).
143
144 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000145 d = cls()
146 for key in iterable:
147 d[key] = value
148 return d
149
150 def __eq__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000151 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
152 while comparison to a regular mapping is order-insensitive.
153
154 '''
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000155 if isinstance(other, OrderedDict):
Raymond Hettinger798ee1a2009-03-23 18:29:11 +0000156 return len(self)==len(other) and \
157 all(p==q for p, q in zip(self.items(), other.items()))
Raymond Hettinger2d32f632009-03-02 21:24:57 +0000158 return dict.__eq__(self, other)
159
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000160 def __ne__(self, other):
Raymond Hettingerf04fa1b2009-04-08 01:15:02 +0000161 '''od.__ne__(y) <==> od!=y. Comparison to another OD is order-sensitive
162 while comparison to a regular mapping is order-insensitive.
163
164 '''
Benjamin Peterson2504b7a2009-04-04 17:26:32 +0000165 return not self == other
166
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000167
Christian Heimes99170a52007-12-19 02:07:34 +0000168
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000169################################################################################
170### namedtuple
171################################################################################
172
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000173def namedtuple(typename, field_names, verbose=False, rename=False):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000174 """Returns a new subclass of tuple with named fields.
175
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000176 >>> Point = namedtuple('Point', 'x y')
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000177 >>> Point.__doc__ # docstring for the new class
Guido van Rossumd8faa362007-04-27 19:54:29 +0000178 'Point(x, y)'
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000179 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Christian Heimes99170a52007-12-19 02:07:34 +0000180 >>> p[0] + p[1] # indexable like a plain tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000181 33
Christian Heimes99170a52007-12-19 02:07:34 +0000182 >>> x, y = p # unpack like a regular tuple
Guido van Rossumd8faa362007-04-27 19:54:29 +0000183 >>> x, y
184 (11, 22)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000185 >>> p.x + p.y # fields also accessable by name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000186 33
Christian Heimes0449f632007-12-15 01:27:15 +0000187 >>> d = p._asdict() # convert to a dictionary
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000188 >>> d['x']
189 11
190 >>> Point(**d) # convert from a dictionary
Guido van Rossumd8faa362007-04-27 19:54:29 +0000191 Point(x=11, y=22)
Christian Heimes0449f632007-12-15 01:27:15 +0000192 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000193 Point(x=100, y=22)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000194
195 """
196
Christian Heimes2380ac72008-01-09 00:17:24 +0000197 # Parse and validate the field names. Validation serves two purposes,
198 # generating informative error messages and preventing template injection attacks.
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000199 if isinstance(field_names, str):
200 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Benjamin Petersone9bbc8b2008-09-28 02:06:32 +0000201 field_names = tuple(map(str, field_names))
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000202 if rename:
203 names = list(field_names)
204 seen = set()
205 for i, name in enumerate(names):
206 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
207 or not name or name[0].isdigit() or name.startswith('_')
208 or name in seen):
Raymond Hettinger56145242009-04-02 22:31:59 +0000209 names[i] = '_%d' % i
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000210 seen.add(name)
211 field_names = tuple(names)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000212 for name in (typename,) + field_names:
Christian Heimesb9eccbf2007-12-05 20:18:38 +0000213 if not all(c.isalnum() or c=='_' for c in name):
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000214 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
215 if _iskeyword(name):
216 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
217 if name[0].isdigit():
218 raise ValueError('Type names and field names cannot start with a number: %r' % name)
219 seen_names = set()
220 for name in field_names:
Benjamin Petersona86f2c02009-02-10 02:41:10 +0000221 if name.startswith('_') and not rename:
Christian Heimes0449f632007-12-15 01:27:15 +0000222 raise ValueError('Field names cannot start with an underscore: %r' % name)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000223 if name in seen_names:
224 raise ValueError('Encountered duplicate field name: %r' % name)
225 seen_names.add(name)
226
227 # Create and fill-in the class template
Christian Heimesfaf2f632008-01-06 16:59:19 +0000228 numfields = len(field_names)
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000229 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000230 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
231 template = '''class %(typename)s(tuple):
Christian Heimes0449f632007-12-15 01:27:15 +0000232 '%(typename)s(%(argtxt)s)' \n
233 __slots__ = () \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000234 _fields = %(field_names)r \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000235 def __new__(cls, %(argtxt)s):
Christian Heimes0449f632007-12-15 01:27:15 +0000236 return tuple.__new__(cls, (%(argtxt)s)) \n
Christian Heimesfaf2f632008-01-06 16:59:19 +0000237 @classmethod
Christian Heimes043d6f62008-01-07 17:19:16 +0000238 def _make(cls, iterable, new=tuple.__new__, len=len):
Christian Heimesfaf2f632008-01-06 16:59:19 +0000239 'Make a new %(typename)s object from a sequence or iterable'
Christian Heimes043d6f62008-01-07 17:19:16 +0000240 result = new(cls, iterable)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000241 if len(result) != %(numfields)d:
242 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
243 return result \n
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000244 def __repr__(self):
Christian Heimes0449f632007-12-15 01:27:15 +0000245 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000246 def _asdict(self):
247 'Return a new OrderedDict which maps field names to their values'
248 return OrderedDict(zip(self._fields, self)) \n
Christian Heimes0449f632007-12-15 01:27:15 +0000249 def _replace(self, **kwds):
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000250 'Return a new %(typename)s object replacing specified fields with new values'
Christian Heimesfaf2f632008-01-06 16:59:19 +0000251 result = self._make(map(kwds.pop, %(field_names)r, self))
252 if kwds:
253 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Georg Brandlc28e1fa2008-06-10 19:20:26 +0000254 return result \n
255 def __getnewargs__(self):
256 return tuple(self) \n\n''' % locals()
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000257 for i, name in enumerate(field_names):
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000258 template += ' %s = property(itemgetter(%d))\n' % (name, i)
259 if verbose:
260 print(template)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000261
Georg Brandlf08a9dd2008-06-10 16:57:31 +0000262 # Execute the template string in a temporary namespace and
263 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera4f52b12009-03-02 22:28:31 +0000264 namespace = dict(itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
265 OrderedDict=OrderedDict)
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000266 try:
267 exec(template, namespace)
268 except SyntaxError as e:
Christian Heimes99170a52007-12-19 02:07:34 +0000269 raise SyntaxError(e.msg + ':\n' + template) from e
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000270 result = namespace[typename]
271
272 # For pickling to work, the __module__ variable needs to be set to the frame
273 # where the named tuple is created. Bypass this step in enviroments where
274 # sys._getframe is not defined (Jython for example).
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000275 if hasattr(_sys, '_getframe'):
Raymond Hettinger0f055172009-01-27 10:06:09 +0000276 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000277
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000278 return result
Guido van Rossumd8faa362007-04-27 19:54:29 +0000279
Guido van Rossumd8faa362007-04-27 19:54:29 +0000280
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000281########################################################################
282### Counter
283########################################################################
284
285class Counter(dict):
286 '''Dict subclass for counting hashable items. Sometimes called a bag
287 or multiset. Elements are stored as dictionary keys and their counts
288 are stored as dictionary values.
289
290 >>> c = Counter('abracadabra') # count elements from a string
291
292 >>> c.most_common(3) # three most common elements
293 [('a', 5), ('r', 2), ('b', 2)]
294 >>> sorted(c) # list all unique elements
295 ['a', 'b', 'c', 'd', 'r']
296 >>> ''.join(sorted(c.elements())) # list elements with repetitions
297 'aaaaabbcdrr'
298 >>> sum(c.values()) # total of all counts
299 11
300
301 >>> c['a'] # count of letter 'a'
302 5
303 >>> for elem in 'shazam': # update counts from an iterable
304 ... c[elem] += 1 # by adding 1 to each element's count
305 >>> c['a'] # now there are seven 'a'
306 7
307 >>> del c['r'] # remove all 'r'
308 >>> c['r'] # now there are zero 'r'
309 0
310
311 >>> d = Counter('simsalabim') # make another counter
312 >>> c.update(d) # add in the second counter
313 >>> c['a'] # now there are nine 'a'
314 9
315
316 >>> c.clear() # empty the counter
317 >>> c
318 Counter()
319
320 Note: If a count is set to zero or reduced to zero, it will remain
321 in the counter until the entry is deleted or the counter is cleared:
322
323 >>> c = Counter('aaabbc')
324 >>> c['b'] -= 2 # reduce the count of 'b' by two
325 >>> c.most_common() # 'b' is still in, but its count is zero
326 [('a', 3), ('c', 1), ('b', 0)]
327
328 '''
329 # References:
330 # http://en.wikipedia.org/wiki/Multiset
331 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
332 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
333 # http://code.activestate.com/recipes/259174/
334 # Knuth, TAOCP Vol. II section 4.6.3
335
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000336 def __init__(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000337 '''Create a new, empty Counter object. And if given, count elements
338 from an input iterable. Or, initialize the count from another mapping
339 of elements to their counts.
340
341 >>> c = Counter() # a new, empty counter
342 >>> c = Counter('gallahad') # a new counter from an iterable
343 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000344 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000345
346 '''
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000347 self.update(iterable, **kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000348
349 def __missing__(self, key):
350 'The count of elements not in the Counter is zero.'
351 # Needed so that self[missing_item] does not raise KeyError
352 return 0
353
354 def most_common(self, n=None):
355 '''List the n most common elements and their counts from the most
356 common to the least. If n is None, then list all element counts.
357
358 >>> Counter('abracadabra').most_common(3)
359 [('a', 5), ('r', 2), ('b', 2)]
360
361 '''
362 # Emulate Bag.sortedByCount from Smalltalk
363 if n is None:
364 return sorted(self.items(), key=_itemgetter(1), reverse=True)
365 return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
366
367 def elements(self):
368 '''Iterator over elements repeating each as many times as its count.
369
370 >>> c = Counter('ABCABC')
371 >>> sorted(c.elements())
372 ['A', 'A', 'B', 'B', 'C', 'C']
373
374 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
375 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
376 >>> product = 1
377 >>> for factor in prime_factors.elements(): # loop over factors
378 ... product *= factor # and multiply them
379 >>> product
380 1836
381
382 Note, if an element's count has been set to zero or is a negative
383 number, elements() will ignore it.
384
385 '''
386 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
387 return _chain.from_iterable(_starmap(_repeat, self.items()))
388
389 # Override dict methods where necessary
390
391 @classmethod
392 def fromkeys(cls, iterable, v=None):
393 # There is no equivalent method for counters because setting v=1
394 # means that no element can have a count greater than one.
395 raise NotImplementedError(
396 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
397
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000398 def update(self, iterable=None, **kwds):
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000399 '''Like dict.update() but add counts instead of replacing them.
400
401 Source can be an iterable, a dictionary, or another Counter instance.
402
403 >>> c = Counter('which')
404 >>> c.update('witch') # add elements from another iterable
405 >>> d = Counter('watch')
406 >>> c.update(d) # add elements from another counter
407 >>> c['h'] # four 'h' in which, witch, and watch
408 4
409
410 '''
411 # The regular dict.update() operation makes no sense here because the
412 # replace behavior results in the some of original untouched counts
413 # being mixed-in with all of the other counts for a mismash that
414 # doesn't have a straight-forward interpretation in most counting
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000415 # contexts. Instead, we implement straight-addition. Both the inputs
416 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000417
418 if iterable is not None:
419 if isinstance(iterable, Mapping):
Raymond Hettingerdd01f8f2009-01-22 09:09:55 +0000420 if self:
421 for elem, count in iterable.items():
422 self[elem] += count
423 else:
424 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000425 else:
426 for elem in iterable:
427 self[elem] += 1
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000428 if kwds:
429 self.update(kwds)
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000430
431 def copy(self):
432 'Like dict.copy() but returns a Counter instance instead of a dict.'
433 return Counter(self)
434
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000435 def __delitem__(self, elem):
436 'Like dict.__delitem__() but does not raise KeyError for missing values.'
437 if elem in self:
438 dict.__delitem__(self, elem)
439
Raymond Hettingerb8baf632009-01-14 02:20:07 +0000440 def __repr__(self):
441 if not self:
442 return '%s()' % self.__class__.__name__
443 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
444 return '%s({%s})' % (self.__class__.__name__, items)
445
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000446 # Multiset-style mathematical operations discussed in:
447 # Knuth TAOCP Volume II section 4.6.3 exercise 19
448 # and at http://en.wikipedia.org/wiki/Multiset
449 #
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000450 # Outputs guaranteed to only include positive counts.
451 #
452 # To strip negative and zero counts, add-in an empty counter:
453 # c += Counter()
454
455 def __add__(self, other):
456 '''Add counts from two counters.
457
458 >>> Counter('abbb') + Counter('bcc')
459 Counter({'b': 4, 'c': 2, 'a': 1})
460
461 '''
462 if not isinstance(other, Counter):
463 return NotImplemented
464 result = Counter()
465 for elem in set(self) | set(other):
466 newcount = self[elem] + other[elem]
467 if newcount > 0:
468 result[elem] = newcount
469 return result
470
471 def __sub__(self, other):
472 ''' Subtract count, but keep only results with positive counts.
473
474 >>> Counter('abbbc') - Counter('bccd')
475 Counter({'b': 2, 'a': 1})
476
477 '''
478 if not isinstance(other, Counter):
479 return NotImplemented
480 result = Counter()
Raymond Hettingere0d1b9f2009-01-21 20:36:27 +0000481 for elem in set(self) | set(other):
482 newcount = self[elem] - other[elem]
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000483 if newcount > 0:
484 result[elem] = newcount
485 return result
486
487 def __or__(self, other):
488 '''Union is the maximum of value in either of the input counters.
489
490 >>> Counter('abbb') | Counter('bcc')
491 Counter({'b': 3, 'c': 2, 'a': 1})
492
493 '''
494 if not isinstance(other, Counter):
495 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000496 result = Counter()
497 for elem in set(self) | set(other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000498 p, q = self[elem], other[elem]
499 newcount = q if p < q else p
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000500 if newcount > 0:
501 result[elem] = newcount
502 return result
503
504 def __and__(self, other):
505 ''' Intersection is the minimum of corresponding counts.
506
507 >>> Counter('abbb') & Counter('bcc')
508 Counter({'b': 1})
509
510 '''
511 if not isinstance(other, Counter):
512 return NotImplemented
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000513 result = Counter()
514 if len(self) < len(other):
515 self, other = other, self
516 for elem in filter(self.__contains__, other):
Raymond Hettingerc4791702009-04-04 08:48:03 +0000517 p, q = self[elem], other[elem]
518 newcount = p if p < q else q
Raymond Hettinger4d2073a2009-01-20 03:41:22 +0000519 if newcount > 0:
520 result[elem] = newcount
521 return result
522
Guido van Rossumd8faa362007-04-27 19:54:29 +0000523
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000524################################################################################
525### UserDict
526################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000527
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000528class UserDict(MutableMapping):
529
530 # Start by filling-out the abstract methods
531 def __init__(self, dict=None, **kwargs):
532 self.data = {}
533 if dict is not None:
534 self.update(dict)
535 if len(kwargs):
536 self.update(kwargs)
537 def __len__(self): return len(self.data)
538 def __getitem__(self, key):
539 if key in self.data:
540 return self.data[key]
541 if hasattr(self.__class__, "__missing__"):
542 return self.__class__.__missing__(self, key)
543 raise KeyError(key)
544 def __setitem__(self, key, item): self.data[key] = item
545 def __delitem__(self, key): del self.data[key]
546 def __iter__(self):
547 return iter(self.data)
548
Raymond Hettinger554c8b82008-02-05 22:54:43 +0000549 # Modify __contains__ to work correctly when __missing__ is present
550 def __contains__(self, key):
551 return key in self.data
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000552
553 # Now, add the methods in dicts but not in MutableMapping
554 def __repr__(self): return repr(self.data)
555 def copy(self):
556 if self.__class__ is UserDict:
557 return UserDict(self.data.copy())
558 import copy
559 data = self.data
560 try:
561 self.data = {}
562 c = copy.copy(self)
563 finally:
564 self.data = data
565 c.update(self)
566 return c
567 @classmethod
568 def fromkeys(cls, iterable, value=None):
569 d = cls()
570 for key in iterable:
571 d[key] = value
572 return d
573
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000574
575
576################################################################################
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000577### UserList
578################################################################################
579
580class UserList(MutableSequence):
581 """A more or less complete user-defined wrapper around list objects."""
582 def __init__(self, initlist=None):
583 self.data = []
584 if initlist is not None:
585 # XXX should this accept an arbitrary sequence?
586 if type(initlist) == type(self.data):
587 self.data[:] = initlist
588 elif isinstance(initlist, UserList):
589 self.data[:] = initlist.data[:]
590 else:
591 self.data = list(initlist)
592 def __repr__(self): return repr(self.data)
593 def __lt__(self, other): return self.data < self.__cast(other)
594 def __le__(self, other): return self.data <= self.__cast(other)
595 def __eq__(self, other): return self.data == self.__cast(other)
596 def __ne__(self, other): return self.data != self.__cast(other)
597 def __gt__(self, other): return self.data > self.__cast(other)
598 def __ge__(self, other): return self.data >= self.__cast(other)
599 def __cast(self, other):
600 return other.data if isinstance(other, UserList) else other
Raymond Hettinger53dbe392008-02-12 20:03:09 +0000601 def __contains__(self, item): return item in self.data
602 def __len__(self): return len(self.data)
603 def __getitem__(self, i): return self.data[i]
604 def __setitem__(self, i, item): self.data[i] = item
605 def __delitem__(self, i): del self.data[i]
606 def __add__(self, other):
607 if isinstance(other, UserList):
608 return self.__class__(self.data + other.data)
609 elif isinstance(other, type(self.data)):
610 return self.__class__(self.data + other)
611 return self.__class__(self.data + list(other))
612 def __radd__(self, other):
613 if isinstance(other, UserList):
614 return self.__class__(other.data + self.data)
615 elif isinstance(other, type(self.data)):
616 return self.__class__(other + self.data)
617 return self.__class__(list(other) + self.data)
618 def __iadd__(self, other):
619 if isinstance(other, UserList):
620 self.data += other.data
621 elif isinstance(other, type(self.data)):
622 self.data += other
623 else:
624 self.data += list(other)
625 return self
626 def __mul__(self, n):
627 return self.__class__(self.data*n)
628 __rmul__ = __mul__
629 def __imul__(self, n):
630 self.data *= n
631 return self
632 def append(self, item): self.data.append(item)
633 def insert(self, i, item): self.data.insert(i, item)
634 def pop(self, i=-1): return self.data.pop(i)
635 def remove(self, item): self.data.remove(item)
636 def count(self, item): return self.data.count(item)
637 def index(self, item, *args): return self.data.index(item, *args)
638 def reverse(self): self.data.reverse()
639 def sort(self, *args, **kwds): self.data.sort(*args, **kwds)
640 def extend(self, other):
641 if isinstance(other, UserList):
642 self.data.extend(other.data)
643 else:
644 self.data.extend(other)
645
646
647
648################################################################################
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000649### UserString
650################################################################################
651
652class UserString(Sequence):
653 def __init__(self, seq):
654 if isinstance(seq, str):
655 self.data = seq
656 elif isinstance(seq, UserString):
657 self.data = seq.data[:]
658 else:
659 self.data = str(seq)
660 def __str__(self): return str(self.data)
661 def __repr__(self): return repr(self.data)
662 def __int__(self): return int(self.data)
Raymond Hettingerb3a65f82008-02-21 22:11:37 +0000663 def __float__(self): return float(self.data)
664 def __complex__(self): return complex(self.data)
665 def __hash__(self): return hash(self.data)
666
667 def __eq__(self, string):
668 if isinstance(string, UserString):
669 return self.data == string.data
670 return self.data == string
671 def __ne__(self, string):
672 if isinstance(string, UserString):
673 return self.data != string.data
674 return self.data != string
675 def __lt__(self, string):
676 if isinstance(string, UserString):
677 return self.data < string.data
678 return self.data < string
679 def __le__(self, string):
680 if isinstance(string, UserString):
681 return self.data <= string.data
682 return self.data <= string
683 def __gt__(self, string):
684 if isinstance(string, UserString):
685 return self.data > string.data
686 return self.data > string
687 def __ge__(self, string):
688 if isinstance(string, UserString):
689 return self.data >= string.data
690 return self.data >= string
691
692 def __contains__(self, char):
693 if isinstance(char, UserString):
694 char = char.data
695 return char in self.data
696
697 def __len__(self): return len(self.data)
698 def __getitem__(self, index): return self.__class__(self.data[index])
699 def __add__(self, other):
700 if isinstance(other, UserString):
701 return self.__class__(self.data + other.data)
702 elif isinstance(other, str):
703 return self.__class__(self.data + other)
704 return self.__class__(self.data + str(other))
705 def __radd__(self, other):
706 if isinstance(other, str):
707 return self.__class__(other + self.data)
708 return self.__class__(str(other) + self.data)
709 def __mul__(self, n):
710 return self.__class__(self.data*n)
711 __rmul__ = __mul__
712 def __mod__(self, args):
713 return self.__class__(self.data % args)
714
715 # the following methods are defined in alphabetical order:
716 def capitalize(self): return self.__class__(self.data.capitalize())
717 def center(self, width, *args):
718 return self.__class__(self.data.center(width, *args))
719 def count(self, sub, start=0, end=_sys.maxsize):
720 if isinstance(sub, UserString):
721 sub = sub.data
722 return self.data.count(sub, start, end)
723 def encode(self, encoding=None, errors=None): # XXX improve this?
724 if encoding:
725 if errors:
726 return self.__class__(self.data.encode(encoding, errors))
727 return self.__class__(self.data.encode(encoding))
728 return self.__class__(self.data.encode())
729 def endswith(self, suffix, start=0, end=_sys.maxsize):
730 return self.data.endswith(suffix, start, end)
731 def expandtabs(self, tabsize=8):
732 return self.__class__(self.data.expandtabs(tabsize))
733 def find(self, sub, start=0, end=_sys.maxsize):
734 if isinstance(sub, UserString):
735 sub = sub.data
736 return self.data.find(sub, start, end)
737 def format(self, *args, **kwds):
738 return self.data.format(*args, **kwds)
739 def index(self, sub, start=0, end=_sys.maxsize):
740 return self.data.index(sub, start, end)
741 def isalpha(self): return self.data.isalpha()
742 def isalnum(self): return self.data.isalnum()
743 def isdecimal(self): return self.data.isdecimal()
744 def isdigit(self): return self.data.isdigit()
745 def isidentifier(self): return self.data.isidentifier()
746 def islower(self): return self.data.islower()
747 def isnumeric(self): return self.data.isnumeric()
748 def isspace(self): return self.data.isspace()
749 def istitle(self): return self.data.istitle()
750 def isupper(self): return self.data.isupper()
751 def join(self, seq): return self.data.join(seq)
752 def ljust(self, width, *args):
753 return self.__class__(self.data.ljust(width, *args))
754 def lower(self): return self.__class__(self.data.lower())
755 def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
756 def partition(self, sep):
757 return self.data.partition(sep)
758 def replace(self, old, new, maxsplit=-1):
759 if isinstance(old, UserString):
760 old = old.data
761 if isinstance(new, UserString):
762 new = new.data
763 return self.__class__(self.data.replace(old, new, maxsplit))
764 def rfind(self, sub, start=0, end=_sys.maxsize):
765 return self.data.rfind(sub, start, end)
766 def rindex(self, sub, start=0, end=_sys.maxsize):
767 return self.data.rindex(sub, start, end)
768 def rjust(self, width, *args):
769 return self.__class__(self.data.rjust(width, *args))
770 def rpartition(self, sep):
771 return self.data.rpartition(sep)
772 def rstrip(self, chars=None):
773 return self.__class__(self.data.rstrip(chars))
774 def split(self, sep=None, maxsplit=-1):
775 return self.data.split(sep, maxsplit)
776 def rsplit(self, sep=None, maxsplit=-1):
777 return self.data.rsplit(sep, maxsplit)
778 def splitlines(self, keepends=0): return self.data.splitlines(keepends)
779 def startswith(self, prefix, start=0, end=_sys.maxsize):
780 return self.data.startswith(prefix, start, end)
781 def strip(self, chars=None): return self.__class__(self.data.strip(chars))
782 def swapcase(self): return self.__class__(self.data.swapcase())
783 def title(self): return self.__class__(self.data.title())
784 def translate(self, *args):
785 return self.__class__(self.data.translate(*args))
786 def upper(self): return self.__class__(self.data.upper())
787 def zfill(self, width): return self.__class__(self.data.zfill(width))
788
789
790
791################################################################################
Raymond Hettinger48b8b662008-02-05 01:53:00 +0000792### Simple tests
793################################################################################
Guido van Rossumd8faa362007-04-27 19:54:29 +0000794
795if __name__ == '__main__':
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000796 # verify that instances can be pickled
Guido van Rossum99603b02007-07-20 00:22:32 +0000797 from pickle import loads, dumps
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000798 Point = namedtuple('Point', 'x, y', True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000799 p = Point(x=10, y=20)
800 assert p == loads(dumps(p))
801
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000802 # test and demonstrate ability to override methods
Christian Heimes043d6f62008-01-07 17:19:16 +0000803 class Point(namedtuple('Point', 'x y')):
Christian Heimes25bb7832008-01-11 16:17:00 +0000804 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000805 @property
806 def hypot(self):
807 return (self.x ** 2 + self.y ** 2) ** 0.5
Christian Heimes790c8232008-01-07 21:14:23 +0000808 def __str__(self):
Christian Heimes25bb7832008-01-11 16:17:00 +0000809 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Christian Heimes043d6f62008-01-07 17:19:16 +0000810
Christian Heimes25bb7832008-01-11 16:17:00 +0000811 for p in Point(3, 4), Point(14, 5/7.):
Christian Heimes790c8232008-01-07 21:14:23 +0000812 print (p)
Christian Heimes043d6f62008-01-07 17:19:16 +0000813
814 class Point(namedtuple('Point', 'x y')):
815 'Point class with optimized _make() and _replace() without error-checking'
Christian Heimes25bb7832008-01-11 16:17:00 +0000816 __slots__ = ()
Christian Heimes043d6f62008-01-07 17:19:16 +0000817 _make = classmethod(tuple.__new__)
818 def _replace(self, _map=map, **kwds):
Christian Heimes2380ac72008-01-09 00:17:24 +0000819 return self._make(_map(kwds.get, ('x', 'y'), self))
Christian Heimes043d6f62008-01-07 17:19:16 +0000820
821 print(Point(11, 22)._replace(x=100))
Guido van Rossum3d392eb2007-11-16 00:35:22 +0000822
Christian Heimes25bb7832008-01-11 16:17:00 +0000823 Point3D = namedtuple('Point3D', Point._fields + ('z',))
824 print(Point3D.__doc__)
825
Guido van Rossumd8faa362007-04-27 19:54:29 +0000826 import doctest
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000827 TestResults = namedtuple('TestResults', 'failed attempted')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000828 print(TestResults(*doctest.testmod()))