blob: 1b80ef8f07578764a2cb0f7e8a54e8b09b597707 [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerbc512d32009-03-03 04:45:34 +00009from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingere5b78562009-03-23 18:26:59 +000013from weakref import proxy as _proxy
Raymond Hettingerbc512d32009-03-03 04:45:34 +000014from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, \
Georg Brandl7d4b7592010-02-06 22:49:47 +000015 ifilter as _ifilter, imap as _imap
Raymond Hettingerbc512d32009-03-03 04:45:34 +000016
17################################################################################
18### OrderedDict
19################################################################################
20
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000021class _Link(object):
22 __slots__ = 'prev', 'next', 'key', '__weakref__'
23
Raymond Hettingerbc512d32009-03-03 04:45:34 +000024class OrderedDict(dict, MutableMapping):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000025 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000026 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000027 # The inherited dict provides __getitem__, __len__, __contains__, and get.
28 # The remaining methods are order-aware.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000029 # Big-O running times for all methods are the same as for regular dictionaries.
30
31 # The internal self.__map dictionary maps keys to links in a doubly linked list.
32 # The circular doubly linked list starts and ends with a sentinel element.
33 # The sentinel element never gets deleted (this simplifies the algorithm).
34 # The prev/next links are weakref proxies (to prevent circular references).
35 # Individual links are kept alive by the hard reference in self.__map.
36 # Those hard references disappear when a key is deleted from an OrderedDict.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000039 '''Initialize an ordered dictionary. Signature is the same as for
40 regular dictionaries, but keyword arguments are not recommended
41 because their insertion order is arbitrary.
42
43 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000046 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000047 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000048 except AttributeError:
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000049 self.__root = root = _Link() # sentinel node for the doubly linked list
50 root.prev = root.next = root
51 self.__map = {}
Raymond Hettingerbc512d32009-03-03 04:45:34 +000052 self.update(*args, **kwds)
53
54 def clear(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000055 'od.clear() -> None. Remove all items from od.'
Raymond Hettinger906f95e2009-03-23 04:42:18 +000056 root = self.__root
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000057 root.prev = root.next = root
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000058 self.__map.clear()
Raymond Hettingerbc512d32009-03-03 04:45:34 +000059 dict.clear(self)
60
61 def __setitem__(self, key, value):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000062 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000063 # Setting a new item creates a new link which goes at the end of the linked
64 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000065 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000066 self.__map[key] = link = _Link()
67 root = self.__root
68 last = root.prev
69 link.prev, link.next, link.key = last, root, key
Raymond Hettingere5b78562009-03-23 18:26:59 +000070 last.next = root.prev = _proxy(link)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000071 dict.__setitem__(self, key, value)
72
73 def __delitem__(self, key):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000074 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000075 # Deleting an existing item uses self.__map to find the link which is
76 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000077 dict.__delitem__(self, key)
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000078 link = self.__map.pop(key)
79 link.prev.next = link.next
80 link.next.prev = link.prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000081
82 def __iter__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000083 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000084 # Traverse the linked list in order.
85 root = self.__root
86 curr = root.next
87 while curr is not root:
88 yield curr.key
89 curr = curr.next
Raymond Hettingerbc512d32009-03-03 04:45:34 +000090
91 def __reversed__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000092 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000093 # Traverse the linked list in reverse order.
94 root = self.__root
95 curr = root.prev
96 while curr is not root:
97 yield curr.key
98 curr = curr.prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000099
100 def __reduce__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000101 'Return state information for pickling'
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +0000102 items = [[k, self[k]] for k in self]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000103 tmp = self.__map, self.__root
104 del self.__map, self.__root
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000105 inst_dict = vars(self).copy()
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000106 self.__map, self.__root = tmp
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +0000107 if inst_dict:
108 return (self.__class__, (items,), inst_dict)
109 return self.__class__, (items,)
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000110
111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
Raymond Hettingera61ae692009-03-18 22:13:20 +0000114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
117 iterkeys = MutableMapping.iterkeys
118 itervalues = MutableMapping.itervalues
119 iteritems = MutableMapping.iteritems
120 __ne__ = MutableMapping.__ne__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000121
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000122 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000123 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
124 Pairs are returned in LIFO order if last is true or FIFO order if false.
125
126 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000127 if not self:
128 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000129 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000130 value = self.pop(key)
131 return key, value
132
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000133 def __repr__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000134 'od.__repr__() <==> repr(od)'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000135 if not self:
136 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger0b155412009-03-03 21:13:51 +0000137 return '%s(%r)' % (self.__class__.__name__, self.items())
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000138
139 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000140 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000141 return self.__class__(self)
142
143 @classmethod
144 def fromkeys(cls, iterable, value=None):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000145 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
146 and values equal to v (which defaults to None).
147
148 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000149 d = cls()
150 for key in iterable:
151 d[key] = value
152 return d
153
154 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000155 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
156 while comparison to a regular mapping is order-insensitive.
157
158 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000159 if isinstance(other, OrderedDict):
Raymond Hettingere5b78562009-03-23 18:26:59 +0000160 return len(self)==len(other) and \
161 all(_imap(_eq, self.iteritems(), other.iteritems()))
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000162 return dict.__eq__(self, other)
163
164
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000165
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000166################################################################################
167### namedtuple
168################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000169
Raymond Hettinger322daea2009-02-10 01:24:05 +0000170def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000171 """Returns a new subclass of tuple with named fields.
172
Raymond Hettinger01a09572007-10-23 20:37:41 +0000173 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000174 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000175 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000176 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000177 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000178 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000179 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000180 >>> x, y
181 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000182 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000183 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000184 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000185 >>> d['x']
186 11
187 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000188 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000189 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000190 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000191
192 """
193
Raymond Hettinger58167142008-01-08 02:02:05 +0000194 # Parse and validate the field names. Validation serves two purposes,
195 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000196 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000197 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000198 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000199 if rename:
200 names = list(field_names)
201 seen = set()
202 for i, name in enumerate(names):
203 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
204 or not name or name[0].isdigit() or name.startswith('_')
205 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000206 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000207 seen.add(name)
208 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000209 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000210 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000211 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000212 if _iskeyword(name):
213 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000214 if name[0].isdigit():
215 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000216 seen_names = set()
217 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000218 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000219 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000220 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000221 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000222 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000223
224 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000225 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000226 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000227 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
228 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000229 '%(typename)s(%(argtxt)s)' \n
230 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000231 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000232 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000233 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000234 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000235 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000236 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000237 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000238 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000239 if len(result) != %(numfields)d:
240 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
241 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000242 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000243 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000244 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000245 def _asdict(self):
246 'Return a new OrderedDict which maps field names to their values'
247 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000248 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000249 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000250 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000251 if kwds:
252 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000253 return result \n
254 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000255 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000256 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000257 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000258 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000259 if verbose:
260 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000261
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000262 # Execute the template string in a temporary namespace and
263 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000264 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
265 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000266 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000267 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000268 except SyntaxError, e:
269 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000270 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000271
272 # For pickling to work, the __module__ variable needs to be set to the frame
273 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000274 # sys._getframe is not defined (Jython for example) or sys._getframe is not
275 # defined for arguments greater than 0 (IronPython).
276 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000277 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000278 except (AttributeError, ValueError):
279 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000280
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000281 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000282
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000283
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000284########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000285### Counter
286########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000287
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000288class Counter(dict):
289 '''Dict subclass for counting hashable items. Sometimes called a bag
290 or multiset. Elements are stored as dictionary keys and their counts
291 are stored as dictionary values.
292
293 >>> c = Counter('abracadabra') # count elements from a string
294
295 >>> c.most_common(3) # three most common elements
296 [('a', 5), ('r', 2), ('b', 2)]
297 >>> sorted(c) # list all unique elements
298 ['a', 'b', 'c', 'd', 'r']
299 >>> ''.join(sorted(c.elements())) # list elements with repetitions
300 'aaaaabbcdrr'
301 >>> sum(c.values()) # total of all counts
302 11
303
304 >>> c['a'] # count of letter 'a'
305 5
306 >>> for elem in 'shazam': # update counts from an iterable
307 ... c[elem] += 1 # by adding 1 to each element's count
308 >>> c['a'] # now there are seven 'a'
309 7
310 >>> del c['r'] # remove all 'r'
311 >>> c['r'] # now there are zero 'r'
312 0
313
314 >>> d = Counter('simsalabim') # make another counter
315 >>> c.update(d) # add in the second counter
316 >>> c['a'] # now there are nine 'a'
317 9
318
319 >>> c.clear() # empty the counter
320 >>> c
321 Counter()
322
323 Note: If a count is set to zero or reduced to zero, it will remain
324 in the counter until the entry is deleted or the counter is cleared:
325
326 >>> c = Counter('aaabbc')
327 >>> c['b'] -= 2 # reduce the count of 'b' by two
328 >>> c.most_common() # 'b' is still in, but its count is zero
329 [('a', 3), ('c', 1), ('b', 0)]
330
331 '''
332 # References:
333 # http://en.wikipedia.org/wiki/Multiset
334 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
335 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
336 # http://code.activestate.com/recipes/259174/
337 # Knuth, TAOCP Vol. II section 4.6.3
338
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000339 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000340 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000341 from an input iterable. Or, initialize the count from another mapping
342 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000343
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000344 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000345 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000346 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000347 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000348
349 '''
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000350 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000351
352 def __missing__(self, key):
353 'The count of elements not in the Counter is zero.'
354 # Needed so that self[missing_item] does not raise KeyError
355 return 0
356
357 def most_common(self, n=None):
358 '''List the n most common elements and their counts from the most
359 common to the least. If n is None, then list all element counts.
360
361 >>> Counter('abracadabra').most_common(3)
362 [('a', 5), ('r', 2), ('b', 2)]
363
364 '''
365 # Emulate Bag.sortedByCount from Smalltalk
366 if n is None:
367 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
368 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
369
370 def elements(self):
371 '''Iterator over elements repeating each as many times as its count.
372
373 >>> c = Counter('ABCABC')
374 >>> sorted(c.elements())
375 ['A', 'A', 'B', 'B', 'C', 'C']
376
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000377 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
378 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
379 >>> product = 1
380 >>> for factor in prime_factors.elements(): # loop over factors
381 ... product *= factor # and multiply them
382 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000383 1836
384
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000385 Note, if an element's count has been set to zero or is a negative
386 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000387
388 '''
389 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000390 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000391
392 # Override dict methods where necessary
393
394 @classmethod
395 def fromkeys(cls, iterable, v=None):
396 # There is no equivalent method for counters because setting v=1
397 # means that no element can have a count greater than one.
398 raise NotImplementedError(
399 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
400
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000401 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000402 '''Like dict.update() but add counts instead of replacing them.
403
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000404 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000405
406 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000407 >>> c.update('witch') # add elements from another iterable
408 >>> d = Counter('watch')
409 >>> c.update(d) # add elements from another counter
410 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000411 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000412
413 '''
414 # The regular dict.update() operation makes no sense here because the
415 # replace behavior results in the some of original untouched counts
416 # being mixed-in with all of the other counts for a mismash that
417 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000418 # contexts. Instead, we implement straight-addition. Both the inputs
419 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000420
421 if iterable is not None:
422 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000423 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000424 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000425 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000426 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000427 else:
428 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000429 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000430 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000431 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000432 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000433 if kwds:
434 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000435
436 def copy(self):
437 'Like dict.copy() but returns a Counter instance instead of a dict.'
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000438 return Counter(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000439
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000440 def __delitem__(self, elem):
441 'Like dict.__delitem__() but does not raise KeyError for missing values.'
442 if elem in self:
443 dict.__delitem__(self, elem)
444
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000445 def __repr__(self):
446 if not self:
447 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000448 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000449 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000450
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000451 # Multiset-style mathematical operations discussed in:
452 # Knuth TAOCP Volume II section 4.6.3 exercise 19
453 # and at http://en.wikipedia.org/wiki/Multiset
454 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000455 # Outputs guaranteed to only include positive counts.
456 #
457 # To strip negative and zero counts, add-in an empty counter:
458 # c += Counter()
459
460 def __add__(self, other):
461 '''Add counts from two counters.
462
463 >>> Counter('abbb') + Counter('bcc')
464 Counter({'b': 4, 'c': 2, 'a': 1})
465
466 '''
467 if not isinstance(other, Counter):
468 return NotImplemented
469 result = Counter()
470 for elem in set(self) | set(other):
471 newcount = self[elem] + other[elem]
472 if newcount > 0:
473 result[elem] = newcount
474 return result
475
476 def __sub__(self, other):
477 ''' Subtract count, but keep only results with positive counts.
478
479 >>> Counter('abbbc') - Counter('bccd')
480 Counter({'b': 2, 'a': 1})
481
482 '''
483 if not isinstance(other, Counter):
484 return NotImplemented
485 result = Counter()
Raymond Hettinger4571f342009-01-21 20:31:50 +0000486 for elem in set(self) | set(other):
487 newcount = self[elem] - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000488 if newcount > 0:
489 result[elem] = newcount
490 return result
491
492 def __or__(self, other):
493 '''Union is the maximum of value in either of the input counters.
494
495 >>> Counter('abbb') | Counter('bcc')
496 Counter({'b': 3, 'c': 2, 'a': 1})
497
498 '''
499 if not isinstance(other, Counter):
500 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000501 result = Counter()
502 for elem in set(self) | set(other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000503 p, q = self[elem], other[elem]
504 newcount = q if p < q else p
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000505 if newcount > 0:
506 result[elem] = newcount
507 return result
508
509 def __and__(self, other):
510 ''' Intersection is the minimum of corresponding counts.
511
512 >>> Counter('abbb') & Counter('bcc')
513 Counter({'b': 1})
514
515 '''
516 if not isinstance(other, Counter):
517 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000518 result = Counter()
519 if len(self) < len(other):
520 self, other = other, self
521 for elem in _ifilter(self.__contains__, other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000522 p, q = self[elem], other[elem]
523 newcount = p if p < q else q
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000524 if newcount > 0:
525 result[elem] = newcount
526 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000527
528
529if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000530 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000531 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000532 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000533 p = Point(x=10, y=20)
534 assert p == loads(dumps(p))
535
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000536 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000537 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000538 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000539 @property
540 def hypot(self):
541 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000542 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000543 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000544
Raymond Hettingere1655082008-01-10 19:15:10 +0000545 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000546 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000547
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000548 class Point(namedtuple('Point', 'x y')):
549 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000550 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000551 _make = classmethod(tuple.__new__)
552 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000553 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000554
555 print Point(11, 22)._replace(x=100)
556
Raymond Hettingere850c462008-01-10 20:37:12 +0000557 Point3D = namedtuple('Point3D', Point._fields + ('z',))
558 print Point3D.__doc__
559
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000560 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000561 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000562 print TestResults(*doctest.testmod())