blob: a49ecc7719eca128a49927f7edd5205a94afaaaf [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerbc512d32009-03-03 04:45:34 +00009from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerbc512d32009-03-03 04:45:34 +000013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, \
Georg Brandl7d4b7592010-02-06 22:49:47 +000014 ifilter as _ifilter, imap as _imap
Raymond Hettinger74f869e2010-09-13 22:14:36 +000015try:
16 from thread import get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000017except ImportError:
Raymond Hettinger74f869e2010-09-13 22:14:36 +000018 from dummy_thread import get_ident
19
20def _recursive_repr(user_function):
21 'Decorator to make a repr function return "..." for a recursive call'
22 repr_running = set()
23
24 def wrapper(self):
25 key = id(self), get_ident()
26 if key in repr_running:
27 return '...'
28 repr_running.add(key)
29 try:
30 result = user_function(self)
31 finally:
32 repr_running.discard(key)
33 return result
34
35 # Can't use functools.wraps() here because of bootstrap issues
36 wrapper.__module__ = getattr(user_function, '__module__')
37 wrapper.__doc__ = getattr(user_function, '__doc__')
38 wrapper.__name__ = getattr(user_function, '__name__')
39 return wrapper
40
Raymond Hettingerbc512d32009-03-03 04:45:34 +000041
42################################################################################
43### OrderedDict
44################################################################################
45
46class OrderedDict(dict, MutableMapping):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000047 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000048 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000049 # The inherited dict provides __getitem__, __len__, __contains__, and get.
50 # The remaining methods are order-aware.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000051 # Big-O running times for all methods are the same as for regular dictionaries.
52
53 # The internal self.__map dictionary maps keys to links in a doubly linked list.
54 # The circular doubly linked list starts and ends with a sentinel element.
55 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000056 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000057
58 def __init__(self, *args, **kwds):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000059 '''Initialize an ordered dictionary. Signature is the same as for
60 regular dictionaries, but keyword arguments are not recommended
61 because their insertion order is arbitrary.
62
63 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000064 if len(args) > 1:
65 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000066 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000067 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000068 except AttributeError:
Raymond Hettingeraba22932010-03-09 09:58:53 +000069 self.__root = root = [None, None, None] # sentinel node
Raymond Hettingere30bc382010-03-09 11:29:10 +000070 PREV = 0
71 NEXT = 1
Raymond Hettingeraba22932010-03-09 09:58:53 +000072 root[PREV] = root[NEXT] = root
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000073 self.__map = {}
Raymond Hettingerbc512d32009-03-03 04:45:34 +000074 self.update(*args, **kwds)
75
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000076 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000077 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000078 # Setting a new item creates a new link which goes at the end of the linked
79 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000080 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000081 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000082 last = root[PREV]
83 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000084 dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000085
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000086 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000087 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000088 # Deleting an existing item uses self.__map to find the link which is
89 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000090 dict_delitem(self, key)
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000091 link = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000092 link_prev = link[PREV]
93 link_next = link[NEXT]
94 link_prev[NEXT] = link_next
95 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000096
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000097 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000098 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000099 # Traverse the linked list in order.
100 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +0000101 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000102 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +0000103 yield curr[KEY]
104 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000105
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +0000106 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000107 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000108 # Traverse the linked list in reverse order.
109 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +0000110 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000111 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +0000112 yield curr[KEY]
113 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000114
115 def __reduce__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000116 'Return state information for pickling'
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +0000117 items = [[k, self[k]] for k in self]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000118 tmp = self.__map, self.__root
119 del self.__map, self.__root
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000120 inst_dict = vars(self).copy()
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000121 self.__map, self.__root = tmp
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +0000122 if inst_dict:
123 return (self.__class__, (items,), inst_dict)
124 return self.__class__, (items,)
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000125
Raymond Hettinger39282762010-04-03 00:39:26 +0000126 def clear(self):
127 'od.clear() -> None. Remove all items from od.'
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +0000128 try:
129 for node in self.__map.itervalues():
130 del node[:]
131 self.__root[:] = [self.__root, self.__root, None]
132 self.__map.clear()
133 except AttributeError:
134 pass
135 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000136
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000137 setdefault = MutableMapping.setdefault
138 update = MutableMapping.update
139 pop = MutableMapping.pop
Raymond Hettingera61ae692009-03-18 22:13:20 +0000140 keys = MutableMapping.keys
141 values = MutableMapping.values
142 items = MutableMapping.items
143 iterkeys = MutableMapping.iterkeys
144 itervalues = MutableMapping.itervalues
145 iteritems = MutableMapping.iteritems
146 __ne__ = MutableMapping.__ne__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000147
Raymond Hettingera54b2da2010-08-17 19:03:06 +0000148 def viewkeys(self):
149 "od.viewkeys() -> a set-like object providing a view on od's keys"
150 return KeysView(self)
151
152 def viewvalues(self):
153 "od.viewvalues() -> an object providing a view on od's values"
154 return ValuesView(self)
155
156 def viewitems(self):
157 "od.viewitems() -> a set-like object providing a view on od's items"
158 return ItemsView(self)
159
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000160 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000161 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
162 Pairs are returned in LIFO order if last is true or FIFO order if false.
163
164 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000165 if not self:
166 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000167 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000168 value = self.pop(key)
169 return key, value
170
Raymond Hettinger74f869e2010-09-13 22:14:36 +0000171 @_recursive_repr
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000172 def __repr__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000173 'od.__repr__() <==> repr(od)'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000174 if not self:
175 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger0b155412009-03-03 21:13:51 +0000176 return '%s(%r)' % (self.__class__.__name__, self.items())
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000177
178 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000179 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000180 return self.__class__(self)
181
182 @classmethod
183 def fromkeys(cls, iterable, value=None):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000184 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
185 and values equal to v (which defaults to None).
186
187 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000188 d = cls()
189 for key in iterable:
190 d[key] = value
191 return d
192
193 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000194 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
195 while comparison to a regular mapping is order-insensitive.
196
197 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000198 if isinstance(other, OrderedDict):
Raymond Hettingere5b78562009-03-23 18:26:59 +0000199 return len(self)==len(other) and \
200 all(_imap(_eq, self.iteritems(), other.iteritems()))
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000201 return dict.__eq__(self, other)
202
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000203
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000204################################################################################
205### namedtuple
206################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000207
Raymond Hettinger322daea2009-02-10 01:24:05 +0000208def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000209 """Returns a new subclass of tuple with named fields.
210
Raymond Hettinger01a09572007-10-23 20:37:41 +0000211 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000212 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000213 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000214 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000215 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000216 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000217 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000218 >>> x, y
219 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000220 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000221 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000222 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000223 >>> d['x']
224 11
225 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000226 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000227 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000228 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000229
230 """
231
Raymond Hettinger58167142008-01-08 02:02:05 +0000232 # Parse and validate the field names. Validation serves two purposes,
233 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000234 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000235 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000236 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000237 if rename:
238 names = list(field_names)
239 seen = set()
240 for i, name in enumerate(names):
241 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
242 or not name or name[0].isdigit() or name.startswith('_')
243 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000244 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000245 seen.add(name)
246 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000247 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000248 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000249 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000250 if _iskeyword(name):
251 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000252 if name[0].isdigit():
253 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000254 seen_names = set()
255 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000256 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000257 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000258 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000259 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000260 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000261
262 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000263 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000264 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000265 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
266 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000267 '%(typename)s(%(argtxt)s)' \n
268 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000269 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000270 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000271 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000272 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000273 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000274 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000275 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000276 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000277 if len(result) != %(numfields)d:
278 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
279 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000280 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000281 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000282 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000283 def _asdict(self):
284 'Return a new OrderedDict which maps field names to their values'
285 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000286 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000287 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000288 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000289 if kwds:
290 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000291 return result \n
292 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000293 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000294 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000295 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000296 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000297 if verbose:
298 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000299
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000300 # Execute the template string in a temporary namespace and
301 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000302 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
303 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000304 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000305 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000306 except SyntaxError, e:
307 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000308 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000309
310 # For pickling to work, the __module__ variable needs to be set to the frame
311 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000312 # sys._getframe is not defined (Jython for example) or sys._getframe is not
313 # defined for arguments greater than 0 (IronPython).
314 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000315 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000316 except (AttributeError, ValueError):
317 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000318
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000319 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000320
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000321
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000322########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000323### Counter
324########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000325
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000326class Counter(dict):
327 '''Dict subclass for counting hashable items. Sometimes called a bag
328 or multiset. Elements are stored as dictionary keys and their counts
329 are stored as dictionary values.
330
331 >>> c = Counter('abracadabra') # count elements from a string
332
333 >>> c.most_common(3) # three most common elements
334 [('a', 5), ('r', 2), ('b', 2)]
335 >>> sorted(c) # list all unique elements
336 ['a', 'b', 'c', 'd', 'r']
337 >>> ''.join(sorted(c.elements())) # list elements with repetitions
338 'aaaaabbcdrr'
339 >>> sum(c.values()) # total of all counts
340 11
341
342 >>> c['a'] # count of letter 'a'
343 5
344 >>> for elem in 'shazam': # update counts from an iterable
345 ... c[elem] += 1 # by adding 1 to each element's count
346 >>> c['a'] # now there are seven 'a'
347 7
348 >>> del c['r'] # remove all 'r'
349 >>> c['r'] # now there are zero 'r'
350 0
351
352 >>> d = Counter('simsalabim') # make another counter
353 >>> c.update(d) # add in the second counter
354 >>> c['a'] # now there are nine 'a'
355 9
356
357 >>> c.clear() # empty the counter
358 >>> c
359 Counter()
360
361 Note: If a count is set to zero or reduced to zero, it will remain
362 in the counter until the entry is deleted or the counter is cleared:
363
364 >>> c = Counter('aaabbc')
365 >>> c['b'] -= 2 # reduce the count of 'b' by two
366 >>> c.most_common() # 'b' is still in, but its count is zero
367 [('a', 3), ('c', 1), ('b', 0)]
368
369 '''
370 # References:
371 # http://en.wikipedia.org/wiki/Multiset
372 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
373 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
374 # http://code.activestate.com/recipes/259174/
375 # Knuth, TAOCP Vol. II section 4.6.3
376
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000377 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000378 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000379 from an input iterable. Or, initialize the count from another mapping
380 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000381
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000382 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000383 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000384 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000385 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000386
387 '''
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000388 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000389
390 def __missing__(self, key):
391 'The count of elements not in the Counter is zero.'
392 # Needed so that self[missing_item] does not raise KeyError
393 return 0
394
395 def most_common(self, n=None):
396 '''List the n most common elements and their counts from the most
397 common to the least. If n is None, then list all element counts.
398
399 >>> Counter('abracadabra').most_common(3)
400 [('a', 5), ('r', 2), ('b', 2)]
401
402 '''
403 # Emulate Bag.sortedByCount from Smalltalk
404 if n is None:
405 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
406 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
407
408 def elements(self):
409 '''Iterator over elements repeating each as many times as its count.
410
411 >>> c = Counter('ABCABC')
412 >>> sorted(c.elements())
413 ['A', 'A', 'B', 'B', 'C', 'C']
414
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000415 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
416 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
417 >>> product = 1
418 >>> for factor in prime_factors.elements(): # loop over factors
419 ... product *= factor # and multiply them
420 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000421 1836
422
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000423 Note, if an element's count has been set to zero or is a negative
424 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000425
426 '''
427 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000428 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000429
430 # Override dict methods where necessary
431
432 @classmethod
433 def fromkeys(cls, iterable, v=None):
434 # There is no equivalent method for counters because setting v=1
435 # means that no element can have a count greater than one.
436 raise NotImplementedError(
437 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
438
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000439 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000440 '''Like dict.update() but add counts instead of replacing them.
441
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000442 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000443
444 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000445 >>> c.update('witch') # add elements from another iterable
446 >>> d = Counter('watch')
447 >>> c.update(d) # add elements from another counter
448 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000449 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000450
451 '''
452 # The regular dict.update() operation makes no sense here because the
453 # replace behavior results in the some of original untouched counts
454 # being mixed-in with all of the other counts for a mismash that
455 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000456 # contexts. Instead, we implement straight-addition. Both the inputs
457 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000458
459 if iterable is not None:
460 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000461 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000462 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000463 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000464 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000465 else:
466 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000467 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000468 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000469 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000470 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000471 if kwds:
472 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000473
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000474 def subtract(self, iterable=None, **kwds):
475 '''Like dict.update() but subtracts counts instead of replacing them.
476 Counts can be reduced below zero. Both the inputs and outputs are
477 allowed to contain zero and negative counts.
478
479 Source can be an iterable, a dictionary, or another Counter instance.
480
481 >>> c = Counter('which')
482 >>> c.subtract('witch') # subtract elements from another iterable
483 >>> c.subtract(Counter('watch')) # subtract elements from another counter
484 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
485 0
486 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
487 -1
488
489 '''
490 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000491 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000492 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000493 for elem, count in iterable.items():
494 self[elem] = self_get(elem, 0) - count
495 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000496 for elem in iterable:
497 self[elem] = self_get(elem, 0) - 1
498 if kwds:
499 self.subtract(kwds)
500
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000501 def copy(self):
502 'Like dict.copy() but returns a Counter instance instead of a dict.'
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000503 return Counter(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000504
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000505 def __delitem__(self, elem):
506 'Like dict.__delitem__() but does not raise KeyError for missing values.'
507 if elem in self:
508 dict.__delitem__(self, elem)
509
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000510 def __repr__(self):
511 if not self:
512 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000513 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000514 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000515
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000516 # Multiset-style mathematical operations discussed in:
517 # Knuth TAOCP Volume II section 4.6.3 exercise 19
518 # and at http://en.wikipedia.org/wiki/Multiset
519 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000520 # Outputs guaranteed to only include positive counts.
521 #
522 # To strip negative and zero counts, add-in an empty counter:
523 # c += Counter()
524
525 def __add__(self, other):
526 '''Add counts from two counters.
527
528 >>> Counter('abbb') + Counter('bcc')
529 Counter({'b': 4, 'c': 2, 'a': 1})
530
531 '''
532 if not isinstance(other, Counter):
533 return NotImplemented
534 result = Counter()
535 for elem in set(self) | set(other):
536 newcount = self[elem] + other[elem]
537 if newcount > 0:
538 result[elem] = newcount
539 return result
540
541 def __sub__(self, other):
542 ''' Subtract count, but keep only results with positive counts.
543
544 >>> Counter('abbbc') - Counter('bccd')
545 Counter({'b': 2, 'a': 1})
546
547 '''
548 if not isinstance(other, Counter):
549 return NotImplemented
550 result = Counter()
Raymond Hettinger4571f342009-01-21 20:31:50 +0000551 for elem in set(self) | set(other):
552 newcount = self[elem] - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000553 if newcount > 0:
554 result[elem] = newcount
555 return result
556
557 def __or__(self, other):
558 '''Union is the maximum of value in either of the input counters.
559
560 >>> Counter('abbb') | Counter('bcc')
561 Counter({'b': 3, 'c': 2, 'a': 1})
562
563 '''
564 if not isinstance(other, Counter):
565 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000566 result = Counter()
567 for elem in set(self) | set(other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000568 p, q = self[elem], other[elem]
569 newcount = q if p < q else p
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000570 if newcount > 0:
571 result[elem] = newcount
572 return result
573
574 def __and__(self, other):
575 ''' Intersection is the minimum of corresponding counts.
576
577 >>> Counter('abbb') & Counter('bcc')
578 Counter({'b': 1})
579
580 '''
581 if not isinstance(other, Counter):
582 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000583 result = Counter()
584 if len(self) < len(other):
585 self, other = other, self
586 for elem in _ifilter(self.__contains__, other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000587 p, q = self[elem], other[elem]
588 newcount = p if p < q else q
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000589 if newcount > 0:
590 result[elem] = newcount
591 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000592
593
594if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000595 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000596 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000597 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000598 p = Point(x=10, y=20)
599 assert p == loads(dumps(p))
600
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000601 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000602 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000603 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000604 @property
605 def hypot(self):
606 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000607 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000608 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000609
Raymond Hettingere1655082008-01-10 19:15:10 +0000610 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000611 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000612
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000613 class Point(namedtuple('Point', 'x y')):
614 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000615 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000616 _make = classmethod(tuple.__new__)
617 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000618 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000619
620 print Point(11, 22)._replace(x=100)
621
Raymond Hettingere850c462008-01-10 20:37:12 +0000622 Point3D = namedtuple('Point3D', Point._fields + ('z',))
623 print Point3D.__doc__
624
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000625 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000626 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000627 print TestResults(*doctest.testmod())