blob: c889ab37f3383160f3a92c02e790da503220ff3d [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerbc512d32009-03-03 04:45:34 +00009from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerbc512d32009-03-03 04:45:34 +000013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, \
Georg Brandl7d4b7592010-02-06 22:49:47 +000014 ifilter as _ifilter, imap as _imap
Raymond Hettinger74f869e2010-09-13 22:14:36 +000015try:
16 from thread import get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000017except ImportError:
Raymond Hettinger74f869e2010-09-13 22:14:36 +000018 from dummy_thread import get_ident
19
20def _recursive_repr(user_function):
21 'Decorator to make a repr function return "..." for a recursive call'
22 repr_running = set()
23
24 def wrapper(self):
25 key = id(self), get_ident()
26 if key in repr_running:
27 return '...'
28 repr_running.add(key)
29 try:
30 result = user_function(self)
31 finally:
32 repr_running.discard(key)
33 return result
34
35 # Can't use functools.wraps() here because of bootstrap issues
36 wrapper.__module__ = getattr(user_function, '__module__')
37 wrapper.__doc__ = getattr(user_function, '__doc__')
38 wrapper.__name__ = getattr(user_function, '__name__')
39 return wrapper
40
Raymond Hettingerbc512d32009-03-03 04:45:34 +000041
42################################################################################
43### OrderedDict
44################################################################################
45
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000046class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000047 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000048 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000049 # The inherited dict provides __getitem__, __len__, __contains__, and get.
50 # The remaining methods are order-aware.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000051 # Big-O running times for all methods are the same as for regular dictionaries.
52
53 # The internal self.__map dictionary maps keys to links in a doubly linked list.
54 # The circular doubly linked list starts and ends with a sentinel element.
55 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000056 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000057
58 def __init__(self, *args, **kwds):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000059 '''Initialize an ordered dictionary. Signature is the same as for
60 regular dictionaries, but keyword arguments are not recommended
61 because their insertion order is arbitrary.
62
63 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000064 if len(args) > 1:
65 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000066 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000067 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000068 except AttributeError:
Raymond Hettingeraba22932010-03-09 09:58:53 +000069 self.__root = root = [None, None, None] # sentinel node
Raymond Hettingere30bc382010-03-09 11:29:10 +000070 PREV = 0
71 NEXT = 1
Raymond Hettingeraba22932010-03-09 09:58:53 +000072 root[PREV] = root[NEXT] = root
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000073 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000074 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000075
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000076 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000077 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000078 # Setting a new item creates a new link which goes at the end of the linked
79 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000080 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000081 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000082 last = root[PREV]
83 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000084 dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000085
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000086 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000087 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000088 # Deleting an existing item uses self.__map to find the link which is
89 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000090 dict_delitem(self, key)
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000091 link = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000092 link_prev = link[PREV]
93 link_next = link[NEXT]
94 link_prev[NEXT] = link_next
95 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000096
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000097 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000098 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000099 # Traverse the linked list in order.
100 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +0000101 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000102 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +0000103 yield curr[KEY]
104 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000105
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +0000106 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000107 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000108 # Traverse the linked list in reverse order.
109 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +0000110 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000111 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +0000112 yield curr[KEY]
113 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000114
Raymond Hettinger39282762010-04-03 00:39:26 +0000115 def clear(self):
116 'od.clear() -> None. Remove all items from od.'
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +0000117 try:
118 for node in self.__map.itervalues():
119 del node[:]
120 self.__root[:] = [self.__root, self.__root, None]
121 self.__map.clear()
122 except AttributeError:
123 pass
124 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000125
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000126 update = __update = MutableMapping.update
Raymond Hettingera61ae692009-03-18 22:13:20 +0000127 keys = MutableMapping.keys
128 values = MutableMapping.values
129 items = MutableMapping.items
130 iterkeys = MutableMapping.iterkeys
131 itervalues = MutableMapping.itervalues
132 iteritems = MutableMapping.iteritems
133 __ne__ = MutableMapping.__ne__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000134
Raymond Hettingera54b2da2010-08-17 19:03:06 +0000135 def viewkeys(self):
136 "od.viewkeys() -> a set-like object providing a view on od's keys"
137 return KeysView(self)
138
139 def viewvalues(self):
140 "od.viewvalues() -> an object providing a view on od's values"
141 return ValuesView(self)
142
143 def viewitems(self):
144 "od.viewitems() -> a set-like object providing a view on od's items"
145 return ItemsView(self)
146
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000147 __marker = object()
148
149 def pop(self, key, default=__marker):
150 if key in self:
151 result = self[key]
152 del self[key]
153 return result
154 if default is self.__marker:
155 raise KeyError(key)
156 return default
157
158 def setdefault(self, key, default=None):
159 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
160 if key in self:
161 return self[key]
162 self[key] = default
163 return default
164
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000165 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000166 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
167 Pairs are returned in LIFO order if last is true or FIFO order if false.
168
169 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000170 if not self:
171 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000172 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000173 value = self.pop(key)
174 return key, value
175
Raymond Hettinger74f869e2010-09-13 22:14:36 +0000176 @_recursive_repr
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000177 def __repr__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000178 'od.__repr__() <==> repr(od)'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000179 if not self:
180 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger0b155412009-03-03 21:13:51 +0000181 return '%s(%r)' % (self.__class__.__name__, self.items())
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000182
Raymond Hettinger3674c852011-04-20 13:11:38 -0700183 def __reduce__(self):
184 'Return state information for pickling'
185 items = [[k, self[k]] for k in self]
186 inst_dict = vars(self).copy()
187 for k in vars(OrderedDict()):
188 inst_dict.pop(k, None)
189 if inst_dict:
190 return (self.__class__, (items,), inst_dict)
191 return self.__class__, (items,)
192
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000193 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000194 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000195 return self.__class__(self)
196
197 @classmethod
198 def fromkeys(cls, iterable, value=None):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000199 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
200 and values equal to v (which defaults to None).
201
202 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000203 d = cls()
204 for key in iterable:
205 d[key] = value
206 return d
207
208 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000209 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
210 while comparison to a regular mapping is order-insensitive.
211
212 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000213 if isinstance(other, OrderedDict):
Raymond Hettingere5b78562009-03-23 18:26:59 +0000214 return len(self)==len(other) and \
215 all(_imap(_eq, self.iteritems(), other.iteritems()))
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000216 return dict.__eq__(self, other)
217
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000218
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000219################################################################################
220### namedtuple
221################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000222
Raymond Hettinger322daea2009-02-10 01:24:05 +0000223def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000224 """Returns a new subclass of tuple with named fields.
225
Raymond Hettinger01a09572007-10-23 20:37:41 +0000226 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000227 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000228 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000229 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000230 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000231 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000232 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000233 >>> x, y
234 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000235 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000236 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000237 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000238 >>> d['x']
239 11
240 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000241 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000242 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000243 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000244
245 """
246
Raymond Hettinger58167142008-01-08 02:02:05 +0000247 # Parse and validate the field names. Validation serves two purposes,
248 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000249 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000250 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000251 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000252 if rename:
253 names = list(field_names)
254 seen = set()
255 for i, name in enumerate(names):
256 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
257 or not name or name[0].isdigit() or name.startswith('_')
258 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000259 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000260 seen.add(name)
261 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000262 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000263 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000264 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000265 if _iskeyword(name):
266 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000267 if name[0].isdigit():
268 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000269 seen_names = set()
270 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000271 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000272 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000273 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000274 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000275 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000276
277 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000278 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000279 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000280 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
281 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000282 '%(typename)s(%(argtxt)s)' \n
283 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000284 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000285 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000286 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000287 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000288 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000289 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000290 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000291 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000292 if len(result) != %(numfields)d:
293 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
294 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000295 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000296 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000297 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000298 def _asdict(self):
299 'Return a new OrderedDict which maps field names to their values'
300 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000301 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000302 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000303 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000304 if kwds:
305 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000306 return result \n
307 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000308 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000309 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000310 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000311 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000312 if verbose:
313 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000314
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000315 # Execute the template string in a temporary namespace and
316 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000317 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
318 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000319 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000320 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000321 except SyntaxError, e:
322 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000323 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000324
325 # For pickling to work, the __module__ variable needs to be set to the frame
326 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000327 # sys._getframe is not defined (Jython for example) or sys._getframe is not
328 # defined for arguments greater than 0 (IronPython).
329 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000330 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000331 except (AttributeError, ValueError):
332 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000333
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000334 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000335
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000336
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000337########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000338### Counter
339########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000340
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000341class Counter(dict):
342 '''Dict subclass for counting hashable items. Sometimes called a bag
343 or multiset. Elements are stored as dictionary keys and their counts
344 are stored as dictionary values.
345
Raymond Hettingerc886a842011-01-03 08:59:18 +0000346 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000347
348 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000349 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000350 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000351 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000352 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000353 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000354 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000355 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000356
357 >>> c['a'] # count of letter 'a'
358 5
359 >>> for elem in 'shazam': # update counts from an iterable
360 ... c[elem] += 1 # by adding 1 to each element's count
361 >>> c['a'] # now there are seven 'a'
362 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000363 >>> del c['b'] # remove all 'b'
364 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000365 0
366
367 >>> d = Counter('simsalabim') # make another counter
368 >>> c.update(d) # add in the second counter
369 >>> c['a'] # now there are nine 'a'
370 9
371
372 >>> c.clear() # empty the counter
373 >>> c
374 Counter()
375
376 Note: If a count is set to zero or reduced to zero, it will remain
377 in the counter until the entry is deleted or the counter is cleared:
378
379 >>> c = Counter('aaabbc')
380 >>> c['b'] -= 2 # reduce the count of 'b' by two
381 >>> c.most_common() # 'b' is still in, but its count is zero
382 [('a', 3), ('c', 1), ('b', 0)]
383
384 '''
385 # References:
386 # http://en.wikipedia.org/wiki/Multiset
387 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
388 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
389 # http://code.activestate.com/recipes/259174/
390 # Knuth, TAOCP Vol. II section 4.6.3
391
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000392 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000393 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000394 from an input iterable. Or, initialize the count from another mapping
395 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000396
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000397 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000398 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000399 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000400 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000401
402 '''
Raymond Hettingerc886a842011-01-03 08:59:18 +0000403 super(Counter, self).__init__()
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000404 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000405
406 def __missing__(self, key):
407 'The count of elements not in the Counter is zero.'
408 # Needed so that self[missing_item] does not raise KeyError
409 return 0
410
411 def most_common(self, n=None):
412 '''List the n most common elements and their counts from the most
413 common to the least. If n is None, then list all element counts.
414
Raymond Hettingerc886a842011-01-03 08:59:18 +0000415 >>> Counter('abcdeabcdabcaba').most_common(3)
416 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000417
418 '''
419 # Emulate Bag.sortedByCount from Smalltalk
420 if n is None:
421 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
422 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
423
424 def elements(self):
425 '''Iterator over elements repeating each as many times as its count.
426
427 >>> c = Counter('ABCABC')
428 >>> sorted(c.elements())
429 ['A', 'A', 'B', 'B', 'C', 'C']
430
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000431 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
432 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
433 >>> product = 1
434 >>> for factor in prime_factors.elements(): # loop over factors
435 ... product *= factor # and multiply them
436 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000437 1836
438
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000439 Note, if an element's count has been set to zero or is a negative
440 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000441
442 '''
443 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000444 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000445
446 # Override dict methods where necessary
447
448 @classmethod
449 def fromkeys(cls, iterable, v=None):
450 # There is no equivalent method for counters because setting v=1
451 # means that no element can have a count greater than one.
452 raise NotImplementedError(
453 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
454
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000455 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000456 '''Like dict.update() but add counts instead of replacing them.
457
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000458 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000459
460 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000461 >>> c.update('witch') # add elements from another iterable
462 >>> d = Counter('watch')
463 >>> c.update(d) # add elements from another counter
464 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000465 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000466
467 '''
468 # The regular dict.update() operation makes no sense here because the
469 # replace behavior results in the some of original untouched counts
470 # being mixed-in with all of the other counts for a mismash that
471 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000472 # contexts. Instead, we implement straight-addition. Both the inputs
473 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000474
475 if iterable is not None:
476 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000477 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000478 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000479 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000480 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000481 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000482 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000483 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000484 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000485 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000486 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000487 if kwds:
488 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000489
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000490 def subtract(self, iterable=None, **kwds):
491 '''Like dict.update() but subtracts counts instead of replacing them.
492 Counts can be reduced below zero. Both the inputs and outputs are
493 allowed to contain zero and negative counts.
494
495 Source can be an iterable, a dictionary, or another Counter instance.
496
497 >>> c = Counter('which')
498 >>> c.subtract('witch') # subtract elements from another iterable
499 >>> c.subtract(Counter('watch')) # subtract elements from another counter
500 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
501 0
502 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
503 -1
504
505 '''
506 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000507 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000508 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000509 for elem, count in iterable.items():
510 self[elem] = self_get(elem, 0) - count
511 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000512 for elem in iterable:
513 self[elem] = self_get(elem, 0) - 1
514 if kwds:
515 self.subtract(kwds)
516
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000517 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700518 'Return a shallow copy.'
519 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000520
Raymond Hettingerc886a842011-01-03 08:59:18 +0000521 def __reduce__(self):
522 return self.__class__, (dict(self),)
523
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000524 def __delitem__(self, elem):
525 'Like dict.__delitem__() but does not raise KeyError for missing values.'
526 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000527 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000528
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000529 def __repr__(self):
530 if not self:
531 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000532 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000533 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000534
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000535 # Multiset-style mathematical operations discussed in:
536 # Knuth TAOCP Volume II section 4.6.3 exercise 19
537 # and at http://en.wikipedia.org/wiki/Multiset
538 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000539 # Outputs guaranteed to only include positive counts.
540 #
541 # To strip negative and zero counts, add-in an empty counter:
542 # c += Counter()
543
544 def __add__(self, other):
545 '''Add counts from two counters.
546
547 >>> Counter('abbb') + Counter('bcc')
548 Counter({'b': 4, 'c': 2, 'a': 1})
549
550 '''
551 if not isinstance(other, Counter):
552 return NotImplemented
553 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700554 for elem, count in self.items():
555 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000556 if newcount > 0:
557 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700558 for elem, count in other.items():
559 if elem not in self and count > 0:
560 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000561 return result
562
563 def __sub__(self, other):
564 ''' Subtract count, but keep only results with positive counts.
565
566 >>> Counter('abbbc') - Counter('bccd')
567 Counter({'b': 2, 'a': 1})
568
569 '''
570 if not isinstance(other, Counter):
571 return NotImplemented
572 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700573 for elem, count in self.items():
574 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000575 if newcount > 0:
576 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700577 for elem, count in other.items():
578 if elem not in self and count < 0:
579 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000580 return result
581
582 def __or__(self, other):
583 '''Union is the maximum of value in either of the input counters.
584
585 >>> Counter('abbb') | Counter('bcc')
586 Counter({'b': 3, 'c': 2, 'a': 1})
587
588 '''
589 if not isinstance(other, Counter):
590 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000591 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700592 for elem, count in self.items():
593 other_count = other[elem]
594 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000595 if newcount > 0:
596 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700597 for elem, count in other.items():
598 if elem not in self and count > 0:
599 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000600 return result
601
602 def __and__(self, other):
603 ''' Intersection is the minimum of corresponding counts.
604
605 >>> Counter('abbb') & Counter('bcc')
606 Counter({'b': 1})
607
608 '''
609 if not isinstance(other, Counter):
610 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000611 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700612 for elem, count in self.items():
613 other_count = other[elem]
614 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000615 if newcount > 0:
616 result[elem] = newcount
617 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000618
619
620if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000621 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000622 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000623 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000624 p = Point(x=10, y=20)
625 assert p == loads(dumps(p))
626
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000627 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000628 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000629 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000630 @property
631 def hypot(self):
632 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000633 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000634 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000635
Raymond Hettingere1655082008-01-10 19:15:10 +0000636 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000637 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000638
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000639 class Point(namedtuple('Point', 'x y')):
640 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000641 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000642 _make = classmethod(tuple.__new__)
643 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000644 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000645
646 print Point(11, 22)._replace(x=100)
647
Raymond Hettingere850c462008-01-10 20:37:12 +0000648 Point3D = namedtuple('Point3D', Point._fields + ('z',))
649 print Point3D.__doc__
650
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000651 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000652 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000653 print TestResults(*doctest.testmod())