blob: d41e00becc4e3a97f0294cef17a67de1b08bb1f9 [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerb36f7472011-04-23 18:37:37 -07009from operator import itemgetter as _itemgetter
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerb36f7472011-04-23 18:37:37 -070013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
14
Raymond Hettinger74f869e2010-09-13 22:14:36 +000015try:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070016 from thread import get_ident as _get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000017except ImportError:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070018 from dummy_thread import get_ident as _get_ident
Raymond Hettinger74f869e2010-09-13 22:14:36 +000019
Raymond Hettingerbc512d32009-03-03 04:45:34 +000020
21################################################################################
22### OrderedDict
23################################################################################
24
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000025class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000027 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000030 # Big-O running times for all methods are the same as for regular dictionaries.
31
32 # The internal self.__map dictionary maps keys to links in a doubly linked list.
33 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000035 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000036
37 def __init__(self, *args, **kwds):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000038 '''Initialize an ordered dictionary. Signature is the same as for
39 regular dictionaries, but keyword arguments are not recommended
40 because their insertion order is arbitrary.
41
42 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000043 if len(args) > 1:
44 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000045 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000046 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000047 except AttributeError:
Raymond Hettinger0b795e52011-04-23 15:41:38 -070048 self.__root = root = [] # sentinel node
49 root[:] = [root, root, None]
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000050 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000051 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000052
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000053 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000054 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000055 # Setting a new item creates a new link which goes at the end of the linked
56 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000057 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000058 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000059 last = root[PREV]
60 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000061 dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000062
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000063 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000064 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000065 # Deleting an existing item uses self.__map to find the link which is
66 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000067 dict_delitem(self, key)
Raymond Hettinger43a56412011-04-23 15:51:38 -070068 link_prev, link_next, key = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000069 link_prev[NEXT] = link_next
70 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000071
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000072 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000073 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000074 # Traverse the linked list in order.
75 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000076 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000077 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000078 yield curr[KEY]
79 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000080
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000081 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000082 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000083 # Traverse the linked list in reverse order.
84 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000085 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000086 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000087 yield curr[KEY]
88 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000089
Raymond Hettinger39282762010-04-03 00:39:26 +000090 def clear(self):
91 'od.clear() -> None. Remove all items from od.'
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +000092 try:
93 for node in self.__map.itervalues():
94 del node[:]
Raymond Hettinger536999c2011-04-23 20:11:50 -070095 root = self.__root
96 root[:] = [root, root, None]
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +000097 self.__map.clear()
98 except AttributeError:
99 pass
100 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000101
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700102 # -- the following methods do not depend on the internal structure --
103
104 def keys(self):
105 'od.keys() -> list of keys in od'
106 return list(self)
107
108 def values(self):
109 'od.values() -> list of values in od'
110 return [self[key] for key in self]
111
112 def items(self):
113 'od.items() -> list of (key, value) pairs in od'
114 return [(key, self[key]) for key in self]
115
116 def iterkeys(self):
117 'od.iterkeys() -> an iterator over the keys in od'
118 return iter(self)
119
120 def itervalues(self):
121 'od.itervalues -> an iterator over the values in od'
122 for k in self:
123 yield self[k]
124
125 def iteritems(self):
126 'od.iteritems -> an iterator over the (key, value) items in od'
127 for k in self:
128 yield (k, self[k])
129
130 update = MutableMapping.update
131
132 __update = update # let subclasses override update without breaking __init__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000133
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000134 __marker = object()
135
136 def pop(self, key, default=__marker):
137 if key in self:
138 result = self[key]
139 del self[key]
140 return result
141 if default is self.__marker:
142 raise KeyError(key)
143 return default
144
145 def setdefault(self, key, default=None):
146 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
147 if key in self:
148 return self[key]
149 self[key] = default
150 return default
151
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000152 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000153 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
154 Pairs are returned in LIFO order if last is true or FIFO order if false.
155
156 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000157 if not self:
158 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000159 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000160 value = self.pop(key)
161 return key, value
162
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700163 def __repr__(self, _repr_running={}):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000164 'od.__repr__() <==> repr(od)'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700165 call_key = id(self), _get_ident()
166 if call_key in _repr_running:
167 return '...'
168 _repr_running[call_key] = 1
169 try:
170 if not self:
171 return '%s()' % (self.__class__.__name__,)
172 return '%s(%r)' % (self.__class__.__name__, self.items())
173 finally:
174 del _repr_running[call_key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000175
Raymond Hettinger3674c852011-04-20 13:11:38 -0700176 def __reduce__(self):
177 'Return state information for pickling'
178 items = [[k, self[k]] for k in self]
179 inst_dict = vars(self).copy()
180 for k in vars(OrderedDict()):
181 inst_dict.pop(k, None)
182 if inst_dict:
183 return (self.__class__, (items,), inst_dict)
184 return self.__class__, (items,)
185
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000186 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000187 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000188 return self.__class__(self)
189
190 @classmethod
191 def fromkeys(cls, iterable, value=None):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000192 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
193 and values equal to v (which defaults to None).
194
195 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000196 d = cls()
197 for key in iterable:
198 d[key] = value
199 return d
200
201 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000202 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
203 while comparison to a regular mapping is order-insensitive.
204
205 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000206 if isinstance(other, OrderedDict):
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700207 return len(self)==len(other) and self.items() == other.items()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000208 return dict.__eq__(self, other)
209
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700210 def __ne__(self, other):
211 'od.__ne__(y) <==> od!=y'
212 return not self == other
213
Raymond Hettinger43a56412011-04-23 15:51:38 -0700214 # -- the following methods support python 3.x style dictionary views --
215
216 def viewkeys(self):
217 "od.viewkeys() -> a set-like object providing a view on od's keys"
218 return KeysView(self)
219
220 def viewvalues(self):
221 "od.viewvalues() -> an object providing a view on od's values"
222 return ValuesView(self)
223
224 def viewitems(self):
225 "od.viewitems() -> a set-like object providing a view on od's items"
226 return ItemsView(self)
227
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000228
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000229################################################################################
230### namedtuple
231################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000232
Raymond Hettinger322daea2009-02-10 01:24:05 +0000233def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000234 """Returns a new subclass of tuple with named fields.
235
Raymond Hettinger01a09572007-10-23 20:37:41 +0000236 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000237 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000238 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000239 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000240 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000241 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000242 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000243 >>> x, y
244 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000245 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000246 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000247 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000248 >>> d['x']
249 11
250 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000251 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000252 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000253 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000254
255 """
256
Raymond Hettinger58167142008-01-08 02:02:05 +0000257 # Parse and validate the field names. Validation serves two purposes,
258 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000259 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000260 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000261 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000262 if rename:
263 names = list(field_names)
264 seen = set()
265 for i, name in enumerate(names):
266 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
267 or not name or name[0].isdigit() or name.startswith('_')
268 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000269 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000270 seen.add(name)
271 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000272 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000273 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000274 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000275 if _iskeyword(name):
276 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000277 if name[0].isdigit():
278 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000279 seen_names = set()
280 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000281 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000282 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000283 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000284 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000285 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000286
287 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000288 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000289 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000290 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
291 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000292 '%(typename)s(%(argtxt)s)' \n
293 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000294 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000295 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000296 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000297 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000298 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000299 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000300 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000301 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000302 if len(result) != %(numfields)d:
303 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
304 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000305 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000306 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000307 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000308 def _asdict(self):
309 'Return a new OrderedDict which maps field names to their values'
310 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000311 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000312 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000313 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000314 if kwds:
315 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000316 return result \n
317 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000318 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000319 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000320 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000321 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000322 if verbose:
323 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000324
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000325 # Execute the template string in a temporary namespace and
326 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000327 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
328 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000329 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000330 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000331 except SyntaxError, e:
332 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000333 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000334
335 # For pickling to work, the __module__ variable needs to be set to the frame
336 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000337 # sys._getframe is not defined (Jython for example) or sys._getframe is not
338 # defined for arguments greater than 0 (IronPython).
339 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000340 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000341 except (AttributeError, ValueError):
342 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000343
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000344 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000345
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000346
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000347########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000348### Counter
349########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000350
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000351class Counter(dict):
352 '''Dict subclass for counting hashable items. Sometimes called a bag
353 or multiset. Elements are stored as dictionary keys and their counts
354 are stored as dictionary values.
355
Raymond Hettingerc886a842011-01-03 08:59:18 +0000356 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000357
358 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000359 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000360 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000361 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000362 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000363 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000364 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000365 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000366
367 >>> c['a'] # count of letter 'a'
368 5
369 >>> for elem in 'shazam': # update counts from an iterable
370 ... c[elem] += 1 # by adding 1 to each element's count
371 >>> c['a'] # now there are seven 'a'
372 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000373 >>> del c['b'] # remove all 'b'
374 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000375 0
376
377 >>> d = Counter('simsalabim') # make another counter
378 >>> c.update(d) # add in the second counter
379 >>> c['a'] # now there are nine 'a'
380 9
381
382 >>> c.clear() # empty the counter
383 >>> c
384 Counter()
385
386 Note: If a count is set to zero or reduced to zero, it will remain
387 in the counter until the entry is deleted or the counter is cleared:
388
389 >>> c = Counter('aaabbc')
390 >>> c['b'] -= 2 # reduce the count of 'b' by two
391 >>> c.most_common() # 'b' is still in, but its count is zero
392 [('a', 3), ('c', 1), ('b', 0)]
393
394 '''
395 # References:
396 # http://en.wikipedia.org/wiki/Multiset
397 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
398 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
399 # http://code.activestate.com/recipes/259174/
400 # Knuth, TAOCP Vol. II section 4.6.3
401
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000402 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000403 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000404 from an input iterable. Or, initialize the count from another mapping
405 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000406
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000407 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000408 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000409 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000410 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000411
412 '''
Raymond Hettingerc886a842011-01-03 08:59:18 +0000413 super(Counter, self).__init__()
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000414 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000415
416 def __missing__(self, key):
417 'The count of elements not in the Counter is zero.'
418 # Needed so that self[missing_item] does not raise KeyError
419 return 0
420
421 def most_common(self, n=None):
422 '''List the n most common elements and their counts from the most
423 common to the least. If n is None, then list all element counts.
424
Raymond Hettingerc886a842011-01-03 08:59:18 +0000425 >>> Counter('abcdeabcdabcaba').most_common(3)
426 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000427
428 '''
429 # Emulate Bag.sortedByCount from Smalltalk
430 if n is None:
431 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
432 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
433
434 def elements(self):
435 '''Iterator over elements repeating each as many times as its count.
436
437 >>> c = Counter('ABCABC')
438 >>> sorted(c.elements())
439 ['A', 'A', 'B', 'B', 'C', 'C']
440
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000441 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
442 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
443 >>> product = 1
444 >>> for factor in prime_factors.elements(): # loop over factors
445 ... product *= factor # and multiply them
446 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000447 1836
448
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000449 Note, if an element's count has been set to zero or is a negative
450 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000451
452 '''
453 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000454 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000455
456 # Override dict methods where necessary
457
458 @classmethod
459 def fromkeys(cls, iterable, v=None):
460 # There is no equivalent method for counters because setting v=1
461 # means that no element can have a count greater than one.
462 raise NotImplementedError(
463 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
464
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000465 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000466 '''Like dict.update() but add counts instead of replacing them.
467
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000468 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000469
470 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000471 >>> c.update('witch') # add elements from another iterable
472 >>> d = Counter('watch')
473 >>> c.update(d) # add elements from another counter
474 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000475 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000476
477 '''
478 # The regular dict.update() operation makes no sense here because the
479 # replace behavior results in the some of original untouched counts
480 # being mixed-in with all of the other counts for a mismash that
481 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000482 # contexts. Instead, we implement straight-addition. Both the inputs
483 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000484
485 if iterable is not None:
486 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000487 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000488 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000489 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000490 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000491 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000492 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000493 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000494 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000495 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000496 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000497 if kwds:
498 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000499
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000500 def subtract(self, iterable=None, **kwds):
501 '''Like dict.update() but subtracts counts instead of replacing them.
502 Counts can be reduced below zero. Both the inputs and outputs are
503 allowed to contain zero and negative counts.
504
505 Source can be an iterable, a dictionary, or another Counter instance.
506
507 >>> c = Counter('which')
508 >>> c.subtract('witch') # subtract elements from another iterable
509 >>> c.subtract(Counter('watch')) # subtract elements from another counter
510 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
511 0
512 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
513 -1
514
515 '''
516 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000517 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000518 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000519 for elem, count in iterable.items():
520 self[elem] = self_get(elem, 0) - count
521 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000522 for elem in iterable:
523 self[elem] = self_get(elem, 0) - 1
524 if kwds:
525 self.subtract(kwds)
526
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000527 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700528 'Return a shallow copy.'
529 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000530
Raymond Hettingerc886a842011-01-03 08:59:18 +0000531 def __reduce__(self):
532 return self.__class__, (dict(self),)
533
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000534 def __delitem__(self, elem):
535 'Like dict.__delitem__() but does not raise KeyError for missing values.'
536 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000537 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000538
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000539 def __repr__(self):
540 if not self:
541 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000542 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000543 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000544
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000545 # Multiset-style mathematical operations discussed in:
546 # Knuth TAOCP Volume II section 4.6.3 exercise 19
547 # and at http://en.wikipedia.org/wiki/Multiset
548 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000549 # Outputs guaranteed to only include positive counts.
550 #
551 # To strip negative and zero counts, add-in an empty counter:
552 # c += Counter()
553
554 def __add__(self, other):
555 '''Add counts from two counters.
556
557 >>> Counter('abbb') + Counter('bcc')
558 Counter({'b': 4, 'c': 2, 'a': 1})
559
560 '''
561 if not isinstance(other, Counter):
562 return NotImplemented
563 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700564 for elem, count in self.items():
565 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000566 if newcount > 0:
567 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700568 for elem, count in other.items():
569 if elem not in self and count > 0:
570 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000571 return result
572
573 def __sub__(self, other):
574 ''' Subtract count, but keep only results with positive counts.
575
576 >>> Counter('abbbc') - Counter('bccd')
577 Counter({'b': 2, 'a': 1})
578
579 '''
580 if not isinstance(other, Counter):
581 return NotImplemented
582 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700583 for elem, count in self.items():
584 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000585 if newcount > 0:
586 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700587 for elem, count in other.items():
588 if elem not in self and count < 0:
589 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000590 return result
591
592 def __or__(self, other):
593 '''Union is the maximum of value in either of the input counters.
594
595 >>> Counter('abbb') | Counter('bcc')
596 Counter({'b': 3, 'c': 2, 'a': 1})
597
598 '''
599 if not isinstance(other, Counter):
600 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000601 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700602 for elem, count in self.items():
603 other_count = other[elem]
604 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000605 if newcount > 0:
606 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700607 for elem, count in other.items():
608 if elem not in self and count > 0:
609 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000610 return result
611
612 def __and__(self, other):
613 ''' Intersection is the minimum of corresponding counts.
614
615 >>> Counter('abbb') & Counter('bcc')
616 Counter({'b': 1})
617
618 '''
619 if not isinstance(other, Counter):
620 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000621 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700622 for elem, count in self.items():
623 other_count = other[elem]
624 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000625 if newcount > 0:
626 result[elem] = newcount
627 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000628
629
630if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000631 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000632 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000633 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000634 p = Point(x=10, y=20)
635 assert p == loads(dumps(p))
636
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000637 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000638 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000639 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000640 @property
641 def hypot(self):
642 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000643 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000644 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000645
Raymond Hettingere1655082008-01-10 19:15:10 +0000646 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000647 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000648
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000649 class Point(namedtuple('Point', 'x y')):
650 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000651 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000652 _make = classmethod(tuple.__new__)
653 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000654 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000655
656 print Point(11, 22)._replace(x=100)
657
Raymond Hettingere850c462008-01-10 20:37:12 +0000658 Point3D = namedtuple('Point3D', Point._fields + ('z',))
659 print Point3D.__doc__
660
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000661 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000662 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000663 print TestResults(*doctest.testmod())