blob: 10c8903697cb5d39d43d0ec3a25f1bd32333184c [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerbc512d32009-03-03 04:45:34 +00009from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerbc512d32009-03-03 04:45:34 +000013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, \
Georg Brandl7d4b7592010-02-06 22:49:47 +000014 ifilter as _ifilter, imap as _imap
Raymond Hettingerbc512d32009-03-03 04:45:34 +000015
16################################################################################
17### OrderedDict
18################################################################################
19
20class OrderedDict(dict, MutableMapping):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000021 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000022 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000023 # The inherited dict provides __getitem__, __len__, __contains__, and get.
24 # The remaining methods are order-aware.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000025 # Big-O running times for all methods are the same as for regular dictionaries.
26
27 # The internal self.__map dictionary maps keys to links in a doubly linked list.
28 # The circular doubly linked list starts and ends with a sentinel element.
29 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000030 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000031
32 def __init__(self, *args, **kwds):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000033 '''Initialize an ordered dictionary. Signature is the same as for
34 regular dictionaries, but keyword arguments are not recommended
35 because their insertion order is arbitrary.
36
37 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000038 if len(args) > 1:
39 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000040 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000041 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000042 except AttributeError:
Raymond Hettingeraba22932010-03-09 09:58:53 +000043 self.__root = root = [None, None, None] # sentinel node
Raymond Hettingere30bc382010-03-09 11:29:10 +000044 PREV = 0
45 NEXT = 1
Raymond Hettingeraba22932010-03-09 09:58:53 +000046 root[PREV] = root[NEXT] = root
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000047 self.__map = {}
Raymond Hettingerbc512d32009-03-03 04:45:34 +000048 self.update(*args, **kwds)
49
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000050 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000051 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000052 # Setting a new item creates a new link which goes at the end of the linked
53 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000054 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000055 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000056 last = root[PREV]
57 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000058 dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000059
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000060 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000061 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000062 # Deleting an existing item uses self.__map to find the link which is
63 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000064 dict_delitem(self, key)
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000065 link = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000066 link_prev = link[PREV]
67 link_next = link[NEXT]
68 link_prev[NEXT] = link_next
69 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000070
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000071 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000072 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000073 # Traverse the linked list in order.
74 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000075 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000076 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000077 yield curr[KEY]
78 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000079
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000080 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000081 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000082 # Traverse the linked list in reverse order.
83 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000084 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000085 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000086 yield curr[KEY]
87 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000088
89 def __reduce__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000090 'Return state information for pickling'
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +000091 items = [[k, self[k]] for k in self]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000092 tmp = self.__map, self.__root
93 del self.__map, self.__root
Raymond Hettingerbc512d32009-03-03 04:45:34 +000094 inst_dict = vars(self).copy()
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000095 self.__map, self.__root = tmp
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +000096 if inst_dict:
97 return (self.__class__, (items,), inst_dict)
98 return self.__class__, (items,)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000099
Raymond Hettinger39282762010-04-03 00:39:26 +0000100 def clear(self):
101 'od.clear() -> None. Remove all items from od.'
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +0000102 try:
103 for node in self.__map.itervalues():
104 del node[:]
105 self.__root[:] = [self.__root, self.__root, None]
106 self.__map.clear()
107 except AttributeError:
108 pass
109 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000110
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000111 setdefault = MutableMapping.setdefault
112 update = MutableMapping.update
113 pop = MutableMapping.pop
Raymond Hettingera61ae692009-03-18 22:13:20 +0000114 keys = MutableMapping.keys
115 values = MutableMapping.values
116 items = MutableMapping.items
117 iterkeys = MutableMapping.iterkeys
118 itervalues = MutableMapping.itervalues
119 iteritems = MutableMapping.iteritems
120 __ne__ = MutableMapping.__ne__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000121
Raymond Hettingera54b2da2010-08-17 19:03:06 +0000122 def viewkeys(self):
123 "od.viewkeys() -> a set-like object providing a view on od's keys"
124 return KeysView(self)
125
126 def viewvalues(self):
127 "od.viewvalues() -> an object providing a view on od's values"
128 return ValuesView(self)
129
130 def viewitems(self):
131 "od.viewitems() -> a set-like object providing a view on od's items"
132 return ItemsView(self)
133
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000134 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000135 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
136 Pairs are returned in LIFO order if last is true or FIFO order if false.
137
138 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000139 if not self:
140 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000141 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000142 value = self.pop(key)
143 return key, value
144
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000145 def __repr__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000146 'od.__repr__() <==> repr(od)'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000147 if not self:
148 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger0b155412009-03-03 21:13:51 +0000149 return '%s(%r)' % (self.__class__.__name__, self.items())
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000150
151 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000152 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000153 return self.__class__(self)
154
155 @classmethod
156 def fromkeys(cls, iterable, value=None):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000157 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
158 and values equal to v (which defaults to None).
159
160 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000161 d = cls()
162 for key in iterable:
163 d[key] = value
164 return d
165
166 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000167 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
168 while comparison to a regular mapping is order-insensitive.
169
170 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000171 if isinstance(other, OrderedDict):
Raymond Hettingere5b78562009-03-23 18:26:59 +0000172 return len(self)==len(other) and \
173 all(_imap(_eq, self.iteritems(), other.iteritems()))
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000174 return dict.__eq__(self, other)
175
Raymond Hettinger39282762010-04-03 00:39:26 +0000176 def __del__(self):
177 self.clear() # eliminate cyclical references
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000178
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000179
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000180################################################################################
181### namedtuple
182################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000183
Raymond Hettinger322daea2009-02-10 01:24:05 +0000184def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000185 """Returns a new subclass of tuple with named fields.
186
Raymond Hettinger01a09572007-10-23 20:37:41 +0000187 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000188 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000189 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000190 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000191 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000192 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000193 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000194 >>> x, y
195 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000196 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000197 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000198 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000199 >>> d['x']
200 11
201 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000202 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000203 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000204 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000205
206 """
207
Raymond Hettinger58167142008-01-08 02:02:05 +0000208 # Parse and validate the field names. Validation serves two purposes,
209 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000210 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000211 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000212 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000213 if rename:
214 names = list(field_names)
215 seen = set()
216 for i, name in enumerate(names):
217 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
218 or not name or name[0].isdigit() or name.startswith('_')
219 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000220 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000221 seen.add(name)
222 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000223 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000224 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000225 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000226 if _iskeyword(name):
227 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000228 if name[0].isdigit():
229 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000230 seen_names = set()
231 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000232 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000233 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000234 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000235 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000236 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000237
238 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000239 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000240 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000241 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
242 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000243 '%(typename)s(%(argtxt)s)' \n
244 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000245 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000246 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000247 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000248 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000249 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000250 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000251 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000252 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000253 if len(result) != %(numfields)d:
254 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
255 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000256 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000257 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000258 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000259 def _asdict(self):
260 'Return a new OrderedDict which maps field names to their values'
261 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000262 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000263 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000264 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000265 if kwds:
266 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000267 return result \n
268 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000269 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000270 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000271 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000272 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000273 if verbose:
274 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000275
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000276 # Execute the template string in a temporary namespace and
277 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000278 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
279 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000280 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000281 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000282 except SyntaxError, e:
283 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000284 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000285
286 # For pickling to work, the __module__ variable needs to be set to the frame
287 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000288 # sys._getframe is not defined (Jython for example) or sys._getframe is not
289 # defined for arguments greater than 0 (IronPython).
290 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000291 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000292 except (AttributeError, ValueError):
293 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000294
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000295 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000296
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000297
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000298########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000299### Counter
300########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000301
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000302class Counter(dict):
303 '''Dict subclass for counting hashable items. Sometimes called a bag
304 or multiset. Elements are stored as dictionary keys and their counts
305 are stored as dictionary values.
306
307 >>> c = Counter('abracadabra') # count elements from a string
308
309 >>> c.most_common(3) # three most common elements
310 [('a', 5), ('r', 2), ('b', 2)]
311 >>> sorted(c) # list all unique elements
312 ['a', 'b', 'c', 'd', 'r']
313 >>> ''.join(sorted(c.elements())) # list elements with repetitions
314 'aaaaabbcdrr'
315 >>> sum(c.values()) # total of all counts
316 11
317
318 >>> c['a'] # count of letter 'a'
319 5
320 >>> for elem in 'shazam': # update counts from an iterable
321 ... c[elem] += 1 # by adding 1 to each element's count
322 >>> c['a'] # now there are seven 'a'
323 7
324 >>> del c['r'] # remove all 'r'
325 >>> c['r'] # now there are zero 'r'
326 0
327
328 >>> d = Counter('simsalabim') # make another counter
329 >>> c.update(d) # add in the second counter
330 >>> c['a'] # now there are nine 'a'
331 9
332
333 >>> c.clear() # empty the counter
334 >>> c
335 Counter()
336
337 Note: If a count is set to zero or reduced to zero, it will remain
338 in the counter until the entry is deleted or the counter is cleared:
339
340 >>> c = Counter('aaabbc')
341 >>> c['b'] -= 2 # reduce the count of 'b' by two
342 >>> c.most_common() # 'b' is still in, but its count is zero
343 [('a', 3), ('c', 1), ('b', 0)]
344
345 '''
346 # References:
347 # http://en.wikipedia.org/wiki/Multiset
348 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
349 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
350 # http://code.activestate.com/recipes/259174/
351 # Knuth, TAOCP Vol. II section 4.6.3
352
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000353 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000354 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000355 from an input iterable. Or, initialize the count from another mapping
356 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000357
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000358 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000359 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000360 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000361 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000362
363 '''
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000364 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000365
366 def __missing__(self, key):
367 'The count of elements not in the Counter is zero.'
368 # Needed so that self[missing_item] does not raise KeyError
369 return 0
370
371 def most_common(self, n=None):
372 '''List the n most common elements and their counts from the most
373 common to the least. If n is None, then list all element counts.
374
375 >>> Counter('abracadabra').most_common(3)
376 [('a', 5), ('r', 2), ('b', 2)]
377
378 '''
379 # Emulate Bag.sortedByCount from Smalltalk
380 if n is None:
381 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
382 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
383
384 def elements(self):
385 '''Iterator over elements repeating each as many times as its count.
386
387 >>> c = Counter('ABCABC')
388 >>> sorted(c.elements())
389 ['A', 'A', 'B', 'B', 'C', 'C']
390
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000391 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
392 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
393 >>> product = 1
394 >>> for factor in prime_factors.elements(): # loop over factors
395 ... product *= factor # and multiply them
396 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000397 1836
398
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000399 Note, if an element's count has been set to zero or is a negative
400 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000401
402 '''
403 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000404 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000405
406 # Override dict methods where necessary
407
408 @classmethod
409 def fromkeys(cls, iterable, v=None):
410 # There is no equivalent method for counters because setting v=1
411 # means that no element can have a count greater than one.
412 raise NotImplementedError(
413 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
414
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000415 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000416 '''Like dict.update() but add counts instead of replacing them.
417
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000418 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000419
420 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000421 >>> c.update('witch') # add elements from another iterable
422 >>> d = Counter('watch')
423 >>> c.update(d) # add elements from another counter
424 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000425 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000426
427 '''
428 # The regular dict.update() operation makes no sense here because the
429 # replace behavior results in the some of original untouched counts
430 # being mixed-in with all of the other counts for a mismash that
431 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000432 # contexts. Instead, we implement straight-addition. Both the inputs
433 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000434
435 if iterable is not None:
436 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000437 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000438 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000439 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000440 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000441 else:
442 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000443 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000444 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000445 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000446 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000447 if kwds:
448 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000449
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000450 def subtract(self, iterable=None, **kwds):
451 '''Like dict.update() but subtracts counts instead of replacing them.
452 Counts can be reduced below zero. Both the inputs and outputs are
453 allowed to contain zero and negative counts.
454
455 Source can be an iterable, a dictionary, or another Counter instance.
456
457 >>> c = Counter('which')
458 >>> c.subtract('witch') # subtract elements from another iterable
459 >>> c.subtract(Counter('watch')) # subtract elements from another counter
460 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
461 0
462 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
463 -1
464
465 '''
466 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000467 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000468 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000469 for elem, count in iterable.items():
470 self[elem] = self_get(elem, 0) - count
471 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000472 for elem in iterable:
473 self[elem] = self_get(elem, 0) - 1
474 if kwds:
475 self.subtract(kwds)
476
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000477 def copy(self):
478 'Like dict.copy() but returns a Counter instance instead of a dict.'
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000479 return Counter(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000480
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000481 def __delitem__(self, elem):
482 'Like dict.__delitem__() but does not raise KeyError for missing values.'
483 if elem in self:
484 dict.__delitem__(self, elem)
485
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000486 def __repr__(self):
487 if not self:
488 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000489 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000490 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000491
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000492 # Multiset-style mathematical operations discussed in:
493 # Knuth TAOCP Volume II section 4.6.3 exercise 19
494 # and at http://en.wikipedia.org/wiki/Multiset
495 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000496 # Outputs guaranteed to only include positive counts.
497 #
498 # To strip negative and zero counts, add-in an empty counter:
499 # c += Counter()
500
501 def __add__(self, other):
502 '''Add counts from two counters.
503
504 >>> Counter('abbb') + Counter('bcc')
505 Counter({'b': 4, 'c': 2, 'a': 1})
506
507 '''
508 if not isinstance(other, Counter):
509 return NotImplemented
510 result = Counter()
511 for elem in set(self) | set(other):
512 newcount = self[elem] + other[elem]
513 if newcount > 0:
514 result[elem] = newcount
515 return result
516
517 def __sub__(self, other):
518 ''' Subtract count, but keep only results with positive counts.
519
520 >>> Counter('abbbc') - Counter('bccd')
521 Counter({'b': 2, 'a': 1})
522
523 '''
524 if not isinstance(other, Counter):
525 return NotImplemented
526 result = Counter()
Raymond Hettinger4571f342009-01-21 20:31:50 +0000527 for elem in set(self) | set(other):
528 newcount = self[elem] - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000529 if newcount > 0:
530 result[elem] = newcount
531 return result
532
533 def __or__(self, other):
534 '''Union is the maximum of value in either of the input counters.
535
536 >>> Counter('abbb') | Counter('bcc')
537 Counter({'b': 3, 'c': 2, 'a': 1})
538
539 '''
540 if not isinstance(other, Counter):
541 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000542 result = Counter()
543 for elem in set(self) | set(other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000544 p, q = self[elem], other[elem]
545 newcount = q if p < q else p
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000546 if newcount > 0:
547 result[elem] = newcount
548 return result
549
550 def __and__(self, other):
551 ''' Intersection is the minimum of corresponding counts.
552
553 >>> Counter('abbb') & Counter('bcc')
554 Counter({'b': 1})
555
556 '''
557 if not isinstance(other, Counter):
558 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000559 result = Counter()
560 if len(self) < len(other):
561 self, other = other, self
562 for elem in _ifilter(self.__contains__, other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000563 p, q = self[elem], other[elem]
564 newcount = p if p < q else q
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000565 if newcount > 0:
566 result[elem] = newcount
567 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000568
569
570if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000571 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000572 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000573 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000574 p = Point(x=10, y=20)
575 assert p == loads(dumps(p))
576
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000577 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000578 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000579 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000580 @property
581 def hypot(self):
582 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000583 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000584 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000585
Raymond Hettingere1655082008-01-10 19:15:10 +0000586 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000587 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000588
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000589 class Point(namedtuple('Point', 'x y')):
590 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000591 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000592 _make = classmethod(tuple.__new__)
593 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000594 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000595
596 print Point(11, 22)._replace(x=100)
597
Raymond Hettingere850c462008-01-10 20:37:12 +0000598 Point3D = namedtuple('Point3D', Point._fields + ('z',))
599 print Point3D.__doc__
600
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000601 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000602 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000603 print TestResults(*doctest.testmod())