blob: 79f6696bb83140a1f3431bb00c252f2d0f36babf [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerbc512d32009-03-03 04:45:34 +00009from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerbc512d32009-03-03 04:45:34 +000013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, \
Georg Brandl7d4b7592010-02-06 22:49:47 +000014 ifilter as _ifilter, imap as _imap
Raymond Hettingerbc512d32009-03-03 04:45:34 +000015
16################################################################################
17### OrderedDict
18################################################################################
19
20class OrderedDict(dict, MutableMapping):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000021 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000022 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000023 # The inherited dict provides __getitem__, __len__, __contains__, and get.
24 # The remaining methods are order-aware.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000025 # Big-O running times for all methods are the same as for regular dictionaries.
26
27 # The internal self.__map dictionary maps keys to links in a doubly linked list.
28 # The circular doubly linked list starts and ends with a sentinel element.
29 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000030 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000031
32 def __init__(self, *args, **kwds):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000033 '''Initialize an ordered dictionary. Signature is the same as for
34 regular dictionaries, but keyword arguments are not recommended
35 because their insertion order is arbitrary.
36
37 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000038 if len(args) > 1:
39 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000040 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000041 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000042 except AttributeError:
Raymond Hettingeraba22932010-03-09 09:58:53 +000043 self.__root = root = [None, None, None] # sentinel node
Raymond Hettingere30bc382010-03-09 11:29:10 +000044 PREV = 0
45 NEXT = 1
Raymond Hettingeraba22932010-03-09 09:58:53 +000046 root[PREV] = root[NEXT] = root
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000047 self.__map = {}
Raymond Hettingerbc512d32009-03-03 04:45:34 +000048 self.update(*args, **kwds)
49
Raymond Hettingerbc512d32009-03-03 04:45:34 +000050 def __setitem__(self, key, value):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000051 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000052 # Setting a new item creates a new link which goes at the end of the linked
53 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000054 if key not in self:
Raymond Hettingere30bc382010-03-09 11:29:10 +000055 PREV = 0
56 NEXT = 1
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000057 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000058 last = root[PREV]
59 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000060 dict.__setitem__(self, key, value)
61
62 def __delitem__(self, key):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000063 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000064 # Deleting an existing item uses self.__map to find the link which is
65 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000066 dict.__delitem__(self, key)
Raymond Hettingere30bc382010-03-09 11:29:10 +000067 PREV = 0
68 NEXT = 1
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000069 link = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000070 link_prev = link[PREV]
71 link_next = link[NEXT]
72 link_prev[NEXT] = link_next
73 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000074
75 def __iter__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000076 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000077 # Traverse the linked list in order.
Raymond Hettingere30bc382010-03-09 11:29:10 +000078 NEXT = 1
79 KEY = 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000080 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000081 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000082 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000083 yield curr[KEY]
84 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000085
86 def __reversed__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000087 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000088 # Traverse the linked list in reverse order.
Raymond Hettingere30bc382010-03-09 11:29:10 +000089 PREV = 0
90 KEY = 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000091 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000092 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000093 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000094 yield curr[KEY]
95 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000096
97 def __reduce__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000098 'Return state information for pickling'
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +000099 items = [[k, self[k]] for k in self]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000100 tmp = self.__map, self.__root
101 del self.__map, self.__root
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000102 inst_dict = vars(self).copy()
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000103 self.__map, self.__root = tmp
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +0000104 if inst_dict:
105 return (self.__class__, (items,), inst_dict)
106 return self.__class__, (items,)
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000107
Raymond Hettinger39282762010-04-03 00:39:26 +0000108 def clear(self):
109 'od.clear() -> None. Remove all items from od.'
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +0000110 try:
111 for node in self.__map.itervalues():
112 del node[:]
113 self.__root[:] = [self.__root, self.__root, None]
114 self.__map.clear()
115 except AttributeError:
116 pass
117 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000118
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000119 setdefault = MutableMapping.setdefault
120 update = MutableMapping.update
121 pop = MutableMapping.pop
Raymond Hettingera61ae692009-03-18 22:13:20 +0000122 keys = MutableMapping.keys
123 values = MutableMapping.values
124 items = MutableMapping.items
125 iterkeys = MutableMapping.iterkeys
126 itervalues = MutableMapping.itervalues
127 iteritems = MutableMapping.iteritems
128 __ne__ = MutableMapping.__ne__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000129
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000130 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000131 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
132 Pairs are returned in LIFO order if last is true or FIFO order if false.
133
134 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000135 if not self:
136 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000137 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000138 value = self.pop(key)
139 return key, value
140
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000141 def __repr__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000142 'od.__repr__() <==> repr(od)'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000143 if not self:
144 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger0b155412009-03-03 21:13:51 +0000145 return '%s(%r)' % (self.__class__.__name__, self.items())
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000146
147 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000148 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000149 return self.__class__(self)
150
151 @classmethod
152 def fromkeys(cls, iterable, value=None):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000153 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
154 and values equal to v (which defaults to None).
155
156 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000157 d = cls()
158 for key in iterable:
159 d[key] = value
160 return d
161
162 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000163 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
164 while comparison to a regular mapping is order-insensitive.
165
166 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000167 if isinstance(other, OrderedDict):
Raymond Hettingere5b78562009-03-23 18:26:59 +0000168 return len(self)==len(other) and \
169 all(_imap(_eq, self.iteritems(), other.iteritems()))
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000170 return dict.__eq__(self, other)
171
Raymond Hettinger39282762010-04-03 00:39:26 +0000172 def __del__(self):
173 self.clear() # eliminate cyclical references
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000174
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000175
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000176################################################################################
177### namedtuple
178################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000179
Raymond Hettinger322daea2009-02-10 01:24:05 +0000180def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000181 """Returns a new subclass of tuple with named fields.
182
Raymond Hettinger01a09572007-10-23 20:37:41 +0000183 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000184 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000185 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000186 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000187 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000188 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000189 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000190 >>> x, y
191 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000192 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000193 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000194 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000195 >>> d['x']
196 11
197 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000198 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000199 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000200 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000201
202 """
203
Raymond Hettinger58167142008-01-08 02:02:05 +0000204 # Parse and validate the field names. Validation serves two purposes,
205 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000206 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000207 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000208 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000209 if rename:
210 names = list(field_names)
211 seen = set()
212 for i, name in enumerate(names):
213 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
214 or not name or name[0].isdigit() or name.startswith('_')
215 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000216 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000217 seen.add(name)
218 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000219 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000220 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000221 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000222 if _iskeyword(name):
223 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000224 if name[0].isdigit():
225 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000226 seen_names = set()
227 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000228 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000229 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000230 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000231 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000232 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000233
234 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000235 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000236 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000237 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
238 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000239 '%(typename)s(%(argtxt)s)' \n
240 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000241 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000242 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000243 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000244 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000245 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000246 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000247 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000248 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000249 if len(result) != %(numfields)d:
250 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
251 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000252 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000253 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000254 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000255 def _asdict(self):
256 'Return a new OrderedDict which maps field names to their values'
257 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000258 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000259 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000260 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000261 if kwds:
262 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000263 return result \n
264 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000265 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000266 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000267 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000268 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000269 if verbose:
270 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000271
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000272 # Execute the template string in a temporary namespace and
273 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000274 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
275 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000276 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000277 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000278 except SyntaxError, e:
279 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000280 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000281
282 # For pickling to work, the __module__ variable needs to be set to the frame
283 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000284 # sys._getframe is not defined (Jython for example) or sys._getframe is not
285 # defined for arguments greater than 0 (IronPython).
286 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000287 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000288 except (AttributeError, ValueError):
289 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000290
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000291 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000292
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000293
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000294########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000295### Counter
296########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000297
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000298class Counter(dict):
299 '''Dict subclass for counting hashable items. Sometimes called a bag
300 or multiset. Elements are stored as dictionary keys and their counts
301 are stored as dictionary values.
302
303 >>> c = Counter('abracadabra') # count elements from a string
304
305 >>> c.most_common(3) # three most common elements
306 [('a', 5), ('r', 2), ('b', 2)]
307 >>> sorted(c) # list all unique elements
308 ['a', 'b', 'c', 'd', 'r']
309 >>> ''.join(sorted(c.elements())) # list elements with repetitions
310 'aaaaabbcdrr'
311 >>> sum(c.values()) # total of all counts
312 11
313
314 >>> c['a'] # count of letter 'a'
315 5
316 >>> for elem in 'shazam': # update counts from an iterable
317 ... c[elem] += 1 # by adding 1 to each element's count
318 >>> c['a'] # now there are seven 'a'
319 7
320 >>> del c['r'] # remove all 'r'
321 >>> c['r'] # now there are zero 'r'
322 0
323
324 >>> d = Counter('simsalabim') # make another counter
325 >>> c.update(d) # add in the second counter
326 >>> c['a'] # now there are nine 'a'
327 9
328
329 >>> c.clear() # empty the counter
330 >>> c
331 Counter()
332
333 Note: If a count is set to zero or reduced to zero, it will remain
334 in the counter until the entry is deleted or the counter is cleared:
335
336 >>> c = Counter('aaabbc')
337 >>> c['b'] -= 2 # reduce the count of 'b' by two
338 >>> c.most_common() # 'b' is still in, but its count is zero
339 [('a', 3), ('c', 1), ('b', 0)]
340
341 '''
342 # References:
343 # http://en.wikipedia.org/wiki/Multiset
344 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
345 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
346 # http://code.activestate.com/recipes/259174/
347 # Knuth, TAOCP Vol. II section 4.6.3
348
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000349 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000350 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000351 from an input iterable. Or, initialize the count from another mapping
352 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000353
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000354 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000355 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000356 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000357 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000358
359 '''
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000360 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000361
362 def __missing__(self, key):
363 'The count of elements not in the Counter is zero.'
364 # Needed so that self[missing_item] does not raise KeyError
365 return 0
366
367 def most_common(self, n=None):
368 '''List the n most common elements and their counts from the most
369 common to the least. If n is None, then list all element counts.
370
371 >>> Counter('abracadabra').most_common(3)
372 [('a', 5), ('r', 2), ('b', 2)]
373
374 '''
375 # Emulate Bag.sortedByCount from Smalltalk
376 if n is None:
377 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
378 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
379
380 def elements(self):
381 '''Iterator over elements repeating each as many times as its count.
382
383 >>> c = Counter('ABCABC')
384 >>> sorted(c.elements())
385 ['A', 'A', 'B', 'B', 'C', 'C']
386
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000387 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
388 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
389 >>> product = 1
390 >>> for factor in prime_factors.elements(): # loop over factors
391 ... product *= factor # and multiply them
392 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000393 1836
394
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000395 Note, if an element's count has been set to zero or is a negative
396 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000397
398 '''
399 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000400 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000401
402 # Override dict methods where necessary
403
404 @classmethod
405 def fromkeys(cls, iterable, v=None):
406 # There is no equivalent method for counters because setting v=1
407 # means that no element can have a count greater than one.
408 raise NotImplementedError(
409 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
410
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000411 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000412 '''Like dict.update() but add counts instead of replacing them.
413
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000414 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000415
416 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000417 >>> c.update('witch') # add elements from another iterable
418 >>> d = Counter('watch')
419 >>> c.update(d) # add elements from another counter
420 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000421 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000422
423 '''
424 # The regular dict.update() operation makes no sense here because the
425 # replace behavior results in the some of original untouched counts
426 # being mixed-in with all of the other counts for a mismash that
427 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000428 # contexts. Instead, we implement straight-addition. Both the inputs
429 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000430
431 if iterable is not None:
432 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000433 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000434 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000435 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000436 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000437 else:
438 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000439 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000440 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000441 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000442 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000443 if kwds:
444 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000445
446 def copy(self):
447 'Like dict.copy() but returns a Counter instance instead of a dict.'
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000448 return Counter(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000449
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000450 def __delitem__(self, elem):
451 'Like dict.__delitem__() but does not raise KeyError for missing values.'
452 if elem in self:
453 dict.__delitem__(self, elem)
454
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000455 def __repr__(self):
456 if not self:
457 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000458 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000459 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000460
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000461 # Multiset-style mathematical operations discussed in:
462 # Knuth TAOCP Volume II section 4.6.3 exercise 19
463 # and at http://en.wikipedia.org/wiki/Multiset
464 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000465 # Outputs guaranteed to only include positive counts.
466 #
467 # To strip negative and zero counts, add-in an empty counter:
468 # c += Counter()
469
470 def __add__(self, other):
471 '''Add counts from two counters.
472
473 >>> Counter('abbb') + Counter('bcc')
474 Counter({'b': 4, 'c': 2, 'a': 1})
475
476 '''
477 if not isinstance(other, Counter):
478 return NotImplemented
479 result = Counter()
480 for elem in set(self) | set(other):
481 newcount = self[elem] + other[elem]
482 if newcount > 0:
483 result[elem] = newcount
484 return result
485
486 def __sub__(self, other):
487 ''' Subtract count, but keep only results with positive counts.
488
489 >>> Counter('abbbc') - Counter('bccd')
490 Counter({'b': 2, 'a': 1})
491
492 '''
493 if not isinstance(other, Counter):
494 return NotImplemented
495 result = Counter()
Raymond Hettinger4571f342009-01-21 20:31:50 +0000496 for elem in set(self) | set(other):
497 newcount = self[elem] - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000498 if newcount > 0:
499 result[elem] = newcount
500 return result
501
502 def __or__(self, other):
503 '''Union is the maximum of value in either of the input counters.
504
505 >>> Counter('abbb') | Counter('bcc')
506 Counter({'b': 3, 'c': 2, 'a': 1})
507
508 '''
509 if not isinstance(other, Counter):
510 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000511 result = Counter()
512 for elem in set(self) | set(other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000513 p, q = self[elem], other[elem]
514 newcount = q if p < q else p
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000515 if newcount > 0:
516 result[elem] = newcount
517 return result
518
519 def __and__(self, other):
520 ''' Intersection is the minimum of corresponding counts.
521
522 >>> Counter('abbb') & Counter('bcc')
523 Counter({'b': 1})
524
525 '''
526 if not isinstance(other, Counter):
527 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000528 result = Counter()
529 if len(self) < len(other):
530 self, other = other, self
531 for elem in _ifilter(self.__contains__, other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000532 p, q = self[elem], other[elem]
533 newcount = p if p < q else q
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000534 if newcount > 0:
535 result[elem] = newcount
536 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000537
538
539if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000540 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000541 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000542 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000543 p = Point(x=10, y=20)
544 assert p == loads(dumps(p))
545
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000546 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000547 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000548 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000549 @property
550 def hypot(self):
551 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000552 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000553 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000554
Raymond Hettingere1655082008-01-10 19:15:10 +0000555 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000556 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000557
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000558 class Point(namedtuple('Point', 'x y')):
559 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000560 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000561 _make = classmethod(tuple.__new__)
562 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000563 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000564
565 print Point(11, 22)._replace(x=100)
566
Raymond Hettingere850c462008-01-10 20:37:12 +0000567 Point3D = namedtuple('Point3D', Point._fields + ('z',))
568 print Point3D.__doc__
569
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000570 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000571 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000572 print TestResults(*doctest.testmod())