blob: 9b00fb8694f5e8ac18f4507250f4510fac11e346 [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerbc512d32009-03-03 04:45:34 +00009from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerbc512d32009-03-03 04:45:34 +000013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, \
Georg Brandl7d4b7592010-02-06 22:49:47 +000014 ifilter as _ifilter, imap as _imap
Raymond Hettingerbc512d32009-03-03 04:45:34 +000015
16################################################################################
17### OrderedDict
18################################################################################
19
20class OrderedDict(dict, MutableMapping):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000021 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000022 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000023 # The inherited dict provides __getitem__, __len__, __contains__, and get.
24 # The remaining methods are order-aware.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000025 # Big-O running times for all methods are the same as for regular dictionaries.
26
27 # The internal self.__map dictionary maps keys to links in a doubly linked list.
28 # The circular doubly linked list starts and ends with a sentinel element.
29 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000030 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000031
32 def __init__(self, *args, **kwds):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000033 '''Initialize an ordered dictionary. Signature is the same as for
34 regular dictionaries, but keyword arguments are not recommended
35 because their insertion order is arbitrary.
36
37 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000038 if len(args) > 1:
39 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000040 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000041 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000042 except AttributeError:
Raymond Hettingeraba22932010-03-09 09:58:53 +000043 self.__root = root = [None, None, None] # sentinel node
44 PREV, NEXT = 0, 1
45 root[PREV] = root[NEXT] = root
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000046 self.__map = {}
Raymond Hettingerbc512d32009-03-03 04:45:34 +000047 self.update(*args, **kwds)
48
Raymond Hettingerbc512d32009-03-03 04:45:34 +000049 def __setitem__(self, key, value):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000050 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000051 # Setting a new item creates a new link which goes at the end of the linked
52 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000053 if key not in self:
Raymond Hettingeraba22932010-03-09 09:58:53 +000054 PREV, NEXT = 0, 1
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000055 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000056 last = root[PREV]
57 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000058 dict.__setitem__(self, key, value)
59
60 def __delitem__(self, key):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000061 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000062 # Deleting an existing item uses self.__map to find the link which is
63 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000064 dict.__delitem__(self, key)
Raymond Hettingeraba22932010-03-09 09:58:53 +000065 PREV, NEXT = 0, 1
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000066 link = self.__map.pop(key)
Raymond Hettingeraba22932010-03-09 09:58:53 +000067 link[PREV][NEXT] = link[NEXT]
68 link[NEXT][PREV] = link[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000069
70 def __iter__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000071 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000072 # Traverse the linked list in order.
Raymond Hettingeraba22932010-03-09 09:58:53 +000073 NEXT, KEY = 1, 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000074 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000075 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000076 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000077 yield curr[KEY]
78 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000079
80 def __reversed__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000081 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000082 # Traverse the linked list in reverse order.
Raymond Hettingeraba22932010-03-09 09:58:53 +000083 PREV, KEY = 0, 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000084 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000085 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000086 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000087 yield curr[KEY]
88 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000089
90 def __reduce__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000091 'Return state information for pickling'
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +000092 items = [[k, self[k]] for k in self]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000093 tmp = self.__map, self.__root
94 del self.__map, self.__root
Raymond Hettingerbc512d32009-03-03 04:45:34 +000095 inst_dict = vars(self).copy()
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000096 self.__map, self.__root = tmp
Alexandre Vassalotticb73bda2009-06-12 23:03:35 +000097 if inst_dict:
98 return (self.__class__, (items,), inst_dict)
99 return self.__class__, (items,)
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000100
Raymond Hettingeraba22932010-03-09 09:58:53 +0000101 clear = MutableMapping.clear
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000102 setdefault = MutableMapping.setdefault
103 update = MutableMapping.update
104 pop = MutableMapping.pop
Raymond Hettingera61ae692009-03-18 22:13:20 +0000105 keys = MutableMapping.keys
106 values = MutableMapping.values
107 items = MutableMapping.items
108 iterkeys = MutableMapping.iterkeys
109 itervalues = MutableMapping.itervalues
110 iteritems = MutableMapping.iteritems
111 __ne__ = MutableMapping.__ne__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000112
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000113 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000114 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
115 Pairs are returned in LIFO order if last is true or FIFO order if false.
116
117 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000118 if not self:
119 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000120 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000121 value = self.pop(key)
122 return key, value
123
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000124 def __repr__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000125 'od.__repr__() <==> repr(od)'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000126 if not self:
127 return '%s()' % (self.__class__.__name__,)
Raymond Hettinger0b155412009-03-03 21:13:51 +0000128 return '%s(%r)' % (self.__class__.__name__, self.items())
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000129
130 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000131 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000132 return self.__class__(self)
133
134 @classmethod
135 def fromkeys(cls, iterable, value=None):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000136 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
137 and values equal to v (which defaults to None).
138
139 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000140 d = cls()
141 for key in iterable:
142 d[key] = value
143 return d
144
145 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000146 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
147 while comparison to a regular mapping is order-insensitive.
148
149 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000150 if isinstance(other, OrderedDict):
Raymond Hettingere5b78562009-03-23 18:26:59 +0000151 return len(self)==len(other) and \
152 all(_imap(_eq, self.iteritems(), other.iteritems()))
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000153 return dict.__eq__(self, other)
154
155
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000156
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000157################################################################################
158### namedtuple
159################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000160
Raymond Hettinger322daea2009-02-10 01:24:05 +0000161def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000162 """Returns a new subclass of tuple with named fields.
163
Raymond Hettinger01a09572007-10-23 20:37:41 +0000164 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000165 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000166 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000167 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000168 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000169 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000170 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000171 >>> x, y
172 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000173 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000174 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000175 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000176 >>> d['x']
177 11
178 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000179 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000180 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000181 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000182
183 """
184
Raymond Hettinger58167142008-01-08 02:02:05 +0000185 # Parse and validate the field names. Validation serves two purposes,
186 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000187 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000188 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000189 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000190 if rename:
191 names = list(field_names)
192 seen = set()
193 for i, name in enumerate(names):
194 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
195 or not name or name[0].isdigit() or name.startswith('_')
196 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000197 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000198 seen.add(name)
199 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000200 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000201 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000202 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000203 if _iskeyword(name):
204 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000205 if name[0].isdigit():
206 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000207 seen_names = set()
208 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000209 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000210 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000211 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000212 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000213 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000214
215 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000216 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000217 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000218 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
219 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000220 '%(typename)s(%(argtxt)s)' \n
221 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000222 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000223 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000224 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000225 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000226 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000227 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000228 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000229 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000230 if len(result) != %(numfields)d:
231 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
232 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000233 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000234 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000235 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000236 def _asdict(self):
237 'Return a new OrderedDict which maps field names to their values'
238 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000239 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000240 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000241 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000242 if kwds:
243 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000244 return result \n
245 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000246 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000247 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000248 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000249 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000250 if verbose:
251 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000252
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000253 # Execute the template string in a temporary namespace and
254 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000255 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
256 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000257 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000258 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000259 except SyntaxError, e:
260 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000261 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000262
263 # For pickling to work, the __module__ variable needs to be set to the frame
264 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000265 # sys._getframe is not defined (Jython for example) or sys._getframe is not
266 # defined for arguments greater than 0 (IronPython).
267 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000268 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000269 except (AttributeError, ValueError):
270 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000271
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000272 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000273
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000274
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000275########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000276### Counter
277########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000278
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000279class Counter(dict):
280 '''Dict subclass for counting hashable items. Sometimes called a bag
281 or multiset. Elements are stored as dictionary keys and their counts
282 are stored as dictionary values.
283
284 >>> c = Counter('abracadabra') # count elements from a string
285
286 >>> c.most_common(3) # three most common elements
287 [('a', 5), ('r', 2), ('b', 2)]
288 >>> sorted(c) # list all unique elements
289 ['a', 'b', 'c', 'd', 'r']
290 >>> ''.join(sorted(c.elements())) # list elements with repetitions
291 'aaaaabbcdrr'
292 >>> sum(c.values()) # total of all counts
293 11
294
295 >>> c['a'] # count of letter 'a'
296 5
297 >>> for elem in 'shazam': # update counts from an iterable
298 ... c[elem] += 1 # by adding 1 to each element's count
299 >>> c['a'] # now there are seven 'a'
300 7
301 >>> del c['r'] # remove all 'r'
302 >>> c['r'] # now there are zero 'r'
303 0
304
305 >>> d = Counter('simsalabim') # make another counter
306 >>> c.update(d) # add in the second counter
307 >>> c['a'] # now there are nine 'a'
308 9
309
310 >>> c.clear() # empty the counter
311 >>> c
312 Counter()
313
314 Note: If a count is set to zero or reduced to zero, it will remain
315 in the counter until the entry is deleted or the counter is cleared:
316
317 >>> c = Counter('aaabbc')
318 >>> c['b'] -= 2 # reduce the count of 'b' by two
319 >>> c.most_common() # 'b' is still in, but its count is zero
320 [('a', 3), ('c', 1), ('b', 0)]
321
322 '''
323 # References:
324 # http://en.wikipedia.org/wiki/Multiset
325 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
326 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
327 # http://code.activestate.com/recipes/259174/
328 # Knuth, TAOCP Vol. II section 4.6.3
329
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000330 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000331 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000332 from an input iterable. Or, initialize the count from another mapping
333 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000334
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000335 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000336 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000337 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000338 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000339
340 '''
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000341 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000342
343 def __missing__(self, key):
344 'The count of elements not in the Counter is zero.'
345 # Needed so that self[missing_item] does not raise KeyError
346 return 0
347
348 def most_common(self, n=None):
349 '''List the n most common elements and their counts from the most
350 common to the least. If n is None, then list all element counts.
351
352 >>> Counter('abracadabra').most_common(3)
353 [('a', 5), ('r', 2), ('b', 2)]
354
355 '''
356 # Emulate Bag.sortedByCount from Smalltalk
357 if n is None:
358 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
359 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
360
361 def elements(self):
362 '''Iterator over elements repeating each as many times as its count.
363
364 >>> c = Counter('ABCABC')
365 >>> sorted(c.elements())
366 ['A', 'A', 'B', 'B', 'C', 'C']
367
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000368 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
369 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
370 >>> product = 1
371 >>> for factor in prime_factors.elements(): # loop over factors
372 ... product *= factor # and multiply them
373 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000374 1836
375
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000376 Note, if an element's count has been set to zero or is a negative
377 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000378
379 '''
380 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000381 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000382
383 # Override dict methods where necessary
384
385 @classmethod
386 def fromkeys(cls, iterable, v=None):
387 # There is no equivalent method for counters because setting v=1
388 # means that no element can have a count greater than one.
389 raise NotImplementedError(
390 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
391
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000392 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000393 '''Like dict.update() but add counts instead of replacing them.
394
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000395 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000396
397 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000398 >>> c.update('witch') # add elements from another iterable
399 >>> d = Counter('watch')
400 >>> c.update(d) # add elements from another counter
401 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000402 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000403
404 '''
405 # The regular dict.update() operation makes no sense here because the
406 # replace behavior results in the some of original untouched counts
407 # being mixed-in with all of the other counts for a mismash that
408 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000409 # contexts. Instead, we implement straight-addition. Both the inputs
410 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000411
412 if iterable is not None:
413 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000414 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000415 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000416 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000417 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000418 else:
419 dict.update(self, iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000420 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000421 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000422 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000423 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000424 if kwds:
425 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000426
427 def copy(self):
428 'Like dict.copy() but returns a Counter instance instead of a dict.'
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000429 return Counter(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000430
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000431 def __delitem__(self, elem):
432 'Like dict.__delitem__() but does not raise KeyError for missing values.'
433 if elem in self:
434 dict.__delitem__(self, elem)
435
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000436 def __repr__(self):
437 if not self:
438 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000439 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000440 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000441
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000442 # Multiset-style mathematical operations discussed in:
443 # Knuth TAOCP Volume II section 4.6.3 exercise 19
444 # and at http://en.wikipedia.org/wiki/Multiset
445 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000446 # Outputs guaranteed to only include positive counts.
447 #
448 # To strip negative and zero counts, add-in an empty counter:
449 # c += Counter()
450
451 def __add__(self, other):
452 '''Add counts from two counters.
453
454 >>> Counter('abbb') + Counter('bcc')
455 Counter({'b': 4, 'c': 2, 'a': 1})
456
457 '''
458 if not isinstance(other, Counter):
459 return NotImplemented
460 result = Counter()
461 for elem in set(self) | set(other):
462 newcount = self[elem] + other[elem]
463 if newcount > 0:
464 result[elem] = newcount
465 return result
466
467 def __sub__(self, other):
468 ''' Subtract count, but keep only results with positive counts.
469
470 >>> Counter('abbbc') - Counter('bccd')
471 Counter({'b': 2, 'a': 1})
472
473 '''
474 if not isinstance(other, Counter):
475 return NotImplemented
476 result = Counter()
Raymond Hettinger4571f342009-01-21 20:31:50 +0000477 for elem in set(self) | set(other):
478 newcount = self[elem] - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000479 if newcount > 0:
480 result[elem] = newcount
481 return result
482
483 def __or__(self, other):
484 '''Union is the maximum of value in either of the input counters.
485
486 >>> Counter('abbb') | Counter('bcc')
487 Counter({'b': 3, 'c': 2, 'a': 1})
488
489 '''
490 if not isinstance(other, Counter):
491 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000492 result = Counter()
493 for elem in set(self) | set(other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000494 p, q = self[elem], other[elem]
495 newcount = q if p < q else p
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000496 if newcount > 0:
497 result[elem] = newcount
498 return result
499
500 def __and__(self, other):
501 ''' Intersection is the minimum of corresponding counts.
502
503 >>> Counter('abbb') & Counter('bcc')
504 Counter({'b': 1})
505
506 '''
507 if not isinstance(other, Counter):
508 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000509 result = Counter()
510 if len(self) < len(other):
511 self, other = other, self
512 for elem in _ifilter(self.__contains__, other):
Raymond Hettingere3bc5572009-04-04 08:46:58 +0000513 p, q = self[elem], other[elem]
514 newcount = p if p < q else q
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000515 if newcount > 0:
516 result[elem] = newcount
517 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000518
519
520if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000521 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000522 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000523 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000524 p = Point(x=10, y=20)
525 assert p == loads(dumps(p))
526
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000527 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000528 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000529 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000530 @property
531 def hypot(self):
532 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000533 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000534 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000535
Raymond Hettingere1655082008-01-10 19:15:10 +0000536 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000537 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000538
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000539 class Point(namedtuple('Point', 'x y')):
540 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000541 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000542 _make = classmethod(tuple.__new__)
543 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000544 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000545
546 print Point(11, 22)._replace(x=100)
547
Raymond Hettingere850c462008-01-10 20:37:12 +0000548 Point3D = namedtuple('Point3D', Point._fields + ('z',))
549 print Point3D.__doc__
550
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000551 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000552 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000553 print TestResults(*doctest.testmod())