blob: f2ae8ea5c1d45a027c63a3f2f1330cefcf743d69 [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerbc512d32009-03-03 04:45:34 +00009from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerbc512d32009-03-03 04:45:34 +000013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, \
Georg Brandl7d4b7592010-02-06 22:49:47 +000014 ifilter as _ifilter, imap as _imap
Raymond Hettinger74f869e2010-09-13 22:14:36 +000015try:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070016 from thread import get_ident as _get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000017except ImportError:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070018 from dummy_thread import get_ident as _get_ident
Raymond Hettinger74f869e2010-09-13 22:14:36 +000019
Raymond Hettingerbc512d32009-03-03 04:45:34 +000020
21################################################################################
22### OrderedDict
23################################################################################
24
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000025class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000027 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000030 # Big-O running times for all methods are the same as for regular dictionaries.
31
32 # The internal self.__map dictionary maps keys to links in a doubly linked list.
33 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000035 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000036
37 def __init__(self, *args, **kwds):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000038 '''Initialize an ordered dictionary. Signature is the same as for
39 regular dictionaries, but keyword arguments are not recommended
40 because their insertion order is arbitrary.
41
42 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000043 if len(args) > 1:
44 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000045 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000046 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000047 except AttributeError:
Raymond Hettingeraba22932010-03-09 09:58:53 +000048 self.__root = root = [None, None, None] # sentinel node
Raymond Hettingere30bc382010-03-09 11:29:10 +000049 PREV = 0
50 NEXT = 1
Raymond Hettingeraba22932010-03-09 09:58:53 +000051 root[PREV] = root[NEXT] = root
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000052 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000053 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000054
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000055 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000056 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000057 # Setting a new item creates a new link which goes at the end of the linked
58 # list, and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000059 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000060 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000061 last = root[PREV]
62 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000063 dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000064
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000065 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000066 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000067 # Deleting an existing item uses self.__map to find the link which is
68 # then removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000069 dict_delitem(self, key)
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000070 link = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000071 link_prev = link[PREV]
72 link_next = link[NEXT]
73 link_prev[NEXT] = link_next
74 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000075
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000076 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000077 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000078 # Traverse the linked list in order.
79 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000080 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000081 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000082 yield curr[KEY]
83 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000084
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000085 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000086 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000087 # Traverse the linked list in reverse order.
88 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000089 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000090 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000091 yield curr[KEY]
92 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000093
Raymond Hettinger39282762010-04-03 00:39:26 +000094 def clear(self):
95 'od.clear() -> None. Remove all items from od.'
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +000096 try:
97 for node in self.__map.itervalues():
98 del node[:]
99 self.__root[:] = [self.__root, self.__root, None]
100 self.__map.clear()
101 except AttributeError:
102 pass
103 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000104
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700105 # -- the following methods do not depend on the internal structure --
106
107 def keys(self):
108 'od.keys() -> list of keys in od'
109 return list(self)
110
111 def values(self):
112 'od.values() -> list of values in od'
113 return [self[key] for key in self]
114
115 def items(self):
116 'od.items() -> list of (key, value) pairs in od'
117 return [(key, self[key]) for key in self]
118
119 def iterkeys(self):
120 'od.iterkeys() -> an iterator over the keys in od'
121 return iter(self)
122
123 def itervalues(self):
124 'od.itervalues -> an iterator over the values in od'
125 for k in self:
126 yield self[k]
127
128 def iteritems(self):
129 'od.iteritems -> an iterator over the (key, value) items in od'
130 for k in self:
131 yield (k, self[k])
132
133 update = MutableMapping.update
134
135 __update = update # let subclasses override update without breaking __init__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000136
Raymond Hettingera54b2da2010-08-17 19:03:06 +0000137 def viewkeys(self):
138 "od.viewkeys() -> a set-like object providing a view on od's keys"
139 return KeysView(self)
140
141 def viewvalues(self):
142 "od.viewvalues() -> an object providing a view on od's values"
143 return ValuesView(self)
144
145 def viewitems(self):
146 "od.viewitems() -> a set-like object providing a view on od's items"
147 return ItemsView(self)
148
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000149 __marker = object()
150
151 def pop(self, key, default=__marker):
152 if key in self:
153 result = self[key]
154 del self[key]
155 return result
156 if default is self.__marker:
157 raise KeyError(key)
158 return default
159
160 def setdefault(self, key, default=None):
161 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
162 if key in self:
163 return self[key]
164 self[key] = default
165 return default
166
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000167 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000168 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
169 Pairs are returned in LIFO order if last is true or FIFO order if false.
170
171 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000172 if not self:
173 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000174 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000175 value = self.pop(key)
176 return key, value
177
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700178 def __repr__(self, _repr_running={}):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000179 'od.__repr__() <==> repr(od)'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700180 call_key = id(self), _get_ident()
181 if call_key in _repr_running:
182 return '...'
183 _repr_running[call_key] = 1
184 try:
185 if not self:
186 return '%s()' % (self.__class__.__name__,)
187 return '%s(%r)' % (self.__class__.__name__, self.items())
188 finally:
189 del _repr_running[call_key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000190
Raymond Hettinger3674c852011-04-20 13:11:38 -0700191 def __reduce__(self):
192 'Return state information for pickling'
193 items = [[k, self[k]] for k in self]
194 inst_dict = vars(self).copy()
195 for k in vars(OrderedDict()):
196 inst_dict.pop(k, None)
197 if inst_dict:
198 return (self.__class__, (items,), inst_dict)
199 return self.__class__, (items,)
200
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000201 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000202 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000203 return self.__class__(self)
204
205 @classmethod
206 def fromkeys(cls, iterable, value=None):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000207 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
208 and values equal to v (which defaults to None).
209
210 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000211 d = cls()
212 for key in iterable:
213 d[key] = value
214 return d
215
216 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000217 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
218 while comparison to a regular mapping is order-insensitive.
219
220 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000221 if isinstance(other, OrderedDict):
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700222 return len(self)==len(other) and self.items() == other.items()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000223 return dict.__eq__(self, other)
224
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700225 def __ne__(self, other):
226 'od.__ne__(y) <==> od!=y'
227 return not self == other
228
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000229
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000230################################################################################
231### namedtuple
232################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000233
Raymond Hettinger322daea2009-02-10 01:24:05 +0000234def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000235 """Returns a new subclass of tuple with named fields.
236
Raymond Hettinger01a09572007-10-23 20:37:41 +0000237 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000238 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000239 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000240 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000241 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000242 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000243 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000244 >>> x, y
245 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000246 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000247 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000248 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000249 >>> d['x']
250 11
251 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000252 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000253 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000254 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000255
256 """
257
Raymond Hettinger58167142008-01-08 02:02:05 +0000258 # Parse and validate the field names. Validation serves two purposes,
259 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000260 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000261 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000262 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000263 if rename:
264 names = list(field_names)
265 seen = set()
266 for i, name in enumerate(names):
267 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
268 or not name or name[0].isdigit() or name.startswith('_')
269 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000270 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000271 seen.add(name)
272 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000273 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000274 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000275 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000276 if _iskeyword(name):
277 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000278 if name[0].isdigit():
279 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000280 seen_names = set()
281 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000282 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000283 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000284 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000285 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000286 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000287
288 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000289 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000290 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000291 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
292 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000293 '%(typename)s(%(argtxt)s)' \n
294 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000295 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000296 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000297 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000298 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000299 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000300 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000301 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000302 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000303 if len(result) != %(numfields)d:
304 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
305 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000306 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000307 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000308 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000309 def _asdict(self):
310 'Return a new OrderedDict which maps field names to their values'
311 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000312 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000313 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000314 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000315 if kwds:
316 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000317 return result \n
318 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000319 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000320 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000321 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000322 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000323 if verbose:
324 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000325
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000326 # Execute the template string in a temporary namespace and
327 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000328 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
329 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000330 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000331 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000332 except SyntaxError, e:
333 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000334 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000335
336 # For pickling to work, the __module__ variable needs to be set to the frame
337 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000338 # sys._getframe is not defined (Jython for example) or sys._getframe is not
339 # defined for arguments greater than 0 (IronPython).
340 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000341 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000342 except (AttributeError, ValueError):
343 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000344
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000345 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000346
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000347
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000348########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000349### Counter
350########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000351
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000352class Counter(dict):
353 '''Dict subclass for counting hashable items. Sometimes called a bag
354 or multiset. Elements are stored as dictionary keys and their counts
355 are stored as dictionary values.
356
Raymond Hettingerc886a842011-01-03 08:59:18 +0000357 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000358
359 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000360 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000361 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000362 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000363 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000364 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000365 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000366 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000367
368 >>> c['a'] # count of letter 'a'
369 5
370 >>> for elem in 'shazam': # update counts from an iterable
371 ... c[elem] += 1 # by adding 1 to each element's count
372 >>> c['a'] # now there are seven 'a'
373 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000374 >>> del c['b'] # remove all 'b'
375 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000376 0
377
378 >>> d = Counter('simsalabim') # make another counter
379 >>> c.update(d) # add in the second counter
380 >>> c['a'] # now there are nine 'a'
381 9
382
383 >>> c.clear() # empty the counter
384 >>> c
385 Counter()
386
387 Note: If a count is set to zero or reduced to zero, it will remain
388 in the counter until the entry is deleted or the counter is cleared:
389
390 >>> c = Counter('aaabbc')
391 >>> c['b'] -= 2 # reduce the count of 'b' by two
392 >>> c.most_common() # 'b' is still in, but its count is zero
393 [('a', 3), ('c', 1), ('b', 0)]
394
395 '''
396 # References:
397 # http://en.wikipedia.org/wiki/Multiset
398 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
399 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
400 # http://code.activestate.com/recipes/259174/
401 # Knuth, TAOCP Vol. II section 4.6.3
402
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000403 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000404 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000405 from an input iterable. Or, initialize the count from another mapping
406 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000407
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000408 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000409 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000410 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000411 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000412
413 '''
Raymond Hettingerc886a842011-01-03 08:59:18 +0000414 super(Counter, self).__init__()
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000415 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000416
417 def __missing__(self, key):
418 'The count of elements not in the Counter is zero.'
419 # Needed so that self[missing_item] does not raise KeyError
420 return 0
421
422 def most_common(self, n=None):
423 '''List the n most common elements and their counts from the most
424 common to the least. If n is None, then list all element counts.
425
Raymond Hettingerc886a842011-01-03 08:59:18 +0000426 >>> Counter('abcdeabcdabcaba').most_common(3)
427 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000428
429 '''
430 # Emulate Bag.sortedByCount from Smalltalk
431 if n is None:
432 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
433 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
434
435 def elements(self):
436 '''Iterator over elements repeating each as many times as its count.
437
438 >>> c = Counter('ABCABC')
439 >>> sorted(c.elements())
440 ['A', 'A', 'B', 'B', 'C', 'C']
441
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000442 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
443 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
444 >>> product = 1
445 >>> for factor in prime_factors.elements(): # loop over factors
446 ... product *= factor # and multiply them
447 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000448 1836
449
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000450 Note, if an element's count has been set to zero or is a negative
451 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000452
453 '''
454 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000455 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000456
457 # Override dict methods where necessary
458
459 @classmethod
460 def fromkeys(cls, iterable, v=None):
461 # There is no equivalent method for counters because setting v=1
462 # means that no element can have a count greater than one.
463 raise NotImplementedError(
464 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
465
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000466 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000467 '''Like dict.update() but add counts instead of replacing them.
468
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000469 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000470
471 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000472 >>> c.update('witch') # add elements from another iterable
473 >>> d = Counter('watch')
474 >>> c.update(d) # add elements from another counter
475 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000476 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000477
478 '''
479 # The regular dict.update() operation makes no sense here because the
480 # replace behavior results in the some of original untouched counts
481 # being mixed-in with all of the other counts for a mismash that
482 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000483 # contexts. Instead, we implement straight-addition. Both the inputs
484 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000485
486 if iterable is not None:
487 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000488 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000489 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000490 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000491 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000492 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000493 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000494 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000495 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000496 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000497 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000498 if kwds:
499 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000500
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000501 def subtract(self, iterable=None, **kwds):
502 '''Like dict.update() but subtracts counts instead of replacing them.
503 Counts can be reduced below zero. Both the inputs and outputs are
504 allowed to contain zero and negative counts.
505
506 Source can be an iterable, a dictionary, or another Counter instance.
507
508 >>> c = Counter('which')
509 >>> c.subtract('witch') # subtract elements from another iterable
510 >>> c.subtract(Counter('watch')) # subtract elements from another counter
511 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
512 0
513 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
514 -1
515
516 '''
517 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000518 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000519 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000520 for elem, count in iterable.items():
521 self[elem] = self_get(elem, 0) - count
522 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000523 for elem in iterable:
524 self[elem] = self_get(elem, 0) - 1
525 if kwds:
526 self.subtract(kwds)
527
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000528 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700529 'Return a shallow copy.'
530 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000531
Raymond Hettingerc886a842011-01-03 08:59:18 +0000532 def __reduce__(self):
533 return self.__class__, (dict(self),)
534
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000535 def __delitem__(self, elem):
536 'Like dict.__delitem__() but does not raise KeyError for missing values.'
537 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000538 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000539
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000540 def __repr__(self):
541 if not self:
542 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000543 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000544 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000545
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000546 # Multiset-style mathematical operations discussed in:
547 # Knuth TAOCP Volume II section 4.6.3 exercise 19
548 # and at http://en.wikipedia.org/wiki/Multiset
549 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000550 # Outputs guaranteed to only include positive counts.
551 #
552 # To strip negative and zero counts, add-in an empty counter:
553 # c += Counter()
554
555 def __add__(self, other):
556 '''Add counts from two counters.
557
558 >>> Counter('abbb') + Counter('bcc')
559 Counter({'b': 4, 'c': 2, 'a': 1})
560
561 '''
562 if not isinstance(other, Counter):
563 return NotImplemented
564 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700565 for elem, count in self.items():
566 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000567 if newcount > 0:
568 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700569 for elem, count in other.items():
570 if elem not in self and count > 0:
571 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000572 return result
573
574 def __sub__(self, other):
575 ''' Subtract count, but keep only results with positive counts.
576
577 >>> Counter('abbbc') - Counter('bccd')
578 Counter({'b': 2, 'a': 1})
579
580 '''
581 if not isinstance(other, Counter):
582 return NotImplemented
583 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700584 for elem, count in self.items():
585 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000586 if newcount > 0:
587 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700588 for elem, count in other.items():
589 if elem not in self and count < 0:
590 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000591 return result
592
593 def __or__(self, other):
594 '''Union is the maximum of value in either of the input counters.
595
596 >>> Counter('abbb') | Counter('bcc')
597 Counter({'b': 3, 'c': 2, 'a': 1})
598
599 '''
600 if not isinstance(other, Counter):
601 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000602 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700603 for elem, count in self.items():
604 other_count = other[elem]
605 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000606 if newcount > 0:
607 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700608 for elem, count in other.items():
609 if elem not in self and count > 0:
610 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000611 return result
612
613 def __and__(self, other):
614 ''' Intersection is the minimum of corresponding counts.
615
616 >>> Counter('abbb') & Counter('bcc')
617 Counter({'b': 1})
618
619 '''
620 if not isinstance(other, Counter):
621 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000622 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700623 for elem, count in self.items():
624 other_count = other[elem]
625 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000626 if newcount > 0:
627 result[elem] = newcount
628 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000629
630
631if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000632 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000633 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000634 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000635 p = Point(x=10, y=20)
636 assert p == loads(dumps(p))
637
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000638 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000639 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000640 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000641 @property
642 def hypot(self):
643 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000644 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000645 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000646
Raymond Hettingere1655082008-01-10 19:15:10 +0000647 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000648 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000649
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000650 class Point(namedtuple('Point', 'x y')):
651 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000652 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000653 _make = classmethod(tuple.__new__)
654 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000655 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000656
657 print Point(11, 22)._replace(x=100)
658
Raymond Hettingere850c462008-01-10 20:37:12 +0000659 Point3D = namedtuple('Point3D', Point._fields + ('z',))
660 print Point3D.__doc__
661
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000662 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000663 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000664 print TestResults(*doctest.testmod())