blob: 495601addb0e66e9b40e7ad64be795cdeb99b8e8 [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerb36f7472011-04-23 18:37:37 -07009from operator import itemgetter as _itemgetter
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerb36f7472011-04-23 18:37:37 -070013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
14
Raymond Hettinger74f869e2010-09-13 22:14:36 +000015try:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070016 from thread import get_ident as _get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000017except ImportError:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070018 from dummy_thread import get_ident as _get_ident
Raymond Hettinger74f869e2010-09-13 22:14:36 +000019
Raymond Hettingerbc512d32009-03-03 04:45:34 +000020
21################################################################################
22### OrderedDict
23################################################################################
24
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000025class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000027 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerc6467432011-04-24 12:30:39 -070030 # Big-O running times for all methods are the same as regular dictionaries.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000031
Raymond Hettingerc6467432011-04-24 12:30:39 -070032 # The internal self.__map dict maps keys to links in a doubly linked list.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000033 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000035 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000036
37 def __init__(self, *args, **kwds):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070038 '''Initialize an ordered dictionary. The signature is the same as
39 regular dictionaries, but keyword arguments are not recommended because
40 their insertion order is arbitrary.
Raymond Hettingera5cd6372009-04-08 05:39:38 +000041
42 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000043 if len(args) > 1:
44 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000045 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000046 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000047 except AttributeError:
Raymond Hettinger0b795e52011-04-23 15:41:38 -070048 self.__root = root = [] # sentinel node
49 root[:] = [root, root, None]
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000050 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000051 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000052
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000053 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000054 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerc6467432011-04-24 12:30:39 -070055 # Setting a new item creates a new link at the end of the linked list,
56 # and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000057 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000058 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000059 last = root[PREV]
60 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerec5046b2012-11-20 21:11:26 -080061 return dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000062
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000063 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000064 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerc6467432011-04-24 12:30:39 -070065 # Deleting an existing item uses self.__map to find the link which gets
66 # removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000067 dict_delitem(self, key)
Raymond Hettinger43a56412011-04-23 15:51:38 -070068 link_prev, link_next, key = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000069 link_prev[NEXT] = link_next
70 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000071
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070072 def __iter__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000073 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000074 # Traverse the linked list in order.
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070075 NEXT, KEY = 1, 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000076 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000077 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000078 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000079 yield curr[KEY]
80 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000081
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070082 def __reversed__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000083 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000084 # Traverse the linked list in reverse order.
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070085 PREV, KEY = 0, 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000086 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000087 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000088 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000089 yield curr[KEY]
90 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000091
Raymond Hettinger39282762010-04-03 00:39:26 +000092 def clear(self):
93 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerc6467432011-04-24 12:30:39 -070094 root = self.__root
95 root[:] = [root, root, None]
96 self.__map.clear()
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +000097 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +000098
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070099 # -- the following methods do not depend on the internal structure --
100
101 def keys(self):
102 'od.keys() -> list of keys in od'
103 return list(self)
104
105 def values(self):
106 'od.values() -> list of values in od'
107 return [self[key] for key in self]
108
109 def items(self):
110 'od.items() -> list of (key, value) pairs in od'
111 return [(key, self[key]) for key in self]
112
113 def iterkeys(self):
114 'od.iterkeys() -> an iterator over the keys in od'
115 return iter(self)
116
117 def itervalues(self):
118 'od.itervalues -> an iterator over the values in od'
119 for k in self:
120 yield self[k]
121
122 def iteritems(self):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700123 'od.iteritems -> an iterator over the (key, value) pairs in od'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700124 for k in self:
125 yield (k, self[k])
126
127 update = MutableMapping.update
128
Raymond Hettingerc6467432011-04-24 12:30:39 -0700129 __update = update # let subclasses override update without breaking __init__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000130
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000131 __marker = object()
132
133 def pop(self, key, default=__marker):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700134 '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
135 value. If key is not found, d is returned if given, otherwise KeyError
136 is raised.
137
138 '''
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000139 if key in self:
140 result = self[key]
141 del self[key]
142 return result
143 if default is self.__marker:
144 raise KeyError(key)
145 return default
146
147 def setdefault(self, key, default=None):
148 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
149 if key in self:
150 return self[key]
151 self[key] = default
152 return default
153
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000154 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000155 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
156 Pairs are returned in LIFO order if last is true or FIFO order if false.
157
158 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000159 if not self:
160 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000161 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000162 value = self.pop(key)
163 return key, value
164
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700165 def __repr__(self, _repr_running={}):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000166 'od.__repr__() <==> repr(od)'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700167 call_key = id(self), _get_ident()
168 if call_key in _repr_running:
169 return '...'
170 _repr_running[call_key] = 1
171 try:
172 if not self:
173 return '%s()' % (self.__class__.__name__,)
174 return '%s(%r)' % (self.__class__.__name__, self.items())
175 finally:
176 del _repr_running[call_key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000177
Raymond Hettinger3674c852011-04-20 13:11:38 -0700178 def __reduce__(self):
179 'Return state information for pickling'
180 items = [[k, self[k]] for k in self]
181 inst_dict = vars(self).copy()
182 for k in vars(OrderedDict()):
183 inst_dict.pop(k, None)
184 if inst_dict:
185 return (self.__class__, (items,), inst_dict)
186 return self.__class__, (items,)
187
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000188 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000189 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000190 return self.__class__(self)
191
192 @classmethod
193 def fromkeys(cls, iterable, value=None):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700194 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
195 If not specified, the value defaults to None.
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000196
197 '''
Raymond Hettingerc6467432011-04-24 12:30:39 -0700198 self = cls()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000199 for key in iterable:
Raymond Hettingerc6467432011-04-24 12:30:39 -0700200 self[key] = value
201 return self
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000202
203 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000204 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
205 while comparison to a regular mapping is order-insensitive.
206
207 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000208 if isinstance(other, OrderedDict):
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700209 return len(self)==len(other) and self.items() == other.items()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000210 return dict.__eq__(self, other)
211
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700212 def __ne__(self, other):
213 'od.__ne__(y) <==> od!=y'
214 return not self == other
215
Raymond Hettinger43a56412011-04-23 15:51:38 -0700216 # -- the following methods support python 3.x style dictionary views --
217
218 def viewkeys(self):
219 "od.viewkeys() -> a set-like object providing a view on od's keys"
220 return KeysView(self)
221
222 def viewvalues(self):
223 "od.viewvalues() -> an object providing a view on od's values"
224 return ValuesView(self)
225
226 def viewitems(self):
227 "od.viewitems() -> a set-like object providing a view on od's items"
228 return ItemsView(self)
229
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000230
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000231################################################################################
232### namedtuple
233################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000234
Raymond Hettinger491f7072012-06-08 13:24:12 -0700235_class_template = '''\
236class {typename}(tuple):
237 '{typename}({arg_list})'
238
239 __slots__ = ()
240
241 _fields = {field_names!r}
242
243 def __new__(_cls, {arg_list}):
244 'Create new instance of {typename}({arg_list})'
245 return _tuple.__new__(_cls, ({arg_list}))
246
247 @classmethod
248 def _make(cls, iterable, new=tuple.__new__, len=len):
249 'Make a new {typename} object from a sequence or iterable'
250 result = new(cls, iterable)
251 if len(result) != {num_fields:d}:
252 raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
253 return result
254
255 def __repr__(self):
256 'Return a nicely formatted representation string'
257 return '{typename}({repr_fmt})' % self
258
259 def _asdict(self):
260 'Return a new OrderedDict which maps field names to their values'
261 return OrderedDict(zip(self._fields, self))
262
263 __dict__ = property(_asdict)
264
265 def _replace(_self, **kwds):
266 'Return a new {typename} object replacing specified fields with new values'
267 result = _self._make(map(kwds.pop, {field_names!r}, _self))
268 if kwds:
269 raise ValueError('Got unexpected field names: %r' % kwds.keys())
270 return result
271
272 def __getnewargs__(self):
273 'Return self as a plain tuple. Used by copy and pickle.'
274 return tuple(self)
275
276{field_defs}
277'''
278
279_repr_template = '{name}=%r'
280
281_field_template = '''\
282 {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
283'''
284
Raymond Hettinger322daea2009-02-10 01:24:05 +0000285def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000286 """Returns a new subclass of tuple with named fields.
287
Raymond Hettinger491f7072012-06-08 13:24:12 -0700288 >>> Point = namedtuple('Point', ['x', 'y'])
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000289 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000290 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000291 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000292 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000293 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000294 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000295 >>> x, y
296 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000297 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000298 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000299 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000300 >>> d['x']
301 11
302 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000303 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000304 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000305 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000306
307 """
308
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700309 # Validate the field names. At the user's option, either generate an error
310 # message or automatically replace the field name with a valid name.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000311 if isinstance(field_names, basestring):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700312 field_names = field_names.replace(',', ' ').split()
313 field_names = map(str, field_names)
Raymond Hettinger322daea2009-02-10 01:24:05 +0000314 if rename:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000315 seen = set()
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700316 for index, name in enumerate(field_names):
Raymond Hettinger491f7072012-06-08 13:24:12 -0700317 if (not all(c.isalnum() or c=='_' for c in name)
318 or _iskeyword(name)
319 or not name
320 or name[0].isdigit()
321 or name.startswith('_')
Raymond Hettinger322daea2009-02-10 01:24:05 +0000322 or name in seen):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700323 field_names[index] = '_%d' % index
Raymond Hettinger322daea2009-02-10 01:24:05 +0000324 seen.add(name)
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700325 for name in [typename] + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000326 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700327 raise ValueError('Type names and field names can only contain '
328 'alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000329 if _iskeyword(name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700330 raise ValueError('Type names and field names cannot be a '
331 'keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000332 if name[0].isdigit():
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700333 raise ValueError('Type names and field names cannot start with '
334 'a number: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700335 seen = set()
Raymond Hettinger163f6222007-10-09 01:36:23 +0000336 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000337 if name.startswith('_') and not rename:
Raymond Hettinger0c2c6922012-06-09 17:27:23 -0700338 raise ValueError('Field names cannot start with an underscore: '
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700339 '%r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700340 if name in seen:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000341 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700342 seen.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000343
Raymond Hettinger491f7072012-06-08 13:24:12 -0700344 # Fill-in the class template
345 class_definition = _class_template.format(
346 typename = typename,
347 field_names = tuple(field_names),
348 num_fields = len(field_names),
349 arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700350 repr_fmt = ', '.join(_repr_template.format(name=name)
351 for name in field_names),
Raymond Hettinger491f7072012-06-08 13:24:12 -0700352 field_defs = '\n'.join(_field_template.format(index=index, name=name)
353 for index, name in enumerate(field_names))
354 )
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000355 if verbose:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700356 print class_definition
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000357
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700358 # Execute the template string in a temporary namespace and support
359 # tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000360 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
361 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000362 try:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700363 exec class_definition in namespace
364 except SyntaxError as e:
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700365 raise SyntaxError(e.message + ':\n' + class_definition)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000366 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000367
368 # For pickling to work, the __module__ variable needs to be set to the frame
369 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000370 # sys._getframe is not defined (Jython for example) or sys._getframe is not
371 # defined for arguments greater than 0 (IronPython).
372 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000373 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000374 except (AttributeError, ValueError):
375 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000376
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000377 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000378
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000379
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000380########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000381### Counter
382########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000383
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000384class Counter(dict):
385 '''Dict subclass for counting hashable items. Sometimes called a bag
386 or multiset. Elements are stored as dictionary keys and their counts
387 are stored as dictionary values.
388
Raymond Hettingerc886a842011-01-03 08:59:18 +0000389 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000390
391 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000392 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000393 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000394 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000395 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000396 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000397 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000398 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000399
400 >>> c['a'] # count of letter 'a'
401 5
402 >>> for elem in 'shazam': # update counts from an iterable
403 ... c[elem] += 1 # by adding 1 to each element's count
404 >>> c['a'] # now there are seven 'a'
405 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000406 >>> del c['b'] # remove all 'b'
407 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000408 0
409
410 >>> d = Counter('simsalabim') # make another counter
411 >>> c.update(d) # add in the second counter
412 >>> c['a'] # now there are nine 'a'
413 9
414
415 >>> c.clear() # empty the counter
416 >>> c
417 Counter()
418
419 Note: If a count is set to zero or reduced to zero, it will remain
420 in the counter until the entry is deleted or the counter is cleared:
421
422 >>> c = Counter('aaabbc')
423 >>> c['b'] -= 2 # reduce the count of 'b' by two
424 >>> c.most_common() # 'b' is still in, but its count is zero
425 [('a', 3), ('c', 1), ('b', 0)]
426
427 '''
428 # References:
429 # http://en.wikipedia.org/wiki/Multiset
430 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
431 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
432 # http://code.activestate.com/recipes/259174/
433 # Knuth, TAOCP Vol. II section 4.6.3
434
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000435 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000436 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000437 from an input iterable. Or, initialize the count from another mapping
438 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000439
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000440 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000441 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000442 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000443 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000444
445 '''
Raymond Hettingerc886a842011-01-03 08:59:18 +0000446 super(Counter, self).__init__()
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000447 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000448
449 def __missing__(self, key):
450 'The count of elements not in the Counter is zero.'
451 # Needed so that self[missing_item] does not raise KeyError
452 return 0
453
454 def most_common(self, n=None):
455 '''List the n most common elements and their counts from the most
456 common to the least. If n is None, then list all element counts.
457
Raymond Hettingerc886a842011-01-03 08:59:18 +0000458 >>> Counter('abcdeabcdabcaba').most_common(3)
459 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000460
461 '''
462 # Emulate Bag.sortedByCount from Smalltalk
463 if n is None:
464 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
465 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
466
467 def elements(self):
468 '''Iterator over elements repeating each as many times as its count.
469
470 >>> c = Counter('ABCABC')
471 >>> sorted(c.elements())
472 ['A', 'A', 'B', 'B', 'C', 'C']
473
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000474 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
475 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
476 >>> product = 1
477 >>> for factor in prime_factors.elements(): # loop over factors
478 ... product *= factor # and multiply them
479 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000480 1836
481
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000482 Note, if an element's count has been set to zero or is a negative
483 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000484
485 '''
486 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000487 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000488
489 # Override dict methods where necessary
490
491 @classmethod
492 def fromkeys(cls, iterable, v=None):
493 # There is no equivalent method for counters because setting v=1
494 # means that no element can have a count greater than one.
495 raise NotImplementedError(
496 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
497
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000498 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000499 '''Like dict.update() but add counts instead of replacing them.
500
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000501 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000502
503 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000504 >>> c.update('witch') # add elements from another iterable
505 >>> d = Counter('watch')
506 >>> c.update(d) # add elements from another counter
507 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000508 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000509
510 '''
511 # The regular dict.update() operation makes no sense here because the
512 # replace behavior results in the some of original untouched counts
513 # being mixed-in with all of the other counts for a mismash that
514 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000515 # contexts. Instead, we implement straight-addition. Both the inputs
516 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000517
518 if iterable is not None:
519 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000520 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000521 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000522 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000523 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000524 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000525 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000526 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000527 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000528 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000529 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000530 if kwds:
531 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000532
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000533 def subtract(self, iterable=None, **kwds):
534 '''Like dict.update() but subtracts counts instead of replacing them.
535 Counts can be reduced below zero. Both the inputs and outputs are
536 allowed to contain zero and negative counts.
537
538 Source can be an iterable, a dictionary, or another Counter instance.
539
540 >>> c = Counter('which')
541 >>> c.subtract('witch') # subtract elements from another iterable
542 >>> c.subtract(Counter('watch')) # subtract elements from another counter
543 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
544 0
545 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
546 -1
547
548 '''
549 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000550 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000551 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000552 for elem, count in iterable.items():
553 self[elem] = self_get(elem, 0) - count
554 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000555 for elem in iterable:
556 self[elem] = self_get(elem, 0) - 1
557 if kwds:
558 self.subtract(kwds)
559
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000560 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700561 'Return a shallow copy.'
562 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000563
Raymond Hettingerc886a842011-01-03 08:59:18 +0000564 def __reduce__(self):
565 return self.__class__, (dict(self),)
566
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000567 def __delitem__(self, elem):
568 'Like dict.__delitem__() but does not raise KeyError for missing values.'
569 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000570 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000571
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000572 def __repr__(self):
573 if not self:
574 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000575 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000576 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000577
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000578 # Multiset-style mathematical operations discussed in:
579 # Knuth TAOCP Volume II section 4.6.3 exercise 19
580 # and at http://en.wikipedia.org/wiki/Multiset
581 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000582 # Outputs guaranteed to only include positive counts.
583 #
584 # To strip negative and zero counts, add-in an empty counter:
585 # c += Counter()
586
587 def __add__(self, other):
588 '''Add counts from two counters.
589
590 >>> Counter('abbb') + Counter('bcc')
591 Counter({'b': 4, 'c': 2, 'a': 1})
592
593 '''
594 if not isinstance(other, Counter):
595 return NotImplemented
596 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700597 for elem, count in self.items():
598 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000599 if newcount > 0:
600 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700601 for elem, count in other.items():
602 if elem not in self and count > 0:
603 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000604 return result
605
606 def __sub__(self, other):
607 ''' Subtract count, but keep only results with positive counts.
608
609 >>> Counter('abbbc') - Counter('bccd')
610 Counter({'b': 2, 'a': 1})
611
612 '''
613 if not isinstance(other, Counter):
614 return NotImplemented
615 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700616 for elem, count in self.items():
617 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000618 if newcount > 0:
619 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700620 for elem, count in other.items():
621 if elem not in self and count < 0:
622 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000623 return result
624
625 def __or__(self, other):
626 '''Union is the maximum of value in either of the input counters.
627
628 >>> Counter('abbb') | Counter('bcc')
629 Counter({'b': 3, 'c': 2, 'a': 1})
630
631 '''
632 if not isinstance(other, Counter):
633 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000634 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700635 for elem, count in self.items():
636 other_count = other[elem]
637 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000638 if newcount > 0:
639 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700640 for elem, count in other.items():
641 if elem not in self and count > 0:
642 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000643 return result
644
645 def __and__(self, other):
646 ''' Intersection is the minimum of corresponding counts.
647
648 >>> Counter('abbb') & Counter('bcc')
649 Counter({'b': 1})
650
651 '''
652 if not isinstance(other, Counter):
653 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000654 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700655 for elem, count in self.items():
656 other_count = other[elem]
657 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000658 if newcount > 0:
659 result[elem] = newcount
660 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000661
662
663if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000664 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000665 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000666 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000667 p = Point(x=10, y=20)
668 assert p == loads(dumps(p))
669
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000670 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000671 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000672 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000673 @property
674 def hypot(self):
675 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000676 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000677 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000678
Raymond Hettingere1655082008-01-10 19:15:10 +0000679 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000680 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000681
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000682 class Point(namedtuple('Point', 'x y')):
683 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000684 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000685 _make = classmethod(tuple.__new__)
686 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000687 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000688
689 print Point(11, 22)._replace(x=100)
690
Raymond Hettingere850c462008-01-10 20:37:12 +0000691 Point3D = namedtuple('Point3D', Point._fields + ('z',))
692 print Point3D.__doc__
693
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000694 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000695 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000696 print TestResults(*doctest.testmod())