blob: ba6d7caf62886d523c75263ab1b185d567244960 [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerb36f7472011-04-23 18:37:37 -07009from operator import itemgetter as _itemgetter
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerb36f7472011-04-23 18:37:37 -070013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
14
Raymond Hettinger74f869e2010-09-13 22:14:36 +000015try:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070016 from thread import get_ident as _get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000017except ImportError:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070018 from dummy_thread import get_ident as _get_ident
Raymond Hettinger74f869e2010-09-13 22:14:36 +000019
Raymond Hettingerbc512d32009-03-03 04:45:34 +000020
21################################################################################
22### OrderedDict
23################################################################################
24
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000025class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000027 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerc6467432011-04-24 12:30:39 -070030 # Big-O running times for all methods are the same as regular dictionaries.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000031
Raymond Hettingerc6467432011-04-24 12:30:39 -070032 # The internal self.__map dict maps keys to links in a doubly linked list.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000033 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000035 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000036
37 def __init__(self, *args, **kwds):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070038 '''Initialize an ordered dictionary. The signature is the same as
39 regular dictionaries, but keyword arguments are not recommended because
40 their insertion order is arbitrary.
Raymond Hettingera5cd6372009-04-08 05:39:38 +000041
42 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000043 if len(args) > 1:
44 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000045 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000046 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000047 except AttributeError:
Raymond Hettinger0b795e52011-04-23 15:41:38 -070048 self.__root = root = [] # sentinel node
49 root[:] = [root, root, None]
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000050 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000051 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000052
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000053 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000054 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerc6467432011-04-24 12:30:39 -070055 # Setting a new item creates a new link at the end of the linked list,
56 # and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000057 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000058 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000059 last = root[PREV]
60 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000061 dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000062
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000063 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000064 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerc6467432011-04-24 12:30:39 -070065 # Deleting an existing item uses self.__map to find the link which gets
66 # removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000067 dict_delitem(self, key)
Raymond Hettinger43a56412011-04-23 15:51:38 -070068 link_prev, link_next, key = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000069 link_prev[NEXT] = link_next
70 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000071
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070072 def __iter__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000073 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000074 # Traverse the linked list in order.
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070075 NEXT, KEY = 1, 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000076 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000077 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000078 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000079 yield curr[KEY]
80 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000081
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070082 def __reversed__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000083 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000084 # Traverse the linked list in reverse order.
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070085 PREV, KEY = 0, 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000086 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000087 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000088 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000089 yield curr[KEY]
90 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000091
Raymond Hettinger39282762010-04-03 00:39:26 +000092 def clear(self):
93 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerc6467432011-04-24 12:30:39 -070094 for node in self.__map.itervalues():
95 del node[:]
96 root = self.__root
97 root[:] = [root, root, None]
98 self.__map.clear()
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +000099 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000100
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700101 # -- the following methods do not depend on the internal structure --
102
103 def keys(self):
104 'od.keys() -> list of keys in od'
105 return list(self)
106
107 def values(self):
108 'od.values() -> list of values in od'
109 return [self[key] for key in self]
110
111 def items(self):
112 'od.items() -> list of (key, value) pairs in od'
113 return [(key, self[key]) for key in self]
114
115 def iterkeys(self):
116 'od.iterkeys() -> an iterator over the keys in od'
117 return iter(self)
118
119 def itervalues(self):
120 'od.itervalues -> an iterator over the values in od'
121 for k in self:
122 yield self[k]
123
124 def iteritems(self):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700125 'od.iteritems -> an iterator over the (key, value) pairs in od'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700126 for k in self:
127 yield (k, self[k])
128
129 update = MutableMapping.update
130
Raymond Hettingerc6467432011-04-24 12:30:39 -0700131 __update = update # let subclasses override update without breaking __init__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000132
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000133 __marker = object()
134
135 def pop(self, key, default=__marker):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700136 '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
137 value. If key is not found, d is returned if given, otherwise KeyError
138 is raised.
139
140 '''
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000141 if key in self:
142 result = self[key]
143 del self[key]
144 return result
145 if default is self.__marker:
146 raise KeyError(key)
147 return default
148
149 def setdefault(self, key, default=None):
150 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
151 if key in self:
152 return self[key]
153 self[key] = default
154 return default
155
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000156 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000157 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
158 Pairs are returned in LIFO order if last is true or FIFO order if false.
159
160 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000161 if not self:
162 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000163 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000164 value = self.pop(key)
165 return key, value
166
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700167 def __repr__(self, _repr_running={}):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000168 'od.__repr__() <==> repr(od)'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700169 call_key = id(self), _get_ident()
170 if call_key in _repr_running:
171 return '...'
172 _repr_running[call_key] = 1
173 try:
174 if not self:
175 return '%s()' % (self.__class__.__name__,)
176 return '%s(%r)' % (self.__class__.__name__, self.items())
177 finally:
178 del _repr_running[call_key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000179
Raymond Hettinger3674c852011-04-20 13:11:38 -0700180 def __reduce__(self):
181 'Return state information for pickling'
182 items = [[k, self[k]] for k in self]
183 inst_dict = vars(self).copy()
184 for k in vars(OrderedDict()):
185 inst_dict.pop(k, None)
186 if inst_dict:
187 return (self.__class__, (items,), inst_dict)
188 return self.__class__, (items,)
189
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000190 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000191 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000192 return self.__class__(self)
193
194 @classmethod
195 def fromkeys(cls, iterable, value=None):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700196 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
197 If not specified, the value defaults to None.
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000198
199 '''
Raymond Hettingerc6467432011-04-24 12:30:39 -0700200 self = cls()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000201 for key in iterable:
Raymond Hettingerc6467432011-04-24 12:30:39 -0700202 self[key] = value
203 return self
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000204
205 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000206 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
207 while comparison to a regular mapping is order-insensitive.
208
209 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000210 if isinstance(other, OrderedDict):
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700211 return len(self)==len(other) and self.items() == other.items()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000212 return dict.__eq__(self, other)
213
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700214 def __ne__(self, other):
215 'od.__ne__(y) <==> od!=y'
216 return not self == other
217
Raymond Hettinger43a56412011-04-23 15:51:38 -0700218 # -- the following methods support python 3.x style dictionary views --
219
220 def viewkeys(self):
221 "od.viewkeys() -> a set-like object providing a view on od's keys"
222 return KeysView(self)
223
224 def viewvalues(self):
225 "od.viewvalues() -> an object providing a view on od's values"
226 return ValuesView(self)
227
228 def viewitems(self):
229 "od.viewitems() -> a set-like object providing a view on od's items"
230 return ItemsView(self)
231
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000232
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000233################################################################################
234### namedtuple
235################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000236
Raymond Hettinger491f7072012-06-08 13:24:12 -0700237_class_template = '''\
238class {typename}(tuple):
239 '{typename}({arg_list})'
240
241 __slots__ = ()
242
243 _fields = {field_names!r}
244
245 def __new__(_cls, {arg_list}):
246 'Create new instance of {typename}({arg_list})'
247 return _tuple.__new__(_cls, ({arg_list}))
248
249 @classmethod
250 def _make(cls, iterable, new=tuple.__new__, len=len):
251 'Make a new {typename} object from a sequence or iterable'
252 result = new(cls, iterable)
253 if len(result) != {num_fields:d}:
254 raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
255 return result
256
257 def __repr__(self):
258 'Return a nicely formatted representation string'
259 return '{typename}({repr_fmt})' % self
260
261 def _asdict(self):
262 'Return a new OrderedDict which maps field names to their values'
263 return OrderedDict(zip(self._fields, self))
264
265 __dict__ = property(_asdict)
266
267 def _replace(_self, **kwds):
268 'Return a new {typename} object replacing specified fields with new values'
269 result = _self._make(map(kwds.pop, {field_names!r}, _self))
270 if kwds:
271 raise ValueError('Got unexpected field names: %r' % kwds.keys())
272 return result
273
274 def __getnewargs__(self):
275 'Return self as a plain tuple. Used by copy and pickle.'
276 return tuple(self)
277
278{field_defs}
279'''
280
281_repr_template = '{name}=%r'
282
283_field_template = '''\
284 {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
285'''
286
Raymond Hettinger322daea2009-02-10 01:24:05 +0000287def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000288 """Returns a new subclass of tuple with named fields.
289
Raymond Hettinger491f7072012-06-08 13:24:12 -0700290 >>> Point = namedtuple('Point', ['x', 'y'])
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000291 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000292 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000293 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000294 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000295 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000296 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000297 >>> x, y
298 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000299 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000300 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000301 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000302 >>> d['x']
303 11
304 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000305 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000306 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000307 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000308
309 """
310
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700311 # Validate the field names. At the user's option, either generate an error
312 # message or automatically replace the field name with a valid name.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000313 if isinstance(field_names, basestring):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700314 field_names = field_names.replace(',', ' ').split()
315 field_names = map(str, field_names)
Raymond Hettinger322daea2009-02-10 01:24:05 +0000316 if rename:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000317 seen = set()
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700318 for index, name in enumerate(field_names):
Raymond Hettinger491f7072012-06-08 13:24:12 -0700319 if (not all(c.isalnum() or c=='_' for c in name)
320 or _iskeyword(name)
321 or not name
322 or name[0].isdigit()
323 or name.startswith('_')
Raymond Hettinger322daea2009-02-10 01:24:05 +0000324 or name in seen):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700325 field_names[index] = '_%d' % index
Raymond Hettinger322daea2009-02-10 01:24:05 +0000326 seen.add(name)
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700327 for name in [typename] + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000328 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700329 raise ValueError('Type names and field names can only contain '
330 'alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000331 if _iskeyword(name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700332 raise ValueError('Type names and field names cannot be a '
333 'keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000334 if name[0].isdigit():
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700335 raise ValueError('Type names and field names cannot start with '
336 'a number: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700337 seen = set()
Raymond Hettinger163f6222007-10-09 01:36:23 +0000338 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000339 if name.startswith('_') and not rename:
Raymond Hettinger0c2c6922012-06-09 17:27:23 -0700340 raise ValueError('Field names cannot start with an underscore: '
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700341 '%r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700342 if name in seen:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000343 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700344 seen.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000345
Raymond Hettinger491f7072012-06-08 13:24:12 -0700346 # Fill-in the class template
347 class_definition = _class_template.format(
348 typename = typename,
349 field_names = tuple(field_names),
350 num_fields = len(field_names),
351 arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700352 repr_fmt = ', '.join(_repr_template.format(name=name)
353 for name in field_names),
Raymond Hettinger491f7072012-06-08 13:24:12 -0700354 field_defs = '\n'.join(_field_template.format(index=index, name=name)
355 for index, name in enumerate(field_names))
356 )
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000357 if verbose:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700358 print class_definition
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000359
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700360 # Execute the template string in a temporary namespace and support
361 # tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000362 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
363 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000364 try:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700365 exec class_definition in namespace
366 except SyntaxError as e:
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700367 raise SyntaxError(e.message + ':\n' + class_definition)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000368 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000369
370 # For pickling to work, the __module__ variable needs to be set to the frame
371 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000372 # sys._getframe is not defined (Jython for example) or sys._getframe is not
373 # defined for arguments greater than 0 (IronPython).
374 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000375 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000376 except (AttributeError, ValueError):
377 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000378
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000379 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000380
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000381
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000382########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000383### Counter
384########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000385
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000386class Counter(dict):
387 '''Dict subclass for counting hashable items. Sometimes called a bag
388 or multiset. Elements are stored as dictionary keys and their counts
389 are stored as dictionary values.
390
Raymond Hettingerc886a842011-01-03 08:59:18 +0000391 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000392
393 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000394 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000395 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000396 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000397 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000398 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000399 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000400 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000401
402 >>> c['a'] # count of letter 'a'
403 5
404 >>> for elem in 'shazam': # update counts from an iterable
405 ... c[elem] += 1 # by adding 1 to each element's count
406 >>> c['a'] # now there are seven 'a'
407 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000408 >>> del c['b'] # remove all 'b'
409 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000410 0
411
412 >>> d = Counter('simsalabim') # make another counter
413 >>> c.update(d) # add in the second counter
414 >>> c['a'] # now there are nine 'a'
415 9
416
417 >>> c.clear() # empty the counter
418 >>> c
419 Counter()
420
421 Note: If a count is set to zero or reduced to zero, it will remain
422 in the counter until the entry is deleted or the counter is cleared:
423
424 >>> c = Counter('aaabbc')
425 >>> c['b'] -= 2 # reduce the count of 'b' by two
426 >>> c.most_common() # 'b' is still in, but its count is zero
427 [('a', 3), ('c', 1), ('b', 0)]
428
429 '''
430 # References:
431 # http://en.wikipedia.org/wiki/Multiset
432 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
433 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
434 # http://code.activestate.com/recipes/259174/
435 # Knuth, TAOCP Vol. II section 4.6.3
436
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000437 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000438 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000439 from an input iterable. Or, initialize the count from another mapping
440 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000441
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000442 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000443 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000444 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000445 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000446
447 '''
Raymond Hettingerc886a842011-01-03 08:59:18 +0000448 super(Counter, self).__init__()
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000449 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000450
451 def __missing__(self, key):
452 'The count of elements not in the Counter is zero.'
453 # Needed so that self[missing_item] does not raise KeyError
454 return 0
455
456 def most_common(self, n=None):
457 '''List the n most common elements and their counts from the most
458 common to the least. If n is None, then list all element counts.
459
Raymond Hettingerc886a842011-01-03 08:59:18 +0000460 >>> Counter('abcdeabcdabcaba').most_common(3)
461 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000462
463 '''
464 # Emulate Bag.sortedByCount from Smalltalk
465 if n is None:
466 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
467 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
468
469 def elements(self):
470 '''Iterator over elements repeating each as many times as its count.
471
472 >>> c = Counter('ABCABC')
473 >>> sorted(c.elements())
474 ['A', 'A', 'B', 'B', 'C', 'C']
475
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000476 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
477 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
478 >>> product = 1
479 >>> for factor in prime_factors.elements(): # loop over factors
480 ... product *= factor # and multiply them
481 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000482 1836
483
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000484 Note, if an element's count has been set to zero or is a negative
485 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000486
487 '''
488 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000489 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000490
491 # Override dict methods where necessary
492
493 @classmethod
494 def fromkeys(cls, iterable, v=None):
495 # There is no equivalent method for counters because setting v=1
496 # means that no element can have a count greater than one.
497 raise NotImplementedError(
498 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
499
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000500 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000501 '''Like dict.update() but add counts instead of replacing them.
502
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000503 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000504
505 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000506 >>> c.update('witch') # add elements from another iterable
507 >>> d = Counter('watch')
508 >>> c.update(d) # add elements from another counter
509 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000510 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000511
512 '''
513 # The regular dict.update() operation makes no sense here because the
514 # replace behavior results in the some of original untouched counts
515 # being mixed-in with all of the other counts for a mismash that
516 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000517 # contexts. Instead, we implement straight-addition. Both the inputs
518 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000519
520 if iterable is not None:
521 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000522 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000523 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000524 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000525 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000526 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000527 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000528 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000529 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000530 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000531 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000532 if kwds:
533 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000534
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000535 def subtract(self, iterable=None, **kwds):
536 '''Like dict.update() but subtracts counts instead of replacing them.
537 Counts can be reduced below zero. Both the inputs and outputs are
538 allowed to contain zero and negative counts.
539
540 Source can be an iterable, a dictionary, or another Counter instance.
541
542 >>> c = Counter('which')
543 >>> c.subtract('witch') # subtract elements from another iterable
544 >>> c.subtract(Counter('watch')) # subtract elements from another counter
545 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
546 0
547 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
548 -1
549
550 '''
551 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000552 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000553 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000554 for elem, count in iterable.items():
555 self[elem] = self_get(elem, 0) - count
556 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000557 for elem in iterable:
558 self[elem] = self_get(elem, 0) - 1
559 if kwds:
560 self.subtract(kwds)
561
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000562 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700563 'Return a shallow copy.'
564 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000565
Raymond Hettingerc886a842011-01-03 08:59:18 +0000566 def __reduce__(self):
567 return self.__class__, (dict(self),)
568
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000569 def __delitem__(self, elem):
570 'Like dict.__delitem__() but does not raise KeyError for missing values.'
571 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000572 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000573
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000574 def __repr__(self):
575 if not self:
576 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000577 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000578 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000579
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000580 # Multiset-style mathematical operations discussed in:
581 # Knuth TAOCP Volume II section 4.6.3 exercise 19
582 # and at http://en.wikipedia.org/wiki/Multiset
583 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000584 # Outputs guaranteed to only include positive counts.
585 #
586 # To strip negative and zero counts, add-in an empty counter:
587 # c += Counter()
588
589 def __add__(self, other):
590 '''Add counts from two counters.
591
592 >>> Counter('abbb') + Counter('bcc')
593 Counter({'b': 4, 'c': 2, 'a': 1})
594
595 '''
596 if not isinstance(other, Counter):
597 return NotImplemented
598 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700599 for elem, count in self.items():
600 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000601 if newcount > 0:
602 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700603 for elem, count in other.items():
604 if elem not in self and count > 0:
605 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000606 return result
607
608 def __sub__(self, other):
609 ''' Subtract count, but keep only results with positive counts.
610
611 >>> Counter('abbbc') - Counter('bccd')
612 Counter({'b': 2, 'a': 1})
613
614 '''
615 if not isinstance(other, Counter):
616 return NotImplemented
617 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700618 for elem, count in self.items():
619 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000620 if newcount > 0:
621 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700622 for elem, count in other.items():
623 if elem not in self and count < 0:
624 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000625 return result
626
627 def __or__(self, other):
628 '''Union is the maximum of value in either of the input counters.
629
630 >>> Counter('abbb') | Counter('bcc')
631 Counter({'b': 3, 'c': 2, 'a': 1})
632
633 '''
634 if not isinstance(other, Counter):
635 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000636 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700637 for elem, count in self.items():
638 other_count = other[elem]
639 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000640 if newcount > 0:
641 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700642 for elem, count in other.items():
643 if elem not in self and count > 0:
644 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000645 return result
646
647 def __and__(self, other):
648 ''' Intersection is the minimum of corresponding counts.
649
650 >>> Counter('abbb') & Counter('bcc')
651 Counter({'b': 1})
652
653 '''
654 if not isinstance(other, Counter):
655 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000656 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700657 for elem, count in self.items():
658 other_count = other[elem]
659 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000660 if newcount > 0:
661 result[elem] = newcount
662 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000663
664
665if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000666 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000667 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000668 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000669 p = Point(x=10, y=20)
670 assert p == loads(dumps(p))
671
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000672 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000673 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000674 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000675 @property
676 def hypot(self):
677 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000678 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000679 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000680
Raymond Hettingere1655082008-01-10 19:15:10 +0000681 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000682 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000683
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000684 class Point(namedtuple('Point', 'x y')):
685 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000686 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000687 _make = classmethod(tuple.__new__)
688 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000689 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000690
691 print Point(11, 22)._replace(x=100)
692
Raymond Hettingere850c462008-01-10 20:37:12 +0000693 Point3D = namedtuple('Point3D', Point._fields + ('z',))
694 print Point3D.__doc__
695
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000696 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000697 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000698 print TestResults(*doctest.testmod())