blob: 8831f0be670248dca8296e01174f42e109c5462f [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettinger80e9eed2012-12-01 19:22:02 -08009from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerb36f7472011-04-23 18:37:37 -070013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080014from itertools import imap as _imap
Raymond Hettingerb36f7472011-04-23 18:37:37 -070015
Raymond Hettinger74f869e2010-09-13 22:14:36 +000016try:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070017 from thread import get_ident as _get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000018except ImportError:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070019 from dummy_thread import get_ident as _get_ident
Raymond Hettinger74f869e2010-09-13 22:14:36 +000020
Raymond Hettingerbc512d32009-03-03 04:45:34 +000021
22################################################################################
23### OrderedDict
24################################################################################
25
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000026class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000027 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000028 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000029 # The inherited dict provides __getitem__, __len__, __contains__, and get.
30 # The remaining methods are order-aware.
Raymond Hettingerc6467432011-04-24 12:30:39 -070031 # Big-O running times for all methods are the same as regular dictionaries.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000032
Raymond Hettingerc6467432011-04-24 12:30:39 -070033 # The internal self.__map dict maps keys to links in a doubly linked list.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000034 # The circular doubly linked list starts and ends with a sentinel element.
35 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000036 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000037
38 def __init__(self, *args, **kwds):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070039 '''Initialize an ordered dictionary. The signature is the same as
40 regular dictionaries, but keyword arguments are not recommended because
41 their insertion order is arbitrary.
Raymond Hettingera5cd6372009-04-08 05:39:38 +000042
43 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000044 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000046 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000047 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000048 except AttributeError:
Raymond Hettinger0b795e52011-04-23 15:41:38 -070049 self.__root = root = [] # sentinel node
50 root[:] = [root, root, None]
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000051 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000052 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000053
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080054 def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000055 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerc6467432011-04-24 12:30:39 -070056 # Setting a new item creates a new link at the end of the linked list,
57 # and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000058 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000059 root = self.__root
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080060 last = root[0]
61 last[1] = root[0] = self.__map[key] = [last, root, key]
Raymond Hettingerec5046b2012-11-20 21:11:26 -080062 return dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000063
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080064 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000065 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerc6467432011-04-24 12:30:39 -070066 # Deleting an existing item uses self.__map to find the link which gets
67 # removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000068 dict_delitem(self, key)
Raymond Hettinger5ded7952013-03-23 05:27:51 -070069 link_prev, link_next, _ = self.__map.pop(key)
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080070 link_prev[1] = link_next # update link_prev[NEXT]
71 link_next[0] = link_prev # update link_next[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000072
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070073 def __iter__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000074 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000075 # Traverse the linked list in order.
76 root = self.__root
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080077 curr = root[1] # start at the first node
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000078 while curr is not root:
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080079 yield curr[2] # yield the curr[KEY]
80 curr = curr[1] # move to next node
Raymond Hettingerbc512d32009-03-03 04:45:34 +000081
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070082 def __reversed__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000083 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000084 # Traverse the linked list in reverse order.
85 root = self.__root
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080086 curr = root[0] # start at the last node
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000087 while curr is not root:
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080088 yield curr[2] # yield the curr[KEY]
89 curr = curr[0] # move to previous node
Raymond Hettingerbc512d32009-03-03 04:45:34 +000090
Raymond Hettinger39282762010-04-03 00:39:26 +000091 def clear(self):
92 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerc6467432011-04-24 12:30:39 -070093 root = self.__root
94 root[:] = [root, root, None]
95 self.__map.clear()
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +000096 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +000097
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070098 # -- the following methods do not depend on the internal structure --
99
100 def keys(self):
101 'od.keys() -> list of keys in od'
102 return list(self)
103
104 def values(self):
105 'od.values() -> list of values in od'
106 return [self[key] for key in self]
107
108 def items(self):
109 'od.items() -> list of (key, value) pairs in od'
110 return [(key, self[key]) for key in self]
111
112 def iterkeys(self):
113 'od.iterkeys() -> an iterator over the keys in od'
114 return iter(self)
115
116 def itervalues(self):
117 'od.itervalues -> an iterator over the values in od'
118 for k in self:
119 yield self[k]
120
121 def iteritems(self):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700122 'od.iteritems -> an iterator over the (key, value) pairs in od'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700123 for k in self:
124 yield (k, self[k])
125
126 update = MutableMapping.update
127
Raymond Hettingerc6467432011-04-24 12:30:39 -0700128 __update = update # let subclasses override update without breaking __init__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000129
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000130 __marker = object()
131
132 def pop(self, key, default=__marker):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700133 '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
134 value. If key is not found, d is returned if given, otherwise KeyError
135 is raised.
136
137 '''
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000138 if key in self:
139 result = self[key]
140 del self[key]
141 return result
142 if default is self.__marker:
143 raise KeyError(key)
144 return default
145
146 def setdefault(self, key, default=None):
147 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
148 if key in self:
149 return self[key]
150 self[key] = default
151 return default
152
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000153 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000154 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
155 Pairs are returned in LIFO order if last is true or FIFO order if false.
156
157 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000158 if not self:
159 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000160 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000161 value = self.pop(key)
162 return key, value
163
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700164 def __repr__(self, _repr_running={}):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000165 'od.__repr__() <==> repr(od)'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700166 call_key = id(self), _get_ident()
167 if call_key in _repr_running:
168 return '...'
169 _repr_running[call_key] = 1
170 try:
171 if not self:
172 return '%s()' % (self.__class__.__name__,)
173 return '%s(%r)' % (self.__class__.__name__, self.items())
174 finally:
175 del _repr_running[call_key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000176
Raymond Hettinger3674c852011-04-20 13:11:38 -0700177 def __reduce__(self):
178 'Return state information for pickling'
179 items = [[k, self[k]] for k in self]
180 inst_dict = vars(self).copy()
181 for k in vars(OrderedDict()):
182 inst_dict.pop(k, None)
183 if inst_dict:
184 return (self.__class__, (items,), inst_dict)
185 return self.__class__, (items,)
186
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000187 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000188 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000189 return self.__class__(self)
190
191 @classmethod
192 def fromkeys(cls, iterable, value=None):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700193 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
194 If not specified, the value defaults to None.
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000195
196 '''
Raymond Hettingerc6467432011-04-24 12:30:39 -0700197 self = cls()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000198 for key in iterable:
Raymond Hettingerc6467432011-04-24 12:30:39 -0700199 self[key] = value
200 return self
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000201
202 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000203 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
204 while comparison to a regular mapping is order-insensitive.
205
206 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000207 if isinstance(other, OrderedDict):
Raymond Hettinger80e9eed2012-12-01 19:22:02 -0800208 return dict.__eq__(self, other) and all(_imap(_eq, self, other))
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000209 return dict.__eq__(self, other)
210
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700211 def __ne__(self, other):
212 'od.__ne__(y) <==> od!=y'
213 return not self == other
214
Raymond Hettinger43a56412011-04-23 15:51:38 -0700215 # -- the following methods support python 3.x style dictionary views --
216
217 def viewkeys(self):
218 "od.viewkeys() -> a set-like object providing a view on od's keys"
219 return KeysView(self)
220
221 def viewvalues(self):
222 "od.viewvalues() -> an object providing a view on od's values"
223 return ValuesView(self)
224
225 def viewitems(self):
226 "od.viewitems() -> a set-like object providing a view on od's items"
227 return ItemsView(self)
228
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000229
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000230################################################################################
231### namedtuple
232################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000233
Raymond Hettinger491f7072012-06-08 13:24:12 -0700234_class_template = '''\
235class {typename}(tuple):
236 '{typename}({arg_list})'
237
238 __slots__ = ()
239
240 _fields = {field_names!r}
241
242 def __new__(_cls, {arg_list}):
243 'Create new instance of {typename}({arg_list})'
244 return _tuple.__new__(_cls, ({arg_list}))
245
246 @classmethod
247 def _make(cls, iterable, new=tuple.__new__, len=len):
248 'Make a new {typename} object from a sequence or iterable'
249 result = new(cls, iterable)
250 if len(result) != {num_fields:d}:
251 raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
252 return result
253
254 def __repr__(self):
255 'Return a nicely formatted representation string'
256 return '{typename}({repr_fmt})' % self
257
258 def _asdict(self):
259 'Return a new OrderedDict which maps field names to their values'
260 return OrderedDict(zip(self._fields, self))
261
Raymond Hettinger491f7072012-06-08 13:24:12 -0700262 def _replace(_self, **kwds):
263 'Return a new {typename} object replacing specified fields with new values'
264 result = _self._make(map(kwds.pop, {field_names!r}, _self))
265 if kwds:
266 raise ValueError('Got unexpected field names: %r' % kwds.keys())
267 return result
268
269 def __getnewargs__(self):
270 'Return self as a plain tuple. Used by copy and pickle.'
271 return tuple(self)
272
Raymond Hettinger7393c692013-05-27 10:58:55 -0700273 __dict__ = _property(_asdict)
274
275 def __getstate__(self):
276 'Exclude the OrderedDict from pickling'
277 pass
278
Raymond Hettinger491f7072012-06-08 13:24:12 -0700279{field_defs}
280'''
281
282_repr_template = '{name}=%r'
283
284_field_template = '''\
285 {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
286'''
287
Raymond Hettinger322daea2009-02-10 01:24:05 +0000288def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000289 """Returns a new subclass of tuple with named fields.
290
Raymond Hettinger491f7072012-06-08 13:24:12 -0700291 >>> Point = namedtuple('Point', ['x', 'y'])
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000292 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000293 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000294 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000295 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000296 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000297 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000298 >>> x, y
299 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000300 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000301 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000302 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000303 >>> d['x']
304 11
305 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000306 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000307 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000308 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000309
310 """
311
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700312 # Validate the field names. At the user's option, either generate an error
313 # message or automatically replace the field name with a valid name.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000314 if isinstance(field_names, basestring):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700315 field_names = field_names.replace(',', ' ').split()
316 field_names = map(str, field_names)
Raymond Hettingerde5170e2014-06-24 13:49:24 -0700317 typename = str(typename)
Raymond Hettinger322daea2009-02-10 01:24:05 +0000318 if rename:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000319 seen = set()
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700320 for index, name in enumerate(field_names):
Raymond Hettinger491f7072012-06-08 13:24:12 -0700321 if (not all(c.isalnum() or c=='_' for c in name)
322 or _iskeyword(name)
323 or not name
324 or name[0].isdigit()
325 or name.startswith('_')
Raymond Hettinger322daea2009-02-10 01:24:05 +0000326 or name in seen):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700327 field_names[index] = '_%d' % index
Raymond Hettinger322daea2009-02-10 01:24:05 +0000328 seen.add(name)
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700329 for name in [typename] + field_names:
Raymond Hettingerde5170e2014-06-24 13:49:24 -0700330 if type(name) != str:
331 raise TypeError('Type names and field names must be strings')
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000332 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700333 raise ValueError('Type names and field names can only contain '
334 'alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000335 if _iskeyword(name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700336 raise ValueError('Type names and field names cannot be a '
337 'keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000338 if name[0].isdigit():
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700339 raise ValueError('Type names and field names cannot start with '
340 'a number: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700341 seen = set()
Raymond Hettinger163f6222007-10-09 01:36:23 +0000342 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000343 if name.startswith('_') and not rename:
Raymond Hettinger0c2c6922012-06-09 17:27:23 -0700344 raise ValueError('Field names cannot start with an underscore: '
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700345 '%r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700346 if name in seen:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000347 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700348 seen.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000349
Raymond Hettinger491f7072012-06-08 13:24:12 -0700350 # Fill-in the class template
351 class_definition = _class_template.format(
352 typename = typename,
353 field_names = tuple(field_names),
354 num_fields = len(field_names),
355 arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700356 repr_fmt = ', '.join(_repr_template.format(name=name)
357 for name in field_names),
Raymond Hettinger491f7072012-06-08 13:24:12 -0700358 field_defs = '\n'.join(_field_template.format(index=index, name=name)
359 for index, name in enumerate(field_names))
360 )
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000361 if verbose:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700362 print class_definition
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000363
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700364 # Execute the template string in a temporary namespace and support
365 # tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000366 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
367 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000368 try:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700369 exec class_definition in namespace
370 except SyntaxError as e:
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700371 raise SyntaxError(e.message + ':\n' + class_definition)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000372 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000373
374 # For pickling to work, the __module__ variable needs to be set to the frame
Ezio Melotti419e23c2013-08-17 16:56:09 +0300375 # where the named tuple is created. Bypass this step in environments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000376 # sys._getframe is not defined (Jython for example) or sys._getframe is not
377 # defined for arguments greater than 0 (IronPython).
378 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000379 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000380 except (AttributeError, ValueError):
381 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000382
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000383 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000384
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000385
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000386########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000387### Counter
388########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000389
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000390class Counter(dict):
391 '''Dict subclass for counting hashable items. Sometimes called a bag
392 or multiset. Elements are stored as dictionary keys and their counts
393 are stored as dictionary values.
394
Raymond Hettingerc886a842011-01-03 08:59:18 +0000395 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000396
397 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000398 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000399 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000400 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000401 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000402 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000403 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000404 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000405
406 >>> c['a'] # count of letter 'a'
407 5
408 >>> for elem in 'shazam': # update counts from an iterable
409 ... c[elem] += 1 # by adding 1 to each element's count
410 >>> c['a'] # now there are seven 'a'
411 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000412 >>> del c['b'] # remove all 'b'
413 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000414 0
415
416 >>> d = Counter('simsalabim') # make another counter
417 >>> c.update(d) # add in the second counter
418 >>> c['a'] # now there are nine 'a'
419 9
420
421 >>> c.clear() # empty the counter
422 >>> c
423 Counter()
424
425 Note: If a count is set to zero or reduced to zero, it will remain
426 in the counter until the entry is deleted or the counter is cleared:
427
428 >>> c = Counter('aaabbc')
429 >>> c['b'] -= 2 # reduce the count of 'b' by two
430 >>> c.most_common() # 'b' is still in, but its count is zero
431 [('a', 3), ('c', 1), ('b', 0)]
432
433 '''
434 # References:
435 # http://en.wikipedia.org/wiki/Multiset
436 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
437 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
438 # http://code.activestate.com/recipes/259174/
439 # Knuth, TAOCP Vol. II section 4.6.3
440
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000441 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000442 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000443 from an input iterable. Or, initialize the count from another mapping
444 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000445
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000446 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000447 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000448 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000449 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000450
451 '''
Raymond Hettingerc886a842011-01-03 08:59:18 +0000452 super(Counter, self).__init__()
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000453 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000454
455 def __missing__(self, key):
456 'The count of elements not in the Counter is zero.'
457 # Needed so that self[missing_item] does not raise KeyError
458 return 0
459
460 def most_common(self, n=None):
461 '''List the n most common elements and their counts from the most
462 common to the least. If n is None, then list all element counts.
463
Raymond Hettingerc886a842011-01-03 08:59:18 +0000464 >>> Counter('abcdeabcdabcaba').most_common(3)
465 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000466
467 '''
468 # Emulate Bag.sortedByCount from Smalltalk
469 if n is None:
470 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
471 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
472
473 def elements(self):
474 '''Iterator over elements repeating each as many times as its count.
475
476 >>> c = Counter('ABCABC')
477 >>> sorted(c.elements())
478 ['A', 'A', 'B', 'B', 'C', 'C']
479
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000480 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
481 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
482 >>> product = 1
483 >>> for factor in prime_factors.elements(): # loop over factors
484 ... product *= factor # and multiply them
485 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000486 1836
487
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000488 Note, if an element's count has been set to zero or is a negative
489 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000490
491 '''
492 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000493 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000494
495 # Override dict methods where necessary
496
497 @classmethod
498 def fromkeys(cls, iterable, v=None):
499 # There is no equivalent method for counters because setting v=1
500 # means that no element can have a count greater than one.
501 raise NotImplementedError(
502 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
503
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000504 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000505 '''Like dict.update() but add counts instead of replacing them.
506
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000507 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000508
509 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000510 >>> c.update('witch') # add elements from another iterable
511 >>> d = Counter('watch')
512 >>> c.update(d) # add elements from another counter
513 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000514 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000515
516 '''
517 # The regular dict.update() operation makes no sense here because the
518 # replace behavior results in the some of original untouched counts
519 # being mixed-in with all of the other counts for a mismash that
520 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000521 # contexts. Instead, we implement straight-addition. Both the inputs
522 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000523
524 if iterable is not None:
525 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000526 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000527 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000528 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000529 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000530 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000531 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000532 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000533 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000534 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000535 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000536 if kwds:
537 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000538
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000539 def subtract(self, iterable=None, **kwds):
540 '''Like dict.update() but subtracts counts instead of replacing them.
541 Counts can be reduced below zero. Both the inputs and outputs are
542 allowed to contain zero and negative counts.
543
544 Source can be an iterable, a dictionary, or another Counter instance.
545
546 >>> c = Counter('which')
547 >>> c.subtract('witch') # subtract elements from another iterable
548 >>> c.subtract(Counter('watch')) # subtract elements from another counter
549 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
550 0
551 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
552 -1
553
554 '''
555 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000556 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000557 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000558 for elem, count in iterable.items():
559 self[elem] = self_get(elem, 0) - count
560 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000561 for elem in iterable:
562 self[elem] = self_get(elem, 0) - 1
563 if kwds:
564 self.subtract(kwds)
565
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000566 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700567 'Return a shallow copy.'
568 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000569
Raymond Hettingerc886a842011-01-03 08:59:18 +0000570 def __reduce__(self):
571 return self.__class__, (dict(self),)
572
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000573 def __delitem__(self, elem):
574 'Like dict.__delitem__() but does not raise KeyError for missing values.'
575 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000576 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000577
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000578 def __repr__(self):
579 if not self:
580 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000581 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000582 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000583
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000584 # Multiset-style mathematical operations discussed in:
585 # Knuth TAOCP Volume II section 4.6.3 exercise 19
586 # and at http://en.wikipedia.org/wiki/Multiset
587 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000588 # Outputs guaranteed to only include positive counts.
589 #
590 # To strip negative and zero counts, add-in an empty counter:
591 # c += Counter()
592
593 def __add__(self, other):
594 '''Add counts from two counters.
595
596 >>> Counter('abbb') + Counter('bcc')
597 Counter({'b': 4, 'c': 2, 'a': 1})
598
599 '''
600 if not isinstance(other, Counter):
601 return NotImplemented
602 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700603 for elem, count in self.items():
604 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000605 if newcount > 0:
606 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700607 for elem, count in other.items():
608 if elem not in self and count > 0:
609 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000610 return result
611
612 def __sub__(self, other):
613 ''' Subtract count, but keep only results with positive counts.
614
615 >>> Counter('abbbc') - Counter('bccd')
616 Counter({'b': 2, 'a': 1})
617
618 '''
619 if not isinstance(other, Counter):
620 return NotImplemented
621 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700622 for elem, count in self.items():
623 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000624 if newcount > 0:
625 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700626 for elem, count in other.items():
627 if elem not in self and count < 0:
628 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000629 return result
630
631 def __or__(self, other):
632 '''Union is the maximum of value in either of the input counters.
633
634 >>> Counter('abbb') | Counter('bcc')
635 Counter({'b': 3, 'c': 2, 'a': 1})
636
637 '''
638 if not isinstance(other, Counter):
639 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000640 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700641 for elem, count in self.items():
642 other_count = other[elem]
643 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000644 if newcount > 0:
645 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700646 for elem, count in other.items():
647 if elem not in self and count > 0:
648 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000649 return result
650
651 def __and__(self, other):
652 ''' Intersection is the minimum of corresponding counts.
653
654 >>> Counter('abbb') & Counter('bcc')
655 Counter({'b': 1})
656
657 '''
658 if not isinstance(other, Counter):
659 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000660 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700661 for elem, count in self.items():
662 other_count = other[elem]
663 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000664 if newcount > 0:
665 result[elem] = newcount
666 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000667
668
669if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000670 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000671 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000672 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000673 p = Point(x=10, y=20)
674 assert p == loads(dumps(p))
675
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000676 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000677 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000678 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000679 @property
680 def hypot(self):
681 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000682 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000683 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000684
Raymond Hettingere1655082008-01-10 19:15:10 +0000685 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000686 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000687
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000688 class Point(namedtuple('Point', 'x y')):
689 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000690 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000691 _make = classmethod(tuple.__new__)
692 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000693 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000694
695 print Point(11, 22)._replace(x=100)
696
Raymond Hettingere850c462008-01-10 20:37:12 +0000697 Point3D = namedtuple('Point3D', Point._fields + ('z',))
698 print Point3D.__doc__
699
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000700 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000701 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000702 print TestResults(*doctest.testmod())