blob: f2ad9726d571987aa2a606e8250fb3108a53778f [file] [log] [blame]
Raymond Hettinger5fda2f62015-11-23 20:47:05 -08001'''This module implements specialized container datatypes providing
Raymond Hettingerd2f07262015-11-23 21:00:45 -08002alternatives to Python's general purpose built-in containers, dict,
Raymond Hettinger5fda2f62015-11-23 20:47:05 -08003list, set, and tuple.
4
5* namedtuple factory function for creating tuple subclasses with named fields
6* deque list-like container with fast appends and pops on either end
7* Counter dict subclass for counting hashable objects
8* OrderedDict dict subclass that remembers the order entries were added
9* defaultdict dict subclass that calls a factory function to supply missing values
10
11'''
12
Raymond Hettingerbc512d32009-03-03 04:45:34 +000013__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +000014# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
15# They should however be considered an integral part of collections.py.
16from _abcoll import *
17import _abcoll
18__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +000019
20from _collections import deque, defaultdict
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080021from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000022from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000023import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000024import heapq as _heapq
Raymond Hettingerb36f7472011-04-23 18:37:37 -070025from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080026from itertools import imap as _imap
Raymond Hettingerb36f7472011-04-23 18:37:37 -070027
Raymond Hettinger74f869e2010-09-13 22:14:36 +000028try:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070029 from thread import get_ident as _get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000030except ImportError:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070031 from dummy_thread import get_ident as _get_ident
Raymond Hettinger74f869e2010-09-13 22:14:36 +000032
Raymond Hettingerbc512d32009-03-03 04:45:34 +000033
34################################################################################
35### OrderedDict
36################################################################################
37
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000038class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000039 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000040 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000041 # The inherited dict provides __getitem__, __len__, __contains__, and get.
42 # The remaining methods are order-aware.
Raymond Hettingerc6467432011-04-24 12:30:39 -070043 # Big-O running times for all methods are the same as regular dictionaries.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000044
Raymond Hettingerc6467432011-04-24 12:30:39 -070045 # The internal self.__map dict maps keys to links in a doubly linked list.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000046 # The circular doubly linked list starts and ends with a sentinel element.
47 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000048 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000049
Serhiy Storchaka20994f12014-11-27 19:02:56 +020050 def __init__(*args, **kwds):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070051 '''Initialize an ordered dictionary. The signature is the same as
52 regular dictionaries, but keyword arguments are not recommended because
53 their insertion order is arbitrary.
Raymond Hettingera5cd6372009-04-08 05:39:38 +000054
55 '''
Serhiy Storchaka20994f12014-11-27 19:02:56 +020056 if not args:
57 raise TypeError("descriptor '__init__' of 'OrderedDict' object "
58 "needs an argument")
59 self = args[0]
60 args = args[1:]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000061 if len(args) > 1:
62 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000063 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000064 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000065 except AttributeError:
Raymond Hettinger0b795e52011-04-23 15:41:38 -070066 self.__root = root = [] # sentinel node
67 root[:] = [root, root, None]
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000068 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000069 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000070
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080071 def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000072 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerc6467432011-04-24 12:30:39 -070073 # Setting a new item creates a new link at the end of the linked list,
74 # and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000075 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000076 root = self.__root
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080077 last = root[0]
78 last[1] = root[0] = self.__map[key] = [last, root, key]
Raymond Hettingerec5046b2012-11-20 21:11:26 -080079 return dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000080
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080081 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000082 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerc6467432011-04-24 12:30:39 -070083 # Deleting an existing item uses self.__map to find the link which gets
84 # removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000085 dict_delitem(self, key)
Raymond Hettinger5ded7952013-03-23 05:27:51 -070086 link_prev, link_next, _ = self.__map.pop(key)
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080087 link_prev[1] = link_next # update link_prev[NEXT]
88 link_next[0] = link_prev # update link_next[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000089
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070090 def __iter__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000091 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000092 # Traverse the linked list in order.
93 root = self.__root
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080094 curr = root[1] # start at the first node
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000095 while curr is not root:
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080096 yield curr[2] # yield the curr[KEY]
97 curr = curr[1] # move to next node
Raymond Hettingerbc512d32009-03-03 04:45:34 +000098
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070099 def __reversed__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000100 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000101 # Traverse the linked list in reverse order.
102 root = self.__root
Raymond Hettinger80e9eed2012-12-01 19:22:02 -0800103 curr = root[0] # start at the last node
Raymond Hettingerf1e2df92009-03-23 00:08:09 +0000104 while curr is not root:
Raymond Hettinger80e9eed2012-12-01 19:22:02 -0800105 yield curr[2] # yield the curr[KEY]
106 curr = curr[0] # move to previous node
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000107
Raymond Hettinger39282762010-04-03 00:39:26 +0000108 def clear(self):
109 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerc6467432011-04-24 12:30:39 -0700110 root = self.__root
111 root[:] = [root, root, None]
112 self.__map.clear()
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +0000113 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000114
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700115 # -- the following methods do not depend on the internal structure --
116
117 def keys(self):
118 'od.keys() -> list of keys in od'
119 return list(self)
120
121 def values(self):
122 'od.values() -> list of values in od'
123 return [self[key] for key in self]
124
125 def items(self):
126 'od.items() -> list of (key, value) pairs in od'
127 return [(key, self[key]) for key in self]
128
129 def iterkeys(self):
130 'od.iterkeys() -> an iterator over the keys in od'
131 return iter(self)
132
133 def itervalues(self):
134 'od.itervalues -> an iterator over the values in od'
135 for k in self:
136 yield self[k]
137
138 def iteritems(self):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700139 'od.iteritems -> an iterator over the (key, value) pairs in od'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700140 for k in self:
141 yield (k, self[k])
142
143 update = MutableMapping.update
144
Raymond Hettingerc6467432011-04-24 12:30:39 -0700145 __update = update # let subclasses override update without breaking __init__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000146
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000147 __marker = object()
148
149 def pop(self, key, default=__marker):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700150 '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
151 value. If key is not found, d is returned if given, otherwise KeyError
152 is raised.
153
154 '''
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000155 if key in self:
156 result = self[key]
157 del self[key]
158 return result
159 if default is self.__marker:
160 raise KeyError(key)
161 return default
162
163 def setdefault(self, key, default=None):
164 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
165 if key in self:
166 return self[key]
167 self[key] = default
168 return default
169
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000170 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000171 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
172 Pairs are returned in LIFO order if last is true or FIFO order if false.
173
174 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000175 if not self:
176 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000177 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000178 value = self.pop(key)
179 return key, value
180
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700181 def __repr__(self, _repr_running={}):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000182 'od.__repr__() <==> repr(od)'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700183 call_key = id(self), _get_ident()
184 if call_key in _repr_running:
185 return '...'
186 _repr_running[call_key] = 1
187 try:
188 if not self:
189 return '%s()' % (self.__class__.__name__,)
190 return '%s(%r)' % (self.__class__.__name__, self.items())
191 finally:
192 del _repr_running[call_key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000193
Raymond Hettinger3674c852011-04-20 13:11:38 -0700194 def __reduce__(self):
195 'Return state information for pickling'
196 items = [[k, self[k]] for k in self]
197 inst_dict = vars(self).copy()
198 for k in vars(OrderedDict()):
199 inst_dict.pop(k, None)
200 if inst_dict:
201 return (self.__class__, (items,), inst_dict)
202 return self.__class__, (items,)
203
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000204 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000205 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000206 return self.__class__(self)
207
208 @classmethod
209 def fromkeys(cls, iterable, value=None):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700210 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
211 If not specified, the value defaults to None.
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000212
213 '''
Raymond Hettingerc6467432011-04-24 12:30:39 -0700214 self = cls()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000215 for key in iterable:
Raymond Hettingerc6467432011-04-24 12:30:39 -0700216 self[key] = value
217 return self
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000218
219 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000220 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
221 while comparison to a regular mapping is order-insensitive.
222
223 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000224 if isinstance(other, OrderedDict):
Raymond Hettinger80e9eed2012-12-01 19:22:02 -0800225 return dict.__eq__(self, other) and all(_imap(_eq, self, other))
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000226 return dict.__eq__(self, other)
227
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700228 def __ne__(self, other):
229 'od.__ne__(y) <==> od!=y'
230 return not self == other
231
Raymond Hettinger43a56412011-04-23 15:51:38 -0700232 # -- the following methods support python 3.x style dictionary views --
233
234 def viewkeys(self):
235 "od.viewkeys() -> a set-like object providing a view on od's keys"
236 return KeysView(self)
237
238 def viewvalues(self):
239 "od.viewvalues() -> an object providing a view on od's values"
240 return ValuesView(self)
241
242 def viewitems(self):
243 "od.viewitems() -> a set-like object providing a view on od's items"
244 return ItemsView(self)
245
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000246
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000247################################################################################
248### namedtuple
249################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000250
Raymond Hettinger491f7072012-06-08 13:24:12 -0700251_class_template = '''\
252class {typename}(tuple):
253 '{typename}({arg_list})'
254
255 __slots__ = ()
256
257 _fields = {field_names!r}
258
259 def __new__(_cls, {arg_list}):
260 'Create new instance of {typename}({arg_list})'
261 return _tuple.__new__(_cls, ({arg_list}))
262
263 @classmethod
264 def _make(cls, iterable, new=tuple.__new__, len=len):
265 'Make a new {typename} object from a sequence or iterable'
266 result = new(cls, iterable)
267 if len(result) != {num_fields:d}:
268 raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
269 return result
270
271 def __repr__(self):
272 'Return a nicely formatted representation string'
273 return '{typename}({repr_fmt})' % self
274
275 def _asdict(self):
276 'Return a new OrderedDict which maps field names to their values'
277 return OrderedDict(zip(self._fields, self))
278
Raymond Hettinger491f7072012-06-08 13:24:12 -0700279 def _replace(_self, **kwds):
280 'Return a new {typename} object replacing specified fields with new values'
281 result = _self._make(map(kwds.pop, {field_names!r}, _self))
282 if kwds:
283 raise ValueError('Got unexpected field names: %r' % kwds.keys())
284 return result
285
286 def __getnewargs__(self):
287 'Return self as a plain tuple. Used by copy and pickle.'
288 return tuple(self)
289
Raymond Hettinger7393c692013-05-27 10:58:55 -0700290 __dict__ = _property(_asdict)
291
292 def __getstate__(self):
293 'Exclude the OrderedDict from pickling'
294 pass
295
Raymond Hettinger491f7072012-06-08 13:24:12 -0700296{field_defs}
297'''
298
299_repr_template = '{name}=%r'
300
301_field_template = '''\
302 {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
303'''
304
Raymond Hettinger322daea2009-02-10 01:24:05 +0000305def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000306 """Returns a new subclass of tuple with named fields.
307
Raymond Hettinger491f7072012-06-08 13:24:12 -0700308 >>> Point = namedtuple('Point', ['x', 'y'])
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000309 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000310 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000311 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000312 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000313 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000314 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000315 >>> x, y
316 (11, 22)
Martin Panterb1d867f2016-05-26 05:28:50 +0000317 >>> p.x + p.y # fields also accessible by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000318 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000319 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000320 >>> d['x']
321 11
322 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000323 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000324 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000325 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000326
327 """
328
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700329 # Validate the field names. At the user's option, either generate an error
330 # message or automatically replace the field name with a valid name.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000331 if isinstance(field_names, basestring):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700332 field_names = field_names.replace(',', ' ').split()
333 field_names = map(str, field_names)
Raymond Hettingerde5170e2014-06-24 13:49:24 -0700334 typename = str(typename)
Raymond Hettinger322daea2009-02-10 01:24:05 +0000335 if rename:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000336 seen = set()
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700337 for index, name in enumerate(field_names):
Raymond Hettinger491f7072012-06-08 13:24:12 -0700338 if (not all(c.isalnum() or c=='_' for c in name)
339 or _iskeyword(name)
340 or not name
341 or name[0].isdigit()
342 or name.startswith('_')
Raymond Hettinger322daea2009-02-10 01:24:05 +0000343 or name in seen):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700344 field_names[index] = '_%d' % index
Raymond Hettinger322daea2009-02-10 01:24:05 +0000345 seen.add(name)
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700346 for name in [typename] + field_names:
Raymond Hettingerde5170e2014-06-24 13:49:24 -0700347 if type(name) != str:
348 raise TypeError('Type names and field names must be strings')
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000349 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700350 raise ValueError('Type names and field names can only contain '
351 'alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000352 if _iskeyword(name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700353 raise ValueError('Type names and field names cannot be a '
354 'keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000355 if name[0].isdigit():
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700356 raise ValueError('Type names and field names cannot start with '
357 'a number: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700358 seen = set()
Raymond Hettinger163f6222007-10-09 01:36:23 +0000359 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000360 if name.startswith('_') and not rename:
Raymond Hettinger0c2c6922012-06-09 17:27:23 -0700361 raise ValueError('Field names cannot start with an underscore: '
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700362 '%r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700363 if name in seen:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000364 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700365 seen.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000366
Raymond Hettinger491f7072012-06-08 13:24:12 -0700367 # Fill-in the class template
368 class_definition = _class_template.format(
369 typename = typename,
370 field_names = tuple(field_names),
371 num_fields = len(field_names),
372 arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700373 repr_fmt = ', '.join(_repr_template.format(name=name)
374 for name in field_names),
Raymond Hettinger491f7072012-06-08 13:24:12 -0700375 field_defs = '\n'.join(_field_template.format(index=index, name=name)
376 for index, name in enumerate(field_names))
377 )
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000378 if verbose:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700379 print class_definition
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000380
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700381 # Execute the template string in a temporary namespace and support
382 # tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000383 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
384 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000385 try:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700386 exec class_definition in namespace
387 except SyntaxError as e:
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700388 raise SyntaxError(e.message + ':\n' + class_definition)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000389 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000390
391 # For pickling to work, the __module__ variable needs to be set to the frame
Ezio Melotti419e23c2013-08-17 16:56:09 +0300392 # where the named tuple is created. Bypass this step in environments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000393 # sys._getframe is not defined (Jython for example) or sys._getframe is not
394 # defined for arguments greater than 0 (IronPython).
395 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000396 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000397 except (AttributeError, ValueError):
398 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000399
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000400 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000401
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000402
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000403########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000404### Counter
405########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000406
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000407class Counter(dict):
408 '''Dict subclass for counting hashable items. Sometimes called a bag
409 or multiset. Elements are stored as dictionary keys and their counts
410 are stored as dictionary values.
411
Raymond Hettingerc886a842011-01-03 08:59:18 +0000412 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000413
414 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000415 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000416 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000417 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000418 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000419 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000420 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000421 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000422
423 >>> c['a'] # count of letter 'a'
424 5
425 >>> for elem in 'shazam': # update counts from an iterable
426 ... c[elem] += 1 # by adding 1 to each element's count
427 >>> c['a'] # now there are seven 'a'
428 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000429 >>> del c['b'] # remove all 'b'
430 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000431 0
432
433 >>> d = Counter('simsalabim') # make another counter
434 >>> c.update(d) # add in the second counter
435 >>> c['a'] # now there are nine 'a'
436 9
437
438 >>> c.clear() # empty the counter
439 >>> c
440 Counter()
441
442 Note: If a count is set to zero or reduced to zero, it will remain
443 in the counter until the entry is deleted or the counter is cleared:
444
445 >>> c = Counter('aaabbc')
446 >>> c['b'] -= 2 # reduce the count of 'b' by two
447 >>> c.most_common() # 'b' is still in, but its count is zero
448 [('a', 3), ('c', 1), ('b', 0)]
449
450 '''
451 # References:
452 # http://en.wikipedia.org/wiki/Multiset
453 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
454 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
455 # http://code.activestate.com/recipes/259174/
456 # Knuth, TAOCP Vol. II section 4.6.3
457
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200458 def __init__(*args, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000459 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000460 from an input iterable. Or, initialize the count from another mapping
461 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000462
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000463 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000464 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000465 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000466 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000467
468 '''
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200469 if not args:
470 raise TypeError("descriptor '__init__' of 'Counter' object "
471 "needs an argument")
472 self = args[0]
473 args = args[1:]
474 if len(args) > 1:
475 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettingerc886a842011-01-03 08:59:18 +0000476 super(Counter, self).__init__()
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200477 self.update(*args, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000478
479 def __missing__(self, key):
480 'The count of elements not in the Counter is zero.'
481 # Needed so that self[missing_item] does not raise KeyError
482 return 0
483
484 def most_common(self, n=None):
485 '''List the n most common elements and their counts from the most
486 common to the least. If n is None, then list all element counts.
487
Raymond Hettingerc886a842011-01-03 08:59:18 +0000488 >>> Counter('abcdeabcdabcaba').most_common(3)
489 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000490
491 '''
492 # Emulate Bag.sortedByCount from Smalltalk
493 if n is None:
494 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
495 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
496
497 def elements(self):
498 '''Iterator over elements repeating each as many times as its count.
499
500 >>> c = Counter('ABCABC')
501 >>> sorted(c.elements())
502 ['A', 'A', 'B', 'B', 'C', 'C']
503
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000504 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
505 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
506 >>> product = 1
507 >>> for factor in prime_factors.elements(): # loop over factors
508 ... product *= factor # and multiply them
509 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000510 1836
511
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000512 Note, if an element's count has been set to zero or is a negative
513 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000514
515 '''
516 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000517 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000518
519 # Override dict methods where necessary
520
521 @classmethod
522 def fromkeys(cls, iterable, v=None):
523 # There is no equivalent method for counters because setting v=1
524 # means that no element can have a count greater than one.
525 raise NotImplementedError(
526 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
527
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200528 def update(*args, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000529 '''Like dict.update() but add counts instead of replacing them.
530
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000531 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000532
533 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000534 >>> c.update('witch') # add elements from another iterable
535 >>> d = Counter('watch')
536 >>> c.update(d) # add elements from another counter
537 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000538 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000539
540 '''
541 # The regular dict.update() operation makes no sense here because the
542 # replace behavior results in the some of original untouched counts
543 # being mixed-in with all of the other counts for a mismash that
544 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000545 # contexts. Instead, we implement straight-addition. Both the inputs
546 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000547
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200548 if not args:
549 raise TypeError("descriptor 'update' of 'Counter' object "
550 "needs an argument")
551 self = args[0]
552 args = args[1:]
553 if len(args) > 1:
554 raise TypeError('expected at most 1 arguments, got %d' % len(args))
555 iterable = args[0] if args else None
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000556 if iterable is not None:
557 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000558 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000559 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000560 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000561 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000562 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000563 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000564 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000565 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000566 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000567 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000568 if kwds:
569 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000570
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200571 def subtract(*args, **kwds):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000572 '''Like dict.update() but subtracts counts instead of replacing them.
573 Counts can be reduced below zero. Both the inputs and outputs are
574 allowed to contain zero and negative counts.
575
576 Source can be an iterable, a dictionary, or another Counter instance.
577
578 >>> c = Counter('which')
579 >>> c.subtract('witch') # subtract elements from another iterable
580 >>> c.subtract(Counter('watch')) # subtract elements from another counter
581 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
582 0
583 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
584 -1
585
586 '''
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200587 if not args:
588 raise TypeError("descriptor 'subtract' of 'Counter' object "
589 "needs an argument")
590 self = args[0]
591 args = args[1:]
592 if len(args) > 1:
593 raise TypeError('expected at most 1 arguments, got %d' % len(args))
594 iterable = args[0] if args else None
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000595 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000596 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000597 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000598 for elem, count in iterable.items():
599 self[elem] = self_get(elem, 0) - count
600 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000601 for elem in iterable:
602 self[elem] = self_get(elem, 0) - 1
603 if kwds:
604 self.subtract(kwds)
605
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000606 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700607 'Return a shallow copy.'
608 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000609
Raymond Hettingerc886a842011-01-03 08:59:18 +0000610 def __reduce__(self):
611 return self.__class__, (dict(self),)
612
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000613 def __delitem__(self, elem):
614 'Like dict.__delitem__() but does not raise KeyError for missing values.'
615 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000616 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000617
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000618 def __repr__(self):
619 if not self:
620 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000621 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000622 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000623
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000624 # Multiset-style mathematical operations discussed in:
625 # Knuth TAOCP Volume II section 4.6.3 exercise 19
626 # and at http://en.wikipedia.org/wiki/Multiset
627 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000628 # Outputs guaranteed to only include positive counts.
629 #
630 # To strip negative and zero counts, add-in an empty counter:
631 # c += Counter()
632
633 def __add__(self, other):
634 '''Add counts from two counters.
635
636 >>> Counter('abbb') + Counter('bcc')
637 Counter({'b': 4, 'c': 2, 'a': 1})
638
639 '''
640 if not isinstance(other, Counter):
641 return NotImplemented
642 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700643 for elem, count in self.items():
644 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000645 if newcount > 0:
646 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700647 for elem, count in other.items():
648 if elem not in self and count > 0:
649 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000650 return result
651
652 def __sub__(self, other):
653 ''' Subtract count, but keep only results with positive counts.
654
655 >>> Counter('abbbc') - Counter('bccd')
656 Counter({'b': 2, 'a': 1})
657
658 '''
659 if not isinstance(other, Counter):
660 return NotImplemented
661 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700662 for elem, count in self.items():
663 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000664 if newcount > 0:
665 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700666 for elem, count in other.items():
667 if elem not in self and count < 0:
668 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000669 return result
670
671 def __or__(self, other):
672 '''Union is the maximum of value in either of the input counters.
673
674 >>> Counter('abbb') | Counter('bcc')
675 Counter({'b': 3, 'c': 2, 'a': 1})
676
677 '''
678 if not isinstance(other, Counter):
679 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000680 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700681 for elem, count in self.items():
682 other_count = other[elem]
683 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000684 if newcount > 0:
685 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700686 for elem, count in other.items():
687 if elem not in self and count > 0:
688 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000689 return result
690
691 def __and__(self, other):
692 ''' Intersection is the minimum of corresponding counts.
693
694 >>> Counter('abbb') & Counter('bcc')
695 Counter({'b': 1})
696
697 '''
698 if not isinstance(other, Counter):
699 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000700 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700701 for elem, count in self.items():
702 other_count = other[elem]
703 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000704 if newcount > 0:
705 result[elem] = newcount
706 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000707
708
709if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000710 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000711 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000712 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000713 p = Point(x=10, y=20)
714 assert p == loads(dumps(p))
715
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000716 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000717 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000718 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000719 @property
720 def hypot(self):
721 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000722 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000723 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000724
Raymond Hettingere1655082008-01-10 19:15:10 +0000725 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000726 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000727
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000728 class Point(namedtuple('Point', 'x y')):
729 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000730 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000731 _make = classmethod(tuple.__new__)
732 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000733 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000734
735 print Point(11, 22)._replace(x=100)
736
Raymond Hettingere850c462008-01-10 20:37:12 +0000737 Point3D = namedtuple('Point3D', Point._fields + ('z',))
738 print Point3D.__doc__
739
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000740 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000741 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000742 print TestResults(*doctest.testmod())