blob: 1dcd23316afdfca8387e496b88e1a2c425d480a0 [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettinger80e9eed2012-12-01 19:22:02 -08009from operator import itemgetter as _itemgetter, eq as _eq
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerb36f7472011-04-23 18:37:37 -070013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080014from itertools import imap as _imap
Raymond Hettingerb36f7472011-04-23 18:37:37 -070015
Raymond Hettinger74f869e2010-09-13 22:14:36 +000016try:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070017 from thread import get_ident as _get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000018except ImportError:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070019 from dummy_thread import get_ident as _get_ident
Raymond Hettinger74f869e2010-09-13 22:14:36 +000020
Raymond Hettingerbc512d32009-03-03 04:45:34 +000021
22################################################################################
23### OrderedDict
24################################################################################
25
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000026class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000027 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000028 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000029 # The inherited dict provides __getitem__, __len__, __contains__, and get.
30 # The remaining methods are order-aware.
Raymond Hettingerc6467432011-04-24 12:30:39 -070031 # Big-O running times for all methods are the same as regular dictionaries.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000032
Raymond Hettingerc6467432011-04-24 12:30:39 -070033 # The internal self.__map dict maps keys to links in a doubly linked list.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000034 # The circular doubly linked list starts and ends with a sentinel element.
35 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000036 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000037
Serhiy Storchaka20994f12014-11-27 19:02:56 +020038 def __init__(*args, **kwds):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070039 '''Initialize an ordered dictionary. The signature is the same as
40 regular dictionaries, but keyword arguments are not recommended because
41 their insertion order is arbitrary.
Raymond Hettingera5cd6372009-04-08 05:39:38 +000042
43 '''
Serhiy Storchaka20994f12014-11-27 19:02:56 +020044 if not args:
45 raise TypeError("descriptor '__init__' of 'OrderedDict' object "
46 "needs an argument")
47 self = args[0]
48 args = args[1:]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000049 if len(args) > 1:
50 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000051 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000052 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000053 except AttributeError:
Raymond Hettinger0b795e52011-04-23 15:41:38 -070054 self.__root = root = [] # sentinel node
55 root[:] = [root, root, None]
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000056 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000057 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000058
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080059 def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000060 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerc6467432011-04-24 12:30:39 -070061 # Setting a new item creates a new link at the end of the linked list,
62 # and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000063 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000064 root = self.__root
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080065 last = root[0]
66 last[1] = root[0] = self.__map[key] = [last, root, key]
Raymond Hettingerec5046b2012-11-20 21:11:26 -080067 return dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000068
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080069 def __delitem__(self, key, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000070 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerc6467432011-04-24 12:30:39 -070071 # Deleting an existing item uses self.__map to find the link which gets
72 # removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000073 dict_delitem(self, key)
Raymond Hettinger5ded7952013-03-23 05:27:51 -070074 link_prev, link_next, _ = self.__map.pop(key)
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080075 link_prev[1] = link_next # update link_prev[NEXT]
76 link_next[0] = link_prev # update link_next[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000077
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070078 def __iter__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000079 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000080 # Traverse the linked list in order.
81 root = self.__root
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080082 curr = root[1] # start at the first node
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000083 while curr is not root:
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080084 yield curr[2] # yield the curr[KEY]
85 curr = curr[1] # move to next node
Raymond Hettingerbc512d32009-03-03 04:45:34 +000086
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070087 def __reversed__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000088 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000089 # Traverse the linked list in reverse order.
90 root = self.__root
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080091 curr = root[0] # start at the last node
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000092 while curr is not root:
Raymond Hettinger80e9eed2012-12-01 19:22:02 -080093 yield curr[2] # yield the curr[KEY]
94 curr = curr[0] # move to previous node
Raymond Hettingerbc512d32009-03-03 04:45:34 +000095
Raymond Hettinger39282762010-04-03 00:39:26 +000096 def clear(self):
97 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerc6467432011-04-24 12:30:39 -070098 root = self.__root
99 root[:] = [root, root, None]
100 self.__map.clear()
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +0000101 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000102
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700103 # -- the following methods do not depend on the internal structure --
104
105 def keys(self):
106 'od.keys() -> list of keys in od'
107 return list(self)
108
109 def values(self):
110 'od.values() -> list of values in od'
111 return [self[key] for key in self]
112
113 def items(self):
114 'od.items() -> list of (key, value) pairs in od'
115 return [(key, self[key]) for key in self]
116
117 def iterkeys(self):
118 'od.iterkeys() -> an iterator over the keys in od'
119 return iter(self)
120
121 def itervalues(self):
122 'od.itervalues -> an iterator over the values in od'
123 for k in self:
124 yield self[k]
125
126 def iteritems(self):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700127 'od.iteritems -> an iterator over the (key, value) pairs in od'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700128 for k in self:
129 yield (k, self[k])
130
131 update = MutableMapping.update
132
Raymond Hettingerc6467432011-04-24 12:30:39 -0700133 __update = update # let subclasses override update without breaking __init__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000134
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000135 __marker = object()
136
137 def pop(self, key, default=__marker):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700138 '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
139 value. If key is not found, d is returned if given, otherwise KeyError
140 is raised.
141
142 '''
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000143 if key in self:
144 result = self[key]
145 del self[key]
146 return result
147 if default is self.__marker:
148 raise KeyError(key)
149 return default
150
151 def setdefault(self, key, default=None):
152 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
153 if key in self:
154 return self[key]
155 self[key] = default
156 return default
157
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000158 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000159 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
160 Pairs are returned in LIFO order if last is true or FIFO order if false.
161
162 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000163 if not self:
164 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000165 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000166 value = self.pop(key)
167 return key, value
168
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700169 def __repr__(self, _repr_running={}):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000170 'od.__repr__() <==> repr(od)'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700171 call_key = id(self), _get_ident()
172 if call_key in _repr_running:
173 return '...'
174 _repr_running[call_key] = 1
175 try:
176 if not self:
177 return '%s()' % (self.__class__.__name__,)
178 return '%s(%r)' % (self.__class__.__name__, self.items())
179 finally:
180 del _repr_running[call_key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000181
Raymond Hettinger3674c852011-04-20 13:11:38 -0700182 def __reduce__(self):
183 'Return state information for pickling'
184 items = [[k, self[k]] for k in self]
185 inst_dict = vars(self).copy()
186 for k in vars(OrderedDict()):
187 inst_dict.pop(k, None)
188 if inst_dict:
189 return (self.__class__, (items,), inst_dict)
190 return self.__class__, (items,)
191
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000192 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000193 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000194 return self.__class__(self)
195
196 @classmethod
197 def fromkeys(cls, iterable, value=None):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700198 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
199 If not specified, the value defaults to None.
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000200
201 '''
Raymond Hettingerc6467432011-04-24 12:30:39 -0700202 self = cls()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000203 for key in iterable:
Raymond Hettingerc6467432011-04-24 12:30:39 -0700204 self[key] = value
205 return self
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000206
207 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000208 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
209 while comparison to a regular mapping is order-insensitive.
210
211 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000212 if isinstance(other, OrderedDict):
Raymond Hettinger80e9eed2012-12-01 19:22:02 -0800213 return dict.__eq__(self, other) and all(_imap(_eq, self, other))
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000214 return dict.__eq__(self, other)
215
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700216 def __ne__(self, other):
217 'od.__ne__(y) <==> od!=y'
218 return not self == other
219
Raymond Hettinger43a56412011-04-23 15:51:38 -0700220 # -- the following methods support python 3.x style dictionary views --
221
222 def viewkeys(self):
223 "od.viewkeys() -> a set-like object providing a view on od's keys"
224 return KeysView(self)
225
226 def viewvalues(self):
227 "od.viewvalues() -> an object providing a view on od's values"
228 return ValuesView(self)
229
230 def viewitems(self):
231 "od.viewitems() -> a set-like object providing a view on od's items"
232 return ItemsView(self)
233
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000234
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000235################################################################################
236### namedtuple
237################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000238
Raymond Hettinger491f7072012-06-08 13:24:12 -0700239_class_template = '''\
240class {typename}(tuple):
241 '{typename}({arg_list})'
242
243 __slots__ = ()
244
245 _fields = {field_names!r}
246
247 def __new__(_cls, {arg_list}):
248 'Create new instance of {typename}({arg_list})'
249 return _tuple.__new__(_cls, ({arg_list}))
250
251 @classmethod
252 def _make(cls, iterable, new=tuple.__new__, len=len):
253 'Make a new {typename} object from a sequence or iterable'
254 result = new(cls, iterable)
255 if len(result) != {num_fields:d}:
256 raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
257 return result
258
259 def __repr__(self):
260 'Return a nicely formatted representation string'
261 return '{typename}({repr_fmt})' % self
262
263 def _asdict(self):
264 'Return a new OrderedDict which maps field names to their values'
265 return OrderedDict(zip(self._fields, self))
266
Raymond Hettinger491f7072012-06-08 13:24:12 -0700267 def _replace(_self, **kwds):
268 'Return a new {typename} object replacing specified fields with new values'
269 result = _self._make(map(kwds.pop, {field_names!r}, _self))
270 if kwds:
271 raise ValueError('Got unexpected field names: %r' % kwds.keys())
272 return result
273
274 def __getnewargs__(self):
275 'Return self as a plain tuple. Used by copy and pickle.'
276 return tuple(self)
277
Raymond Hettinger7393c692013-05-27 10:58:55 -0700278 __dict__ = _property(_asdict)
279
280 def __getstate__(self):
281 'Exclude the OrderedDict from pickling'
282 pass
283
Raymond Hettinger491f7072012-06-08 13:24:12 -0700284{field_defs}
285'''
286
287_repr_template = '{name}=%r'
288
289_field_template = '''\
290 {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
291'''
292
Raymond Hettinger322daea2009-02-10 01:24:05 +0000293def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000294 """Returns a new subclass of tuple with named fields.
295
Raymond Hettinger491f7072012-06-08 13:24:12 -0700296 >>> Point = namedtuple('Point', ['x', 'y'])
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000297 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000298 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000299 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000300 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000301 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000302 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000303 >>> x, y
304 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000305 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000306 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000307 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000308 >>> d['x']
309 11
310 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000311 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000312 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000313 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000314
315 """
316
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700317 # Validate the field names. At the user's option, either generate an error
318 # message or automatically replace the field name with a valid name.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000319 if isinstance(field_names, basestring):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700320 field_names = field_names.replace(',', ' ').split()
321 field_names = map(str, field_names)
Raymond Hettingerde5170e2014-06-24 13:49:24 -0700322 typename = str(typename)
Raymond Hettinger322daea2009-02-10 01:24:05 +0000323 if rename:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000324 seen = set()
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700325 for index, name in enumerate(field_names):
Raymond Hettinger491f7072012-06-08 13:24:12 -0700326 if (not all(c.isalnum() or c=='_' for c in name)
327 or _iskeyword(name)
328 or not name
329 or name[0].isdigit()
330 or name.startswith('_')
Raymond Hettinger322daea2009-02-10 01:24:05 +0000331 or name in seen):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700332 field_names[index] = '_%d' % index
Raymond Hettinger322daea2009-02-10 01:24:05 +0000333 seen.add(name)
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700334 for name in [typename] + field_names:
Raymond Hettingerde5170e2014-06-24 13:49:24 -0700335 if type(name) != str:
336 raise TypeError('Type names and field names must be strings')
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000337 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700338 raise ValueError('Type names and field names can only contain '
339 'alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000340 if _iskeyword(name):
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700341 raise ValueError('Type names and field names cannot be a '
342 'keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000343 if name[0].isdigit():
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700344 raise ValueError('Type names and field names cannot start with '
345 'a number: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700346 seen = set()
Raymond Hettinger163f6222007-10-09 01:36:23 +0000347 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000348 if name.startswith('_') and not rename:
Raymond Hettinger0c2c6922012-06-09 17:27:23 -0700349 raise ValueError('Field names cannot start with an underscore: '
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700350 '%r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700351 if name in seen:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000352 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger491f7072012-06-08 13:24:12 -0700353 seen.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000354
Raymond Hettinger491f7072012-06-08 13:24:12 -0700355 # Fill-in the class template
356 class_definition = _class_template.format(
357 typename = typename,
358 field_names = tuple(field_names),
359 num_fields = len(field_names),
360 arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700361 repr_fmt = ', '.join(_repr_template.format(name=name)
362 for name in field_names),
Raymond Hettinger491f7072012-06-08 13:24:12 -0700363 field_defs = '\n'.join(_field_template.format(index=index, name=name)
364 for index, name in enumerate(field_names))
365 )
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000366 if verbose:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700367 print class_definition
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000368
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700369 # Execute the template string in a temporary namespace and support
370 # tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000371 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
372 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000373 try:
Raymond Hettinger491f7072012-06-08 13:24:12 -0700374 exec class_definition in namespace
375 except SyntaxError as e:
Raymond Hettinger3395fda2012-06-09 13:04:29 -0700376 raise SyntaxError(e.message + ':\n' + class_definition)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000377 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000378
379 # For pickling to work, the __module__ variable needs to be set to the frame
Ezio Melotti419e23c2013-08-17 16:56:09 +0300380 # where the named tuple is created. Bypass this step in environments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000381 # sys._getframe is not defined (Jython for example) or sys._getframe is not
382 # defined for arguments greater than 0 (IronPython).
383 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000384 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000385 except (AttributeError, ValueError):
386 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000387
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000388 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000389
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000390
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000391########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000392### Counter
393########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000394
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000395class Counter(dict):
396 '''Dict subclass for counting hashable items. Sometimes called a bag
397 or multiset. Elements are stored as dictionary keys and their counts
398 are stored as dictionary values.
399
Raymond Hettingerc886a842011-01-03 08:59:18 +0000400 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000401
402 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000403 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000404 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000405 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000406 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000407 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000408 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000409 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000410
411 >>> c['a'] # count of letter 'a'
412 5
413 >>> for elem in 'shazam': # update counts from an iterable
414 ... c[elem] += 1 # by adding 1 to each element's count
415 >>> c['a'] # now there are seven 'a'
416 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000417 >>> del c['b'] # remove all 'b'
418 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000419 0
420
421 >>> d = Counter('simsalabim') # make another counter
422 >>> c.update(d) # add in the second counter
423 >>> c['a'] # now there are nine 'a'
424 9
425
426 >>> c.clear() # empty the counter
427 >>> c
428 Counter()
429
430 Note: If a count is set to zero or reduced to zero, it will remain
431 in the counter until the entry is deleted or the counter is cleared:
432
433 >>> c = Counter('aaabbc')
434 >>> c['b'] -= 2 # reduce the count of 'b' by two
435 >>> c.most_common() # 'b' is still in, but its count is zero
436 [('a', 3), ('c', 1), ('b', 0)]
437
438 '''
439 # References:
440 # http://en.wikipedia.org/wiki/Multiset
441 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
442 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
443 # http://code.activestate.com/recipes/259174/
444 # Knuth, TAOCP Vol. II section 4.6.3
445
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200446 def __init__(*args, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000447 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000448 from an input iterable. Or, initialize the count from another mapping
449 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000450
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000451 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000452 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000453 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000454 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000455
456 '''
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200457 if not args:
458 raise TypeError("descriptor '__init__' of 'Counter' object "
459 "needs an argument")
460 self = args[0]
461 args = args[1:]
462 if len(args) > 1:
463 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettingerc886a842011-01-03 08:59:18 +0000464 super(Counter, self).__init__()
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200465 self.update(*args, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000466
467 def __missing__(self, key):
468 'The count of elements not in the Counter is zero.'
469 # Needed so that self[missing_item] does not raise KeyError
470 return 0
471
472 def most_common(self, n=None):
473 '''List the n most common elements and their counts from the most
474 common to the least. If n is None, then list all element counts.
475
Raymond Hettingerc886a842011-01-03 08:59:18 +0000476 >>> Counter('abcdeabcdabcaba').most_common(3)
477 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000478
479 '''
480 # Emulate Bag.sortedByCount from Smalltalk
481 if n is None:
482 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
483 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
484
485 def elements(self):
486 '''Iterator over elements repeating each as many times as its count.
487
488 >>> c = Counter('ABCABC')
489 >>> sorted(c.elements())
490 ['A', 'A', 'B', 'B', 'C', 'C']
491
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000492 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
493 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
494 >>> product = 1
495 >>> for factor in prime_factors.elements(): # loop over factors
496 ... product *= factor # and multiply them
497 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000498 1836
499
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000500 Note, if an element's count has been set to zero or is a negative
501 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000502
503 '''
504 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000505 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000506
507 # Override dict methods where necessary
508
509 @classmethod
510 def fromkeys(cls, iterable, v=None):
511 # There is no equivalent method for counters because setting v=1
512 # means that no element can have a count greater than one.
513 raise NotImplementedError(
514 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
515
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200516 def update(*args, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000517 '''Like dict.update() but add counts instead of replacing them.
518
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000519 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000520
521 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000522 >>> c.update('witch') # add elements from another iterable
523 >>> d = Counter('watch')
524 >>> c.update(d) # add elements from another counter
525 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000526 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000527
528 '''
529 # The regular dict.update() operation makes no sense here because the
530 # replace behavior results in the some of original untouched counts
531 # being mixed-in with all of the other counts for a mismash that
532 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000533 # contexts. Instead, we implement straight-addition. Both the inputs
534 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000535
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200536 if not args:
537 raise TypeError("descriptor 'update' of 'Counter' object "
538 "needs an argument")
539 self = args[0]
540 args = args[1:]
541 if len(args) > 1:
542 raise TypeError('expected at most 1 arguments, got %d' % len(args))
543 iterable = args[0] if args else None
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000544 if iterable is not None:
545 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000546 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000547 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000548 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000549 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000550 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000551 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000552 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000553 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000554 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000555 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000556 if kwds:
557 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000558
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200559 def subtract(*args, **kwds):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000560 '''Like dict.update() but subtracts counts instead of replacing them.
561 Counts can be reduced below zero. Both the inputs and outputs are
562 allowed to contain zero and negative counts.
563
564 Source can be an iterable, a dictionary, or another Counter instance.
565
566 >>> c = Counter('which')
567 >>> c.subtract('witch') # subtract elements from another iterable
568 >>> c.subtract(Counter('watch')) # subtract elements from another counter
569 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
570 0
571 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
572 -1
573
574 '''
Serhiy Storchaka20994f12014-11-27 19:02:56 +0200575 if not args:
576 raise TypeError("descriptor 'subtract' of 'Counter' object "
577 "needs an argument")
578 self = args[0]
579 args = args[1:]
580 if len(args) > 1:
581 raise TypeError('expected at most 1 arguments, got %d' % len(args))
582 iterable = args[0] if args else None
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000583 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000584 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000585 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000586 for elem, count in iterable.items():
587 self[elem] = self_get(elem, 0) - count
588 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000589 for elem in iterable:
590 self[elem] = self_get(elem, 0) - 1
591 if kwds:
592 self.subtract(kwds)
593
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000594 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700595 'Return a shallow copy.'
596 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000597
Raymond Hettingerc886a842011-01-03 08:59:18 +0000598 def __reduce__(self):
599 return self.__class__, (dict(self),)
600
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000601 def __delitem__(self, elem):
602 'Like dict.__delitem__() but does not raise KeyError for missing values.'
603 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000604 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000605
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000606 def __repr__(self):
607 if not self:
608 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000609 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000610 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000611
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000612 # Multiset-style mathematical operations discussed in:
613 # Knuth TAOCP Volume II section 4.6.3 exercise 19
614 # and at http://en.wikipedia.org/wiki/Multiset
615 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000616 # Outputs guaranteed to only include positive counts.
617 #
618 # To strip negative and zero counts, add-in an empty counter:
619 # c += Counter()
620
621 def __add__(self, other):
622 '''Add counts from two counters.
623
624 >>> Counter('abbb') + Counter('bcc')
625 Counter({'b': 4, 'c': 2, 'a': 1})
626
627 '''
628 if not isinstance(other, Counter):
629 return NotImplemented
630 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700631 for elem, count in self.items():
632 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000633 if newcount > 0:
634 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700635 for elem, count in other.items():
636 if elem not in self and count > 0:
637 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000638 return result
639
640 def __sub__(self, other):
641 ''' Subtract count, but keep only results with positive counts.
642
643 >>> Counter('abbbc') - Counter('bccd')
644 Counter({'b': 2, 'a': 1})
645
646 '''
647 if not isinstance(other, Counter):
648 return NotImplemented
649 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700650 for elem, count in self.items():
651 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000652 if newcount > 0:
653 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700654 for elem, count in other.items():
655 if elem not in self and count < 0:
656 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000657 return result
658
659 def __or__(self, other):
660 '''Union is the maximum of value in either of the input counters.
661
662 >>> Counter('abbb') | Counter('bcc')
663 Counter({'b': 3, 'c': 2, 'a': 1})
664
665 '''
666 if not isinstance(other, Counter):
667 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000668 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700669 for elem, count in self.items():
670 other_count = other[elem]
671 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000672 if newcount > 0:
673 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700674 for elem, count in other.items():
675 if elem not in self and count > 0:
676 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000677 return result
678
679 def __and__(self, other):
680 ''' Intersection is the minimum of corresponding counts.
681
682 >>> Counter('abbb') & Counter('bcc')
683 Counter({'b': 1})
684
685 '''
686 if not isinstance(other, Counter):
687 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000688 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700689 for elem, count in self.items():
690 other_count = other[elem]
691 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000692 if newcount > 0:
693 result[elem] = newcount
694 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000695
696
697if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000698 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000699 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000700 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000701 p = Point(x=10, y=20)
702 assert p == loads(dumps(p))
703
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000704 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000705 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000706 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000707 @property
708 def hypot(self):
709 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000710 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000711 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000712
Raymond Hettingere1655082008-01-10 19:15:10 +0000713 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000714 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000715
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000716 class Point(namedtuple('Point', 'x y')):
717 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000718 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000719 _make = classmethod(tuple.__new__)
720 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000721 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000722
723 print Point(11, 22)._replace(x=100)
724
Raymond Hettingere850c462008-01-10 20:37:12 +0000725 Point3D = namedtuple('Point3D', Point._fields + ('z',))
726 print Point3D.__doc__
727
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000728 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000729 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000730 print TestResults(*doctest.testmod())