blob: 958e523b6c701dbc3c2a9889424175bc2127d412 [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerb36f7472011-04-23 18:37:37 -07009from operator import itemgetter as _itemgetter
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerb36f7472011-04-23 18:37:37 -070013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
14
Raymond Hettinger74f869e2010-09-13 22:14:36 +000015try:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070016 from thread import get_ident as _get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000017except ImportError:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070018 from dummy_thread import get_ident as _get_ident
Raymond Hettinger74f869e2010-09-13 22:14:36 +000019
Raymond Hettingerbc512d32009-03-03 04:45:34 +000020
21################################################################################
22### OrderedDict
23################################################################################
24
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000025class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000027 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerc6467432011-04-24 12:30:39 -070030 # Big-O running times for all methods are the same as regular dictionaries.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000031
Raymond Hettingerc6467432011-04-24 12:30:39 -070032 # The internal self.__map dict maps keys to links in a doubly linked list.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000033 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000035 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000036
37 def __init__(self, *args, **kwds):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070038 '''Initialize an ordered dictionary. The signature is the same as
39 regular dictionaries, but keyword arguments are not recommended because
40 their insertion order is arbitrary.
Raymond Hettingera5cd6372009-04-08 05:39:38 +000041
42 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000043 if len(args) > 1:
44 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000045 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000046 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000047 except AttributeError:
Raymond Hettinger0b795e52011-04-23 15:41:38 -070048 self.__root = root = [] # sentinel node
49 root[:] = [root, root, None]
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000050 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000051 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000052
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000053 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000054 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerc6467432011-04-24 12:30:39 -070055 # Setting a new item creates a new link at the end of the linked list,
56 # and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000057 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000058 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000059 last = root[PREV]
60 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000061 dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000062
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000063 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000064 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerc6467432011-04-24 12:30:39 -070065 # Deleting an existing item uses self.__map to find the link which gets
66 # removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000067 dict_delitem(self, key)
Raymond Hettinger43a56412011-04-23 15:51:38 -070068 link_prev, link_next, key = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000069 link_prev[NEXT] = link_next
70 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000071
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070072 def __iter__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000073 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000074 # Traverse the linked list in order.
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070075 NEXT, KEY = 1, 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000076 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000077 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000078 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000079 yield curr[KEY]
80 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000081
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070082 def __reversed__(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000083 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000084 # Traverse the linked list in reverse order.
Raymond Hettinger3f2b1842011-04-24 12:55:28 -070085 PREV, KEY = 0, 2
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000086 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000087 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000088 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000089 yield curr[KEY]
90 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000091
Raymond Hettinger39282762010-04-03 00:39:26 +000092 def clear(self):
93 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerc6467432011-04-24 12:30:39 -070094 for node in self.__map.itervalues():
95 del node[:]
96 root = self.__root
97 root[:] = [root, root, None]
98 self.__map.clear()
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +000099 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +0000100
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700101 # -- the following methods do not depend on the internal structure --
102
103 def keys(self):
104 'od.keys() -> list of keys in od'
105 return list(self)
106
107 def values(self):
108 'od.values() -> list of values in od'
109 return [self[key] for key in self]
110
111 def items(self):
112 'od.items() -> list of (key, value) pairs in od'
113 return [(key, self[key]) for key in self]
114
115 def iterkeys(self):
116 'od.iterkeys() -> an iterator over the keys in od'
117 return iter(self)
118
119 def itervalues(self):
120 'od.itervalues -> an iterator over the values in od'
121 for k in self:
122 yield self[k]
123
124 def iteritems(self):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700125 'od.iteritems -> an iterator over the (key, value) pairs in od'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700126 for k in self:
127 yield (k, self[k])
128
129 update = MutableMapping.update
130
Raymond Hettingerc6467432011-04-24 12:30:39 -0700131 __update = update # let subclasses override update without breaking __init__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000132
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000133 __marker = object()
134
135 def pop(self, key, default=__marker):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700136 '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
137 value. If key is not found, d is returned if given, otherwise KeyError
138 is raised.
139
140 '''
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000141 if key in self:
142 result = self[key]
143 del self[key]
144 return result
145 if default is self.__marker:
146 raise KeyError(key)
147 return default
148
149 def setdefault(self, key, default=None):
150 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
151 if key in self:
152 return self[key]
153 self[key] = default
154 return default
155
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000156 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000157 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
158 Pairs are returned in LIFO order if last is true or FIFO order if false.
159
160 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000161 if not self:
162 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000163 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000164 value = self.pop(key)
165 return key, value
166
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700167 def __repr__(self, _repr_running={}):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000168 'od.__repr__() <==> repr(od)'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700169 call_key = id(self), _get_ident()
170 if call_key in _repr_running:
171 return '...'
172 _repr_running[call_key] = 1
173 try:
174 if not self:
175 return '%s()' % (self.__class__.__name__,)
176 return '%s(%r)' % (self.__class__.__name__, self.items())
177 finally:
178 del _repr_running[call_key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000179
Raymond Hettinger3674c852011-04-20 13:11:38 -0700180 def __reduce__(self):
181 'Return state information for pickling'
182 items = [[k, self[k]] for k in self]
183 inst_dict = vars(self).copy()
184 for k in vars(OrderedDict()):
185 inst_dict.pop(k, None)
186 if inst_dict:
187 return (self.__class__, (items,), inst_dict)
188 return self.__class__, (items,)
189
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000190 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000191 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000192 return self.__class__(self)
193
194 @classmethod
195 def fromkeys(cls, iterable, value=None):
Raymond Hettinger3f2b1842011-04-24 12:55:28 -0700196 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
197 If not specified, the value defaults to None.
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000198
199 '''
Raymond Hettingerc6467432011-04-24 12:30:39 -0700200 self = cls()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000201 for key in iterable:
Raymond Hettingerc6467432011-04-24 12:30:39 -0700202 self[key] = value
203 return self
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000204
205 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000206 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
207 while comparison to a regular mapping is order-insensitive.
208
209 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000210 if isinstance(other, OrderedDict):
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700211 return len(self)==len(other) and self.items() == other.items()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000212 return dict.__eq__(self, other)
213
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700214 def __ne__(self, other):
215 'od.__ne__(y) <==> od!=y'
216 return not self == other
217
Raymond Hettinger43a56412011-04-23 15:51:38 -0700218 # -- the following methods support python 3.x style dictionary views --
219
220 def viewkeys(self):
221 "od.viewkeys() -> a set-like object providing a view on od's keys"
222 return KeysView(self)
223
224 def viewvalues(self):
225 "od.viewvalues() -> an object providing a view on od's values"
226 return ValuesView(self)
227
228 def viewitems(self):
229 "od.viewitems() -> a set-like object providing a view on od's items"
230 return ItemsView(self)
231
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000232
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000233################################################################################
234### namedtuple
235################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000236
Raymond Hettinger322daea2009-02-10 01:24:05 +0000237def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000238 """Returns a new subclass of tuple with named fields.
239
Raymond Hettinger01a09572007-10-23 20:37:41 +0000240 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000241 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000242 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000243 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000244 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000245 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000246 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000247 >>> x, y
248 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000249 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000250 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000251 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000252 >>> d['x']
253 11
254 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000255 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000256 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000257 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000258
259 """
260
Raymond Hettinger58167142008-01-08 02:02:05 +0000261 # Parse and validate the field names. Validation serves two purposes,
262 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000263 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000264 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000265 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000266 if rename:
267 names = list(field_names)
268 seen = set()
269 for i, name in enumerate(names):
270 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
271 or not name or name[0].isdigit() or name.startswith('_')
272 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000273 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000274 seen.add(name)
275 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000276 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000277 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000278 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000279 if _iskeyword(name):
280 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000281 if name[0].isdigit():
282 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000283 seen_names = set()
284 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000285 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000286 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000287 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000288 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000289 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000290
291 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000292 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000293 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000294 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
295 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000296 '%(typename)s(%(argtxt)s)' \n
297 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000298 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000299 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000300 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000301 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000302 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000303 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000304 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000305 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000306 if len(result) != %(numfields)d:
307 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
308 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000309 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000310 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000311 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000312 def _asdict(self):
313 'Return a new OrderedDict which maps field names to their values'
314 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettinger45b08292011-06-02 20:40:35 -0700315 __dict__ = property(_asdict) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000316 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000317 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000318 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000319 if kwds:
320 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000321 return result \n
322 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000323 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000324 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000325 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000326 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000327 if verbose:
328 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000329
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000330 # Execute the template string in a temporary namespace and
331 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000332 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
333 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000334 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000335 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000336 except SyntaxError, e:
337 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000338 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000339
340 # For pickling to work, the __module__ variable needs to be set to the frame
341 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000342 # sys._getframe is not defined (Jython for example) or sys._getframe is not
343 # defined for arguments greater than 0 (IronPython).
344 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000345 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000346 except (AttributeError, ValueError):
347 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000348
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000349 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000350
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000351
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000352########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000353### Counter
354########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000355
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000356class Counter(dict):
357 '''Dict subclass for counting hashable items. Sometimes called a bag
358 or multiset. Elements are stored as dictionary keys and their counts
359 are stored as dictionary values.
360
Raymond Hettingerc886a842011-01-03 08:59:18 +0000361 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000362
363 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000364 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000365 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000366 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000367 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000368 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000369 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000370 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000371
372 >>> c['a'] # count of letter 'a'
373 5
374 >>> for elem in 'shazam': # update counts from an iterable
375 ... c[elem] += 1 # by adding 1 to each element's count
376 >>> c['a'] # now there are seven 'a'
377 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000378 >>> del c['b'] # remove all 'b'
379 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000380 0
381
382 >>> d = Counter('simsalabim') # make another counter
383 >>> c.update(d) # add in the second counter
384 >>> c['a'] # now there are nine 'a'
385 9
386
387 >>> c.clear() # empty the counter
388 >>> c
389 Counter()
390
391 Note: If a count is set to zero or reduced to zero, it will remain
392 in the counter until the entry is deleted or the counter is cleared:
393
394 >>> c = Counter('aaabbc')
395 >>> c['b'] -= 2 # reduce the count of 'b' by two
396 >>> c.most_common() # 'b' is still in, but its count is zero
397 [('a', 3), ('c', 1), ('b', 0)]
398
399 '''
400 # References:
401 # http://en.wikipedia.org/wiki/Multiset
402 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
403 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
404 # http://code.activestate.com/recipes/259174/
405 # Knuth, TAOCP Vol. II section 4.6.3
406
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000407 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000408 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000409 from an input iterable. Or, initialize the count from another mapping
410 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000411
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000412 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000413 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000414 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000415 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000416
417 '''
Raymond Hettingerc886a842011-01-03 08:59:18 +0000418 super(Counter, self).__init__()
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000419 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000420
421 def __missing__(self, key):
422 'The count of elements not in the Counter is zero.'
423 # Needed so that self[missing_item] does not raise KeyError
424 return 0
425
426 def most_common(self, n=None):
427 '''List the n most common elements and their counts from the most
428 common to the least. If n is None, then list all element counts.
429
Raymond Hettingerc886a842011-01-03 08:59:18 +0000430 >>> Counter('abcdeabcdabcaba').most_common(3)
431 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000432
433 '''
434 # Emulate Bag.sortedByCount from Smalltalk
435 if n is None:
436 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
437 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
438
439 def elements(self):
440 '''Iterator over elements repeating each as many times as its count.
441
442 >>> c = Counter('ABCABC')
443 >>> sorted(c.elements())
444 ['A', 'A', 'B', 'B', 'C', 'C']
445
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000446 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
447 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
448 >>> product = 1
449 >>> for factor in prime_factors.elements(): # loop over factors
450 ... product *= factor # and multiply them
451 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000452 1836
453
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000454 Note, if an element's count has been set to zero or is a negative
455 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000456
457 '''
458 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000459 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000460
461 # Override dict methods where necessary
462
463 @classmethod
464 def fromkeys(cls, iterable, v=None):
465 # There is no equivalent method for counters because setting v=1
466 # means that no element can have a count greater than one.
467 raise NotImplementedError(
468 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
469
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000470 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000471 '''Like dict.update() but add counts instead of replacing them.
472
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000473 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000474
475 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000476 >>> c.update('witch') # add elements from another iterable
477 >>> d = Counter('watch')
478 >>> c.update(d) # add elements from another counter
479 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000480 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000481
482 '''
483 # The regular dict.update() operation makes no sense here because the
484 # replace behavior results in the some of original untouched counts
485 # being mixed-in with all of the other counts for a mismash that
486 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000487 # contexts. Instead, we implement straight-addition. Both the inputs
488 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000489
490 if iterable is not None:
491 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000492 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000493 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000494 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000495 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000496 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000497 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000498 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000499 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000500 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000501 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000502 if kwds:
503 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000504
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000505 def subtract(self, iterable=None, **kwds):
506 '''Like dict.update() but subtracts counts instead of replacing them.
507 Counts can be reduced below zero. Both the inputs and outputs are
508 allowed to contain zero and negative counts.
509
510 Source can be an iterable, a dictionary, or another Counter instance.
511
512 >>> c = Counter('which')
513 >>> c.subtract('witch') # subtract elements from another iterable
514 >>> c.subtract(Counter('watch')) # subtract elements from another counter
515 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
516 0
517 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
518 -1
519
520 '''
521 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000522 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000523 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000524 for elem, count in iterable.items():
525 self[elem] = self_get(elem, 0) - count
526 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000527 for elem in iterable:
528 self[elem] = self_get(elem, 0) - 1
529 if kwds:
530 self.subtract(kwds)
531
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000532 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700533 'Return a shallow copy.'
534 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000535
Raymond Hettingerc886a842011-01-03 08:59:18 +0000536 def __reduce__(self):
537 return self.__class__, (dict(self),)
538
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000539 def __delitem__(self, elem):
540 'Like dict.__delitem__() but does not raise KeyError for missing values.'
541 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000542 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000543
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000544 def __repr__(self):
545 if not self:
546 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000547 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000548 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000549
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000550 # Multiset-style mathematical operations discussed in:
551 # Knuth TAOCP Volume II section 4.6.3 exercise 19
552 # and at http://en.wikipedia.org/wiki/Multiset
553 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000554 # Outputs guaranteed to only include positive counts.
555 #
556 # To strip negative and zero counts, add-in an empty counter:
557 # c += Counter()
558
559 def __add__(self, other):
560 '''Add counts from two counters.
561
562 >>> Counter('abbb') + Counter('bcc')
563 Counter({'b': 4, 'c': 2, 'a': 1})
564
565 '''
566 if not isinstance(other, Counter):
567 return NotImplemented
568 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700569 for elem, count in self.items():
570 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000571 if newcount > 0:
572 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700573 for elem, count in other.items():
574 if elem not in self and count > 0:
575 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000576 return result
577
578 def __sub__(self, other):
579 ''' Subtract count, but keep only results with positive counts.
580
581 >>> Counter('abbbc') - Counter('bccd')
582 Counter({'b': 2, 'a': 1})
583
584 '''
585 if not isinstance(other, Counter):
586 return NotImplemented
587 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700588 for elem, count in self.items():
589 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000590 if newcount > 0:
591 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700592 for elem, count in other.items():
593 if elem not in self and count < 0:
594 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000595 return result
596
597 def __or__(self, other):
598 '''Union is the maximum of value in either of the input counters.
599
600 >>> Counter('abbb') | Counter('bcc')
601 Counter({'b': 3, 'c': 2, 'a': 1})
602
603 '''
604 if not isinstance(other, Counter):
605 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000606 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700607 for elem, count in self.items():
608 other_count = other[elem]
609 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000610 if newcount > 0:
611 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700612 for elem, count in other.items():
613 if elem not in self and count > 0:
614 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000615 return result
616
617 def __and__(self, other):
618 ''' Intersection is the minimum of corresponding counts.
619
620 >>> Counter('abbb') & Counter('bcc')
621 Counter({'b': 1})
622
623 '''
624 if not isinstance(other, Counter):
625 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000626 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700627 for elem, count in self.items():
628 other_count = other[elem]
629 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000630 if newcount > 0:
631 result[elem] = newcount
632 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000633
634
635if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000636 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000637 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000638 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000639 p = Point(x=10, y=20)
640 assert p == loads(dumps(p))
641
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000642 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000643 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000644 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000645 @property
646 def hypot(self):
647 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000648 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000649 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000650
Raymond Hettingere1655082008-01-10 19:15:10 +0000651 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000652 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000653
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000654 class Point(namedtuple('Point', 'x y')):
655 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000656 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000657 _make = classmethod(tuple.__new__)
658 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000659 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000660
661 print Point(11, 22)._replace(x=100)
662
Raymond Hettingere850c462008-01-10 20:37:12 +0000663 Point3D = namedtuple('Point3D', Point._fields + ('z',))
664 print Point3D.__doc__
665
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000666 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000667 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000668 print TestResults(*doctest.testmod())