blob: dcb3c6c5613d8b147d284af6e98de4f4bda0d9ff [file] [log] [blame]
Raymond Hettingerbc512d32009-03-03 04:45:34 +00001__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
Raymond Hettinger88880b22007-12-18 00:13:45 +00002# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
Raymond Hettingereb979882007-02-28 18:37:52 +00007
8from _collections import deque, defaultdict
Raymond Hettingerb36f7472011-04-23 18:37:37 -07009from operator import itemgetter as _itemgetter
Raymond Hettingerabfd8df2007-10-16 21:28:32 +000010from keyword import iskeyword as _iskeyword
Raymond Hettingerc37e5e02007-03-01 06:16:43 +000011import sys as _sys
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +000012import heapq as _heapq
Raymond Hettingerb36f7472011-04-23 18:37:37 -070013from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
14
Raymond Hettinger74f869e2010-09-13 22:14:36 +000015try:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070016 from thread import get_ident as _get_ident
Amaury Forgeot d'Arc71431ef2010-11-09 07:35:26 +000017except ImportError:
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070018 from dummy_thread import get_ident as _get_ident
Raymond Hettinger74f869e2010-09-13 22:14:36 +000019
Raymond Hettingerbc512d32009-03-03 04:45:34 +000020
21################################################################################
22### OrderedDict
23################################################################################
24
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000025class OrderedDict(dict):
Raymond Hettingere980d2d2009-03-19 23:12:41 +000026 'Dictionary that remembers insertion order'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000027 # An inherited dict maps keys to values.
Raymond Hettingere980d2d2009-03-19 23:12:41 +000028 # The inherited dict provides __getitem__, __len__, __contains__, and get.
29 # The remaining methods are order-aware.
Raymond Hettingerc6467432011-04-24 12:30:39 -070030 # Big-O running times for all methods are the same as regular dictionaries.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000031
Raymond Hettingerc6467432011-04-24 12:30:39 -070032 # The internal self.__map dict maps keys to links in a doubly linked list.
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000033 # The circular doubly linked list starts and ends with a sentinel element.
34 # The sentinel element never gets deleted (this simplifies the algorithm).
Raymond Hettingeraba22932010-03-09 09:58:53 +000035 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
Raymond Hettingerbc512d32009-03-03 04:45:34 +000036
37 def __init__(self, *args, **kwds):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000038 '''Initialize an ordered dictionary. Signature is the same as for
39 regular dictionaries, but keyword arguments are not recommended
40 because their insertion order is arbitrary.
41
42 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +000043 if len(args) > 1:
44 raise TypeError('expected at most 1 arguments, got %d' % len(args))
Raymond Hettinger9353ea22009-03-03 20:53:51 +000045 try:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000046 self.__root
Raymond Hettinger9353ea22009-03-03 20:53:51 +000047 except AttributeError:
Raymond Hettinger0b795e52011-04-23 15:41:38 -070048 self.__root = root = [] # sentinel node
49 root[:] = [root, root, None]
Raymond Hettinger2dc90fd2009-03-25 22:41:32 +000050 self.__map = {}
Raymond Hettinger8ebebd82011-01-02 01:03:26 +000051 self.__update(*args, **kwds)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000052
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000053 def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000054 'od.__setitem__(i, y) <==> od[i]=y'
Raymond Hettingerc6467432011-04-24 12:30:39 -070055 # Setting a new item creates a new link at the end of the linked list,
56 # and the inherited dictionary is updated with the new key/value pair.
Raymond Hettingerbc512d32009-03-03 04:45:34 +000057 if key not in self:
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000058 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000059 last = root[PREV]
60 last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000061 dict_setitem(self, key, value)
Raymond Hettingerbc512d32009-03-03 04:45:34 +000062
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000063 def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000064 'od.__delitem__(y) <==> del od[y]'
Raymond Hettingerc6467432011-04-24 12:30:39 -070065 # Deleting an existing item uses self.__map to find the link which gets
66 # removed by updating the links in the predecessor and successor nodes.
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000067 dict_delitem(self, key)
Raymond Hettinger43a56412011-04-23 15:51:38 -070068 link_prev, link_next, key = self.__map.pop(key)
Raymond Hettinger39282762010-04-03 00:39:26 +000069 link_prev[NEXT] = link_next
70 link_next[PREV] = link_prev
Raymond Hettingerbc512d32009-03-03 04:45:34 +000071
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000072 def __iter__(self, NEXT=1, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000073 'od.__iter__() <==> iter(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000074 # Traverse the linked list in order.
75 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000076 curr = root[NEXT]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000077 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000078 yield curr[KEY]
79 curr = curr[NEXT]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000080
Raymond Hettingerdd2fedc2010-04-03 07:57:09 +000081 def __reversed__(self, PREV=0, KEY=2):
Raymond Hettingera5cd6372009-04-08 05:39:38 +000082 'od.__reversed__() <==> reversed(od)'
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000083 # Traverse the linked list in reverse order.
84 root = self.__root
Raymond Hettingeraba22932010-03-09 09:58:53 +000085 curr = root[PREV]
Raymond Hettingerf1e2df92009-03-23 00:08:09 +000086 while curr is not root:
Raymond Hettingeraba22932010-03-09 09:58:53 +000087 yield curr[KEY]
88 curr = curr[PREV]
Raymond Hettingerbc512d32009-03-03 04:45:34 +000089
Raymond Hettinger39282762010-04-03 00:39:26 +000090 def clear(self):
91 'od.clear() -> None. Remove all items from od.'
Raymond Hettingerc6467432011-04-24 12:30:39 -070092 for node in self.__map.itervalues():
93 del node[:]
94 root = self.__root
95 root[:] = [root, root, None]
96 self.__map.clear()
Raymond Hettinger6b96ecb2010-04-03 03:14:28 +000097 dict.clear(self)
Raymond Hettinger39282762010-04-03 00:39:26 +000098
Raymond Hettinger7ce6d972011-04-22 18:49:53 -070099 # -- the following methods do not depend on the internal structure --
100
101 def keys(self):
102 'od.keys() -> list of keys in od'
103 return list(self)
104
105 def values(self):
106 'od.values() -> list of values in od'
107 return [self[key] for key in self]
108
109 def items(self):
110 'od.items() -> list of (key, value) pairs in od'
111 return [(key, self[key]) for key in self]
112
113 def iterkeys(self):
114 'od.iterkeys() -> an iterator over the keys in od'
115 return iter(self)
116
117 def itervalues(self):
118 'od.itervalues -> an iterator over the values in od'
119 for k in self:
120 yield self[k]
121
122 def iteritems(self):
123 'od.iteritems -> an iterator over the (key, value) items in od'
124 for k in self:
125 yield (k, self[k])
126
127 update = MutableMapping.update
128
Raymond Hettingerc6467432011-04-24 12:30:39 -0700129 __update = update # let subclasses override update without breaking __init__
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000130
Raymond Hettinger8ebebd82011-01-02 01:03:26 +0000131 __marker = object()
132
133 def pop(self, key, default=__marker):
134 if key in self:
135 result = self[key]
136 del self[key]
137 return result
138 if default is self.__marker:
139 raise KeyError(key)
140 return default
141
142 def setdefault(self, key, default=None):
143 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
144 if key in self:
145 return self[key]
146 self[key] = default
147 return default
148
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000149 def popitem(self, last=True):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000150 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
151 Pairs are returned in LIFO order if last is true or FIFO order if false.
152
153 '''
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000154 if not self:
155 raise KeyError('dictionary is empty')
Raymond Hettinger1355a3d2009-04-08 08:26:55 +0000156 key = next(reversed(self) if last else iter(self))
Raymond Hettingere980d2d2009-03-19 23:12:41 +0000157 value = self.pop(key)
158 return key, value
159
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700160 def __repr__(self, _repr_running={}):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000161 'od.__repr__() <==> repr(od)'
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700162 call_key = id(self), _get_ident()
163 if call_key in _repr_running:
164 return '...'
165 _repr_running[call_key] = 1
166 try:
167 if not self:
168 return '%s()' % (self.__class__.__name__,)
169 return '%s(%r)' % (self.__class__.__name__, self.items())
170 finally:
171 del _repr_running[call_key]
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000172
Raymond Hettinger3674c852011-04-20 13:11:38 -0700173 def __reduce__(self):
174 'Return state information for pickling'
175 items = [[k, self[k]] for k in self]
176 inst_dict = vars(self).copy()
177 for k in vars(OrderedDict()):
178 inst_dict.pop(k, None)
179 if inst_dict:
180 return (self.__class__, (items,), inst_dict)
181 return self.__class__, (items,)
182
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000183 def copy(self):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000184 'od.copy() -> a shallow copy of od'
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000185 return self.__class__(self)
186
187 @classmethod
188 def fromkeys(cls, iterable, value=None):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000189 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
190 and values equal to v (which defaults to None).
191
192 '''
Raymond Hettingerc6467432011-04-24 12:30:39 -0700193 self = cls()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000194 for key in iterable:
Raymond Hettingerc6467432011-04-24 12:30:39 -0700195 self[key] = value
196 return self
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000197
198 def __eq__(self, other):
Raymond Hettingera5cd6372009-04-08 05:39:38 +0000199 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
200 while comparison to a regular mapping is order-insensitive.
201
202 '''
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000203 if isinstance(other, OrderedDict):
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700204 return len(self)==len(other) and self.items() == other.items()
Raymond Hettingerbc512d32009-03-03 04:45:34 +0000205 return dict.__eq__(self, other)
206
Raymond Hettinger7ce6d972011-04-22 18:49:53 -0700207 def __ne__(self, other):
208 'od.__ne__(y) <==> od!=y'
209 return not self == other
210
Raymond Hettinger43a56412011-04-23 15:51:38 -0700211 # -- the following methods support python 3.x style dictionary views --
212
213 def viewkeys(self):
214 "od.viewkeys() -> a set-like object providing a view on od's keys"
215 return KeysView(self)
216
217 def viewvalues(self):
218 "od.viewvalues() -> an object providing a view on od's values"
219 return ValuesView(self)
220
221 def viewitems(self):
222 "od.viewitems() -> a set-like object providing a view on od's items"
223 return ItemsView(self)
224
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000225
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000226################################################################################
227### namedtuple
228################################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000229
Raymond Hettinger322daea2009-02-10 01:24:05 +0000230def namedtuple(typename, field_names, verbose=False, rename=False):
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000231 """Returns a new subclass of tuple with named fields.
232
Raymond Hettinger01a09572007-10-23 20:37:41 +0000233 >>> Point = namedtuple('Point', 'x y')
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000234 >>> Point.__doc__ # docstring for the new class
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000235 'Point(x, y)'
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000236 >>> p = Point(11, y=22) # instantiate with positional args or keywords
Raymond Hettinger8777bca2007-12-18 22:21:27 +0000237 >>> p[0] + p[1] # indexable like a plain tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000238 33
Raymond Hettinger88880b22007-12-18 00:13:45 +0000239 >>> x, y = p # unpack like a regular tuple
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000240 >>> x, y
241 (11, 22)
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000242 >>> p.x + p.y # fields also accessable by name
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000243 33
Raymond Hettinger42da8742007-12-14 02:49:47 +0000244 >>> d = p._asdict() # convert to a dictionary
Raymond Hettingera7fc4b12007-10-05 02:47:07 +0000245 >>> d['x']
246 11
247 >>> Point(**d) # convert from a dictionary
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000248 Point(x=11, y=22)
Raymond Hettinger42da8742007-12-14 02:49:47 +0000249 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000250 Point(x=100, y=22)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000251
252 """
253
Raymond Hettinger58167142008-01-08 02:02:05 +0000254 # Parse and validate the field names. Validation serves two purposes,
255 # generating informative error messages and preventing template injection attacks.
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000256 if isinstance(field_names, basestring):
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000257 field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
Raymond Hettinger6ee7bc02008-09-25 23:31:52 +0000258 field_names = tuple(map(str, field_names))
Raymond Hettinger322daea2009-02-10 01:24:05 +0000259 if rename:
260 names = list(field_names)
261 seen = set()
262 for i, name in enumerate(names):
263 if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
264 or not name or name[0].isdigit() or name.startswith('_')
265 or name in seen):
Raymond Hettinger756ab672009-04-02 22:25:40 +0000266 names[i] = '_%d' % i
Raymond Hettinger322daea2009-02-10 01:24:05 +0000267 seen.add(name)
268 field_names = tuple(names)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000269 for name in (typename,) + field_names:
Raymond Hettinger2e1af252007-12-05 18:11:08 +0000270 if not all(c.isalnum() or c=='_' for c in name):
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000271 raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
Raymond Hettingerabfd8df2007-10-16 21:28:32 +0000272 if _iskeyword(name):
273 raise ValueError('Type names and field names cannot be a keyword: %r' % name)
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000274 if name[0].isdigit():
275 raise ValueError('Type names and field names cannot start with a number: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000276 seen_names = set()
277 for name in field_names:
Raymond Hettinger322daea2009-02-10 01:24:05 +0000278 if name.startswith('_') and not rename:
Raymond Hettinger42da8742007-12-14 02:49:47 +0000279 raise ValueError('Field names cannot start with an underscore: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000280 if name in seen_names:
Raymond Hettinger050afbf2007-10-16 19:18:30 +0000281 raise ValueError('Encountered duplicate field name: %r' % name)
Raymond Hettinger163f6222007-10-09 01:36:23 +0000282 seen_names.add(name)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000283
284 # Create and fill-in the class template
Raymond Hettinger02740f72008-01-05 01:35:43 +0000285 numfields = len(field_names)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000286 argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000287 reprtxt = ', '.join('%s=%%r' % name for name in field_names)
288 template = '''class %(typename)s(tuple):
Raymond Hettinger48eca672007-12-14 18:08:20 +0000289 '%(typename)s(%(argtxt)s)' \n
290 __slots__ = () \n
Raymond Hettingere0734e72008-01-04 03:22:53 +0000291 _fields = %(field_names)r \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000292 def __new__(_cls, %(argtxt)s):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000293 'Create new instance of %(typename)s(%(argtxt)s)'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000294 return _tuple.__new__(_cls, (%(argtxt)s)) \n
Raymond Hettinger02740f72008-01-05 01:35:43 +0000295 @classmethod
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000296 def _make(cls, iterable, new=tuple.__new__, len=len):
Raymond Hettinger02740f72008-01-05 01:35:43 +0000297 'Make a new %(typename)s object from a sequence or iterable'
Raymond Hettinger844f71b2008-01-06 22:11:54 +0000298 result = new(cls, iterable)
Raymond Hettinger02740f72008-01-05 01:35:43 +0000299 if len(result) != %(numfields)d:
300 raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
301 return result \n
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000302 def __repr__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000303 'Return a nicely formatted representation string'
Raymond Hettinger48eca672007-12-14 18:08:20 +0000304 return '%(typename)s(%(reprtxt)s)' %% self \n
Raymond Hettinger88a91642009-03-03 04:51:24 +0000305 def _asdict(self):
306 'Return a new OrderedDict which maps field names to their values'
307 return OrderedDict(zip(self._fields, self)) \n
Raymond Hettingera68cad12009-05-27 02:24:45 +0000308 def _replace(_self, **kwds):
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000309 'Return a new %(typename)s object replacing specified fields with new values'
Raymond Hettingera68cad12009-05-27 02:24:45 +0000310 result = _self._make(map(kwds.pop, %(field_names)r, _self))
Raymond Hettinger1b50fd72008-01-05 02:17:24 +0000311 if kwds:
312 raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
Raymond Hettingere98839a2008-06-09 01:28:30 +0000313 return result \n
314 def __getnewargs__(self):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000315 'Return self as a plain tuple. Used by copy and pickle.'
Raymond Hettingere98839a2008-06-09 01:28:30 +0000316 return tuple(self) \n\n''' % locals()
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000317 for i, name in enumerate(field_names):
Raymond Hettinger9bd35082010-03-09 09:01:46 +0000318 template += " %s = _property(_itemgetter(%d), doc='Alias for field number %d')\n" % (name, i, i)
Raymond Hettinger2b03d452007-09-18 03:33:19 +0000319 if verbose:
320 print template
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000321
Raymond Hettinger3c2523c2008-05-30 07:16:53 +0000322 # Execute the template string in a temporary namespace and
323 # support tracing utilities by setting a value for frame.f_globals['__name__']
Raymond Hettingera68cad12009-05-27 02:24:45 +0000324 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
325 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000326 try:
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000327 exec template in namespace
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000328 except SyntaxError, e:
329 raise SyntaxError(e.message + ':\n' + template)
Raymond Hettinger0e1d6062007-10-08 10:11:51 +0000330 result = namespace[typename]
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000331
332 # For pickling to work, the __module__ variable needs to be set to the frame
333 # where the named tuple is created. Bypass this step in enviroments where
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000334 # sys._getframe is not defined (Jython for example) or sys._getframe is not
335 # defined for arguments greater than 0 (IronPython).
336 try:
Raymond Hettingerecf252a2009-01-27 10:03:04 +0000337 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
Benjamin Peterson7c67b032009-05-05 00:55:24 +0000338 except (AttributeError, ValueError):
339 pass
Raymond Hettinger2115bbc2007-10-08 09:14:28 +0000340
Raymond Hettinger5a41daf2007-05-19 01:11:16 +0000341 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000342
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000343
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000344########################################################################
Raymond Hettingerae3f0682009-01-20 03:36:36 +0000345### Counter
346########################################################################
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000347
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000348class Counter(dict):
349 '''Dict subclass for counting hashable items. Sometimes called a bag
350 or multiset. Elements are stored as dictionary keys and their counts
351 are stored as dictionary values.
352
Raymond Hettingerc886a842011-01-03 08:59:18 +0000353 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000354
355 >>> c.most_common(3) # three most common elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000356 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000357 >>> sorted(c) # list all unique elements
Raymond Hettingerc886a842011-01-03 08:59:18 +0000358 ['a', 'b', 'c', 'd', 'e']
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000359 >>> ''.join(sorted(c.elements())) # list elements with repetitions
Raymond Hettingerc886a842011-01-03 08:59:18 +0000360 'aaaaabbbbcccdde'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000361 >>> sum(c.values()) # total of all counts
Raymond Hettingerc886a842011-01-03 08:59:18 +0000362 15
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000363
364 >>> c['a'] # count of letter 'a'
365 5
366 >>> for elem in 'shazam': # update counts from an iterable
367 ... c[elem] += 1 # by adding 1 to each element's count
368 >>> c['a'] # now there are seven 'a'
369 7
Raymond Hettingerc886a842011-01-03 08:59:18 +0000370 >>> del c['b'] # remove all 'b'
371 >>> c['b'] # now there are zero 'b'
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000372 0
373
374 >>> d = Counter('simsalabim') # make another counter
375 >>> c.update(d) # add in the second counter
376 >>> c['a'] # now there are nine 'a'
377 9
378
379 >>> c.clear() # empty the counter
380 >>> c
381 Counter()
382
383 Note: If a count is set to zero or reduced to zero, it will remain
384 in the counter until the entry is deleted or the counter is cleared:
385
386 >>> c = Counter('aaabbc')
387 >>> c['b'] -= 2 # reduce the count of 'b' by two
388 >>> c.most_common() # 'b' is still in, but its count is zero
389 [('a', 3), ('c', 1), ('b', 0)]
390
391 '''
392 # References:
393 # http://en.wikipedia.org/wiki/Multiset
394 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
395 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
396 # http://code.activestate.com/recipes/259174/
397 # Knuth, TAOCP Vol. II section 4.6.3
398
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000399 def __init__(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000400 '''Create a new, empty Counter object. And if given, count elements
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000401 from an input iterable. Or, initialize the count from another mapping
402 of elements to their counts.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000403
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000404 >>> c = Counter() # a new, empty counter
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000405 >>> c = Counter('gallahad') # a new counter from an iterable
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000406 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000407 >>> c = Counter(a=4, b=2) # a new counter from keyword args
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000408
409 '''
Raymond Hettingerc886a842011-01-03 08:59:18 +0000410 super(Counter, self).__init__()
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000411 self.update(iterable, **kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000412
413 def __missing__(self, key):
414 'The count of elements not in the Counter is zero.'
415 # Needed so that self[missing_item] does not raise KeyError
416 return 0
417
418 def most_common(self, n=None):
419 '''List the n most common elements and their counts from the most
420 common to the least. If n is None, then list all element counts.
421
Raymond Hettingerc886a842011-01-03 08:59:18 +0000422 >>> Counter('abcdeabcdabcaba').most_common(3)
423 [('a', 5), ('b', 4), ('c', 3)]
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000424
425 '''
426 # Emulate Bag.sortedByCount from Smalltalk
427 if n is None:
428 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
429 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
430
431 def elements(self):
432 '''Iterator over elements repeating each as many times as its count.
433
434 >>> c = Counter('ABCABC')
435 >>> sorted(c.elements())
436 ['A', 'A', 'B', 'B', 'C', 'C']
437
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000438 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
439 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
440 >>> product = 1
441 >>> for factor in prime_factors.elements(): # loop over factors
442 ... product *= factor # and multiply them
443 >>> product
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000444 1836
445
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000446 Note, if an element's count has been set to zero or is a negative
447 number, elements() will ignore it.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000448
449 '''
450 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
Raymond Hettinger35288c62009-01-13 04:50:35 +0000451 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000452
453 # Override dict methods where necessary
454
455 @classmethod
456 def fromkeys(cls, iterable, v=None):
457 # There is no equivalent method for counters because setting v=1
458 # means that no element can have a count greater than one.
459 raise NotImplementedError(
460 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
461
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000462 def update(self, iterable=None, **kwds):
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000463 '''Like dict.update() but add counts instead of replacing them.
464
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000465 Source can be an iterable, a dictionary, or another Counter instance.
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000466
467 >>> c = Counter('which')
Raymond Hettinger783d73f2009-01-13 04:13:53 +0000468 >>> c.update('witch') # add elements from another iterable
469 >>> d = Counter('watch')
470 >>> c.update(d) # add elements from another counter
471 >>> c['h'] # four 'h' in which, witch, and watch
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000472 4
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000473
474 '''
475 # The regular dict.update() operation makes no sense here because the
476 # replace behavior results in the some of original untouched counts
477 # being mixed-in with all of the other counts for a mismash that
478 # doesn't have a straight-forward interpretation in most counting
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000479 # contexts. Instead, we implement straight-addition. Both the inputs
480 # and outputs are allowed to contain zero and negative counts.
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000481
482 if iterable is not None:
483 if isinstance(iterable, Mapping):
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000484 if self:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000485 self_get = self.get
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000486 for elem, count in iterable.iteritems():
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000487 self[elem] = self_get(elem, 0) + count
Raymond Hettinger1bc1c8a2009-01-22 09:05:43 +0000488 else:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000489 super(Counter, self).update(iterable) # fast path when counter is empty
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000490 else:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000491 self_get = self.get
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000492 for elem in iterable:
Raymond Hettinger5dfc7f92009-06-29 19:10:29 +0000493 self[elem] = self_get(elem, 0) + 1
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000494 if kwds:
495 self.update(kwds)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000496
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000497 def subtract(self, iterable=None, **kwds):
498 '''Like dict.update() but subtracts counts instead of replacing them.
499 Counts can be reduced below zero. Both the inputs and outputs are
500 allowed to contain zero and negative counts.
501
502 Source can be an iterable, a dictionary, or another Counter instance.
503
504 >>> c = Counter('which')
505 >>> c.subtract('witch') # subtract elements from another iterable
506 >>> c.subtract(Counter('watch')) # subtract elements from another counter
507 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
508 0
509 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
510 -1
511
512 '''
513 if iterable is not None:
Raymond Hettingerfdf1b562010-04-11 20:39:28 +0000514 self_get = self.get
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000515 if isinstance(iterable, Mapping):
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000516 for elem, count in iterable.items():
517 self[elem] = self_get(elem, 0) - count
518 else:
Raymond Hettinger34c35b22010-04-03 10:22:00 +0000519 for elem in iterable:
520 self[elem] = self_get(elem, 0) - 1
521 if kwds:
522 self.subtract(kwds)
523
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000524 def copy(self):
Raymond Hettinger37c0fe52011-04-15 13:12:21 -0700525 'Return a shallow copy.'
526 return self.__class__(self)
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000527
Raymond Hettingerc886a842011-01-03 08:59:18 +0000528 def __reduce__(self):
529 return self.__class__, (dict(self),)
530
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000531 def __delitem__(self, elem):
532 'Like dict.__delitem__() but does not raise KeyError for missing values.'
533 if elem in self:
Raymond Hettingerc886a842011-01-03 08:59:18 +0000534 super(Counter, self).__delitem__(elem)
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000535
Raymond Hettingerf94d7fa2009-01-12 22:58:41 +0000536 def __repr__(self):
537 if not self:
538 return '%s()' % self.__class__.__name__
Raymond Hettinger35288c62009-01-13 04:50:35 +0000539 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
Raymond Hettingeraaa6e632009-01-13 01:05:03 +0000540 return '%s({%s})' % (self.__class__.__name__, items)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000541
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000542 # Multiset-style mathematical operations discussed in:
543 # Knuth TAOCP Volume II section 4.6.3 exercise 19
544 # and at http://en.wikipedia.org/wiki/Multiset
545 #
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000546 # Outputs guaranteed to only include positive counts.
547 #
548 # To strip negative and zero counts, add-in an empty counter:
549 # c += Counter()
550
551 def __add__(self, other):
552 '''Add counts from two counters.
553
554 >>> Counter('abbb') + Counter('bcc')
555 Counter({'b': 4, 'c': 2, 'a': 1})
556
557 '''
558 if not isinstance(other, Counter):
559 return NotImplemented
560 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700561 for elem, count in self.items():
562 newcount = count + other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000563 if newcount > 0:
564 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700565 for elem, count in other.items():
566 if elem not in self and count > 0:
567 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000568 return result
569
570 def __sub__(self, other):
571 ''' Subtract count, but keep only results with positive counts.
572
573 >>> Counter('abbbc') - Counter('bccd')
574 Counter({'b': 2, 'a': 1})
575
576 '''
577 if not isinstance(other, Counter):
578 return NotImplemented
579 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700580 for elem, count in self.items():
581 newcount = count - other[elem]
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000582 if newcount > 0:
583 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700584 for elem, count in other.items():
585 if elem not in self and count < 0:
586 result[elem] = 0 - count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000587 return result
588
589 def __or__(self, other):
590 '''Union is the maximum of value in either of the input counters.
591
592 >>> Counter('abbb') | Counter('bcc')
593 Counter({'b': 3, 'c': 2, 'a': 1})
594
595 '''
596 if not isinstance(other, Counter):
597 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000598 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700599 for elem, count in self.items():
600 other_count = other[elem]
601 newcount = other_count if count < other_count else count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000602 if newcount > 0:
603 result[elem] = newcount
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700604 for elem, count in other.items():
605 if elem not in self and count > 0:
606 result[elem] = count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000607 return result
608
609 def __and__(self, other):
610 ''' Intersection is the minimum of corresponding counts.
611
612 >>> Counter('abbb') & Counter('bcc')
613 Counter({'b': 1})
614
615 '''
616 if not isinstance(other, Counter):
617 return NotImplemented
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000618 result = Counter()
Raymond Hettingerefeb8bd2011-04-17 20:08:41 -0700619 for elem, count in self.items():
620 other_count = other[elem]
621 newcount = count if count < other_count else other_count
Raymond Hettingerbad1eb22009-01-20 01:19:26 +0000622 if newcount > 0:
623 result[elem] = newcount
624 return result
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000625
626
627if __name__ == '__main__':
Raymond Hettingerd36a60e2007-09-17 00:55:00 +0000628 # verify that instances can be pickled
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000629 from cPickle import loads, dumps
Raymond Hettinger01a09572007-10-23 20:37:41 +0000630 Point = namedtuple('Point', 'x, y', True)
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000631 p = Point(x=10, y=20)
632 assert p == loads(dumps(p))
633
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000634 # test and demonstrate ability to override methods
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000635 class Point(namedtuple('Point', 'x y')):
Raymond Hettingere1655082008-01-10 19:15:10 +0000636 __slots__ = ()
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000637 @property
638 def hypot(self):
639 return (self.x ** 2 + self.y ** 2) ** 0.5
Raymond Hettinger9a359212008-01-07 20:07:38 +0000640 def __str__(self):
Raymond Hettinger15b5e552008-01-10 23:00:01 +0000641 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
Raymond Hettingerb8e00722008-01-07 04:24:49 +0000642
Raymond Hettingere1655082008-01-10 19:15:10 +0000643 for p in Point(3, 4), Point(14, 5/7.):
Raymond Hettinger9a359212008-01-07 20:07:38 +0000644 print p
Raymond Hettingereeeb9c42007-11-15 02:44:53 +0000645
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000646 class Point(namedtuple('Point', 'x y')):
647 'Point class with optimized _make() and _replace() without error-checking'
Raymond Hettingere1655082008-01-10 19:15:10 +0000648 __slots__ = ()
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000649 _make = classmethod(tuple.__new__)
650 def _replace(self, _map=map, **kwds):
Raymond Hettingerf5e8af12008-01-07 20:56:05 +0000651 return self._make(_map(kwds.get, ('x', 'y'), self))
Raymond Hettingerdc55f352008-01-07 09:03:49 +0000652
653 print Point(11, 22)._replace(x=100)
654
Raymond Hettingere850c462008-01-10 20:37:12 +0000655 Point3D = namedtuple('Point3D', Point._fields + ('z',))
656 print Point3D.__doc__
657
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000658 import doctest
Raymond Hettinger01a09572007-10-23 20:37:41 +0000659 TestResults = namedtuple('TestResults', 'failed attempted')
Raymond Hettingerc37e5e02007-03-01 06:16:43 +0000660 print TestResults(*doctest.testmod())