blob: 5a07a2d8d9ac40511358792926fdf4c8c1eb95b2 [file] [log] [blame]
Raymond Hettingere52f3b12004-01-29 07:27:45 +00001\section{\module{collections} ---
Raymond Hettinger5c5eb862004-02-07 21:13:00 +00002 High-performance container datatypes}
Raymond Hettingere52f3b12004-01-29 07:27:45 +00003
4\declaremodule{standard}{collections}
5\modulesynopsis{High-performance datatypes}
6\moduleauthor{Raymond Hettinger}{python@rcn.com}
7\sectionauthor{Raymond Hettinger}{python@rcn.com}
8\versionadded{2.4}
9
10
Guido van Rossum1968ad32006-02-25 22:38:04 +000011This module implements high-performance container datatypes. Currently,
12there are two datatypes, deque and defaultdict.
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000013Future additions may include balanced trees and ordered dictionaries.
Guido van Rossum1968ad32006-02-25 22:38:04 +000014\versionchanged[Added defaultdict]{2.5}
Raymond Hettingere52f3b12004-01-29 07:27:45 +000015
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000016\subsection{\class{deque} objects \label{deque-objects}}
17
Raymond Hettingere52f3b12004-01-29 07:27:45 +000018\begin{funcdesc}{deque}{\optional{iterable}}
  Returns a new deque object initialized left-to-right (using
  \method{append()}) with data from \var{iterable}. If \var{iterable}
  is not specified, the new deque is empty.
22
Raymond Hettinger5c5eb862004-02-07 21:13:00 +000023 Deques are a generalization of stacks and queues (the name is pronounced
24 ``deck'' and is short for ``double-ended queue''). Deques support
25 thread-safe, memory efficient appends and pops from either side of the deque
26 with approximately the same \code{O(1)} performance in either direction.
27
28 Though \class{list} objects support similar operations, they are optimized
29 for fast fixed-length operations and incur \code{O(n)} memory movement costs
30 for \samp{pop(0)} and \samp{insert(0, v)} operations which change both the
31 size and position of the underlying data representation.
Raymond Hettingere52f3b12004-01-29 07:27:45 +000032 \versionadded{2.4}
33\end{funcdesc}
34
35Deque objects support the following methods:
36
37\begin{methoddesc}{append}{x}
38 Add \var{x} to the right side of the deque.
39\end{methoddesc}
40
41\begin{methoddesc}{appendleft}{x}
42 Add \var{x} to the left side of the deque.
43\end{methoddesc}
44
45\begin{methoddesc}{clear}{}
46 Remove all elements from the deque leaving it with length 0.
47\end{methoddesc}
48
Raymond Hettinger3ba85c22004-02-06 19:04:56 +000049\begin{methoddesc}{extend}{iterable}
50 Extend the right side of the deque by appending elements from
51 the iterable argument.
52\end{methoddesc}
53
54\begin{methoddesc}{extendleft}{iterable}
55 Extend the left side of the deque by appending elements from
56 \var{iterable}. Note, the series of left appends results in
57 reversing the order of elements in the iterable argument.
58\end{methoddesc}
59
Raymond Hettingere52f3b12004-01-29 07:27:45 +000060\begin{methoddesc}{pop}{}
61 Remove and return an element from the right side of the deque.
Thomas Wouters477c8d52006-05-27 19:21:47 +000062 If no elements are present, raises an \exception{IndexError}.
Raymond Hettingere52f3b12004-01-29 07:27:45 +000063\end{methoddesc}
64
65\begin{methoddesc}{popleft}{}
66 Remove and return an element from the left side of the deque.
Thomas Wouters477c8d52006-05-27 19:21:47 +000067 If no elements are present, raises an \exception{IndexError}.
Raymond Hettinger738ec902004-02-29 02:15:56 +000068\end{methoddesc}
69
Raymond Hettinger4aec61e2005-03-18 21:20:23 +000070\begin{methoddesc}{remove}{value}
  Remove the first occurrence of \var{value}. If not found,
  raises a \exception{ValueError}.
73 \versionadded{2.5}
74\end{methoddesc}
75
Raymond Hettinger5c5eb862004-02-07 21:13:00 +000076\begin{methoddesc}{rotate}{n}
77 Rotate the deque \var{n} steps to the right. If \var{n} is
78 negative, rotate to the left. Rotating one step to the right
Raymond Hettingerf5f9a3702004-04-30 22:52:50 +000079 is equivalent to: \samp{d.appendleft(d.pop())}.
Raymond Hettinger5c5eb862004-02-07 21:13:00 +000080\end{methoddesc}
81
82In addition to the above, deques support iteration, pickling, \samp{len(d)},
Raymond Hettinger0a4977c2004-03-01 23:16:22 +000083\samp{reversed(d)}, \samp{copy.copy(d)}, \samp{copy.deepcopy(d)},
84membership testing with the \keyword{in} operator, and subscript references
85such as \samp{d[-1]}.
Raymond Hettingere52f3b12004-01-29 07:27:45 +000086
87Example:
88
89\begin{verbatim}
90>>> from collections import deque
Raymond Hettinger5c5eb862004-02-07 21:13:00 +000091>>> d = deque('ghi') # make a new deque with three items
>>> for elem in d:  # iterate over the deque's elements
...     print elem.upper()
Raymond Hettingere52f3b12004-01-29 07:27:45 +000094G
95H
96I
Raymond Hettinger738ec902004-02-29 02:15:56 +000097
Raymond Hettinger5c5eb862004-02-07 21:13:00 +000098>>> d.append('j') # add a new entry to the right side
99>>> d.appendleft('f') # add a new entry to the left side
100>>> d # show the representation of the deque
Raymond Hettingere52f3b12004-01-29 07:27:45 +0000101deque(['f', 'g', 'h', 'i', 'j'])
Raymond Hettinger738ec902004-02-29 02:15:56 +0000102
Raymond Hettinger5c5eb862004-02-07 21:13:00 +0000103>>> d.pop() # return and remove the rightmost item
Raymond Hettingere52f3b12004-01-29 07:27:45 +0000104'j'
Raymond Hettinger5c5eb862004-02-07 21:13:00 +0000105>>> d.popleft() # return and remove the leftmost item
Raymond Hettingere52f3b12004-01-29 07:27:45 +0000106'f'
Raymond Hettinger5c5eb862004-02-07 21:13:00 +0000107>>> list(d) # list the contents of the deque
Raymond Hettingere52f3b12004-01-29 07:27:45 +0000108['g', 'h', 'i']
Raymond Hettinger0a4977c2004-03-01 23:16:22 +0000109>>> d[0] # peek at leftmost item
Raymond Hettinger738ec902004-02-29 02:15:56 +0000110'g'
Raymond Hettinger0a4977c2004-03-01 23:16:22 +0000111>>> d[-1] # peek at rightmost item
Raymond Hettinger738ec902004-02-29 02:15:56 +0000112'i'
Raymond Hettinger0a4977c2004-03-01 23:16:22 +0000113
Raymond Hettinger5c5eb862004-02-07 21:13:00 +0000114>>> list(reversed(d)) # list the contents of a deque in reverse
Raymond Hettingerc058fd12004-02-07 02:45:22 +0000115['i', 'h', 'g']
Raymond Hettinger5c5eb862004-02-07 21:13:00 +0000116>>> 'h' in d # search the deque
Raymond Hettingere52f3b12004-01-29 07:27:45 +0000117True
Raymond Hettinger5c5eb862004-02-07 21:13:00 +0000118>>> d.extend('jkl') # add multiple elements at once
Raymond Hettingere52f3b12004-01-29 07:27:45 +0000119>>> d
120deque(['g', 'h', 'i', 'j', 'k', 'l'])
Raymond Hettinger5c5eb862004-02-07 21:13:00 +0000121>>> d.rotate(1) # right rotation
122>>> d
123deque(['l', 'g', 'h', 'i', 'j', 'k'])
124>>> d.rotate(-1) # left rotation
125>>> d
126deque(['g', 'h', 'i', 'j', 'k', 'l'])
Raymond Hettinger738ec902004-02-29 02:15:56 +0000127
Raymond Hettinger5c5eb862004-02-07 21:13:00 +0000128>>> deque(reversed(d)) # make a new deque in reverse order
129deque(['l', 'k', 'j', 'i', 'h', 'g'])
130>>> d.clear() # empty the deque
131>>> d.pop() # cannot pop from an empty deque
Raymond Hettingere52f3b12004-01-29 07:27:45 +0000132Traceback (most recent call last):
133 File "<pyshell#6>", line 1, in -toplevel-
134 d.pop()
Raymond Hettinger738ec902004-02-29 02:15:56 +0000135IndexError: pop from an empty deque
Raymond Hettinger3ba85c22004-02-06 19:04:56 +0000136
Raymond Hettinger5c5eb862004-02-07 21:13:00 +0000137>>> d.extendleft('abc') # extendleft() reverses the input order
Raymond Hettinger3ba85c22004-02-06 19:04:56 +0000138>>> d
139deque(['c', 'b', 'a'])
Raymond Hettingerf5f9a3702004-04-30 22:52:50 +0000140\end{verbatim}
Raymond Hettinger3ba85c22004-02-06 19:04:56 +0000141
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000142\subsubsection{Recipes \label{deque-recipes}}
Raymond Hettingere7169eb2004-05-09 01:15:01 +0000143
144This section shows various approaches to working with deques.
145
The \method{rotate()} method provides a way to implement \class{deque}
slicing and deletion. For example, a pure Python implementation of
\code{del d[n]} relies on the \method{rotate()} method to position
elements to be popped:
150
\begin{verbatim}
def delete_nth(d, n):
    d.rotate(-n)
    d.popleft()
    d.rotate(n)
\end{verbatim}
157
Raymond Hettinger0e371f22004-05-12 20:55:56 +0000158To implement \class{deque} slicing, use a similar approach applying
159\method{rotate()} to bring a target element to the left side of the deque.
160Remove old entries with \method{popleft()}, add new entries with
161\method{extend()}, and then reverse the rotation.
Raymond Hettingere7169eb2004-05-09 01:15:01 +0000162
With minor variations on that approach, it is easy to implement Forth-style
stack manipulations such as \code{dup}, \code{drop}, \code{swap}, \code{over},
\code{pick}, \code{rot}, and \code{roll}.
Raymond Hettingerf5f9a3702004-04-30 22:52:50 +0000166
A round-robin task server can be built from a \class{deque} using
\method{popleft()} to select the current task and \method{append()}
to add it back to the task list if the input stream is not exhausted:
170
\begin{verbatim}
def roundrobin(*iterables):
    pending = deque(iter(i) for i in iterables)
    while pending:
        task = pending.popleft()
        try:
            yield next(task)
        except StopIteration:
            continue
        pending.append(task)

>>> for value in roundrobin('abc', 'd', 'efgh'):
...     print value

a
d
e
b
f
c
g
h

\end{verbatim}
Raymond Hettingere7169eb2004-05-09 01:15:01 +0000195
196
197Multi-pass data reduction algorithms can be succinctly expressed and
Raymond Hettinger2e669402004-06-12 07:59:40 +0000198efficiently coded by extracting elements with multiple calls to
199\method{popleft()}, applying the reduction function, and calling
200\method{append()} to add the result back to the queue.
Raymond Hettingere7169eb2004-05-09 01:15:01 +0000201
202For example, building a balanced binary tree of nested lists entails
203reducing two adjacent nodes into one by grouping them in a list:
204
\begin{verbatim}
def maketree(iterable):
    d = deque(iterable)
    while len(d) > 1:
        pair = [d.popleft(), d.popleft()]
        d.append(pair)
    return list(d)

>>> print maketree('abcdefgh')
[[[['a', 'b'], ['c', 'd']], [['e', 'f'], ['g', 'h']]]]

\end{verbatim}
Guido van Rossum1968ad32006-02-25 22:38:04 +0000217
218
219
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000220\subsection{\class{defaultdict} objects \label{defaultdict-objects}}
221
Guido van Rossum1968ad32006-02-25 22:38:04 +0000222\begin{funcdesc}{defaultdict}{\optional{default_factory\optional{, ...}}}
223 Returns a new dictionary-like object. \class{defaultdict} is a subclass
224 of the builtin \class{dict} class. It overrides one method and adds one
225 writable instance variable. The remaining functionality is the same as
226 for the \class{dict} class and is not documented here.
227
228 The first argument provides the initial value for the
229 \member{default_factory} attribute; it defaults to \code{None}.
230 All remaining arguments are treated the same as if they were
231 passed to the \class{dict} constructor, including keyword arguments.
232
233 \versionadded{2.5}
234\end{funcdesc}
235
236\class{defaultdict} objects support the following method in addition to
237the standard \class{dict} operations:
238
239\begin{methoddesc}{__missing__}{key}
  If the \member{default_factory} attribute is \code{None}, this raises
  a \exception{KeyError} exception with the \var{key} as argument.
242
243 If \member{default_factory} is not \code{None}, it is called without
244 arguments to provide a default value for the given \var{key}, this
245 value is inserted in the dictionary for the \var{key}, and returned.
246
  If calling \member{default_factory} raises an exception, this exception
  is propagated unchanged.
249
250 This method is called by the \method{__getitem__} method of the
251 \class{dict} class when the requested key is not found; whatever it
252 returns or raises is then returned or raised by \method{__getitem__}.
253\end{methoddesc}
254
255\class{defaultdict} objects support the following instance variable:
256
257\begin{datadesc}{default_factory}
258 This attribute is used by the \method{__missing__} method; it is initialized
259 from the first argument to the constructor, if present, or to \code{None},
260 if absent.
261\end{datadesc}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000262
263
264\subsubsection{\class{defaultdict} Examples \label{defaultdict-examples}}
265
266Using \class{list} as the \member{default_factory}, it is easy to group
267a sequence of key-value pairs into a dictionary of lists:
268
269\begin{verbatim}
270>>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
271>>> d = defaultdict(list)
>>> for k, v in s:
...     d[k].append(v)
274
275>>> d.items()
276[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
277\end{verbatim}
278
279When each key is encountered for the first time, it is not already in the
280mapping; so an entry is automatically created using the
281\member{default_factory} function which returns an empty \class{list}. The
282\method{list.append()} operation then attaches the value to the new list. When
283keys are encountered again, the look-up proceeds normally (returning the list
284for that key) and the \method{list.append()} operation adds another value to
285the list. This technique is simpler and faster than an equivalent technique
286using \method{dict.setdefault()}:
287
288\begin{verbatim}
289>>> d = {}
>>> for k, v in s:
...     d.setdefault(k, []).append(v)
292
293>>> d.items()
294[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
295\end{verbatim}
296
297Setting the \member{default_factory} to \class{int} makes the
298\class{defaultdict} useful for counting (like a bag or multiset in other
299languages):
300
301\begin{verbatim}
302>>> s = 'mississippi'
303>>> d = defaultdict(int)
>>> for k in s:
...     d[k] += 1
306
307>>> d.items()
308[('i', 4), ('p', 2), ('s', 4), ('m', 1)]
309\end{verbatim}
310
311When a letter is first encountered, it is missing from the mapping, so the
312\member{default_factory} function calls \function{int()} to supply a default
313count of zero. The increment operation then builds up the count for each
Thomas Wouterscf297e42007-02-23 15:07:44 +0000314letter.
315
The function \function{int()}, which always returns zero, is just a special
case of constant functions. A faster and more flexible way to create
constant functions is to use a lambda function, which can supply
any constant value (not just zero):
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000320
321\begin{verbatim}
>>> def constant_factory(value):
...     return lambda: value
Thomas Wouterscf297e42007-02-23 15:07:44 +0000324>>> d = defaultdict(constant_factory('<missing>'))
325>>> d.update(name='John', action='ran')
326>>> '%(name)s %(action)s to %(object)s' % d
327'John ran to <missing>'
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000328\end{verbatim}
329
330Setting the \member{default_factory} to \class{set} makes the
331\class{defaultdict} useful for building a dictionary of sets:
332
333\begin{verbatim}
334>>> s = [('red', 1), ('blue', 2), ('red', 3), ('blue', 4), ('red', 1), ('blue', 4)]
335>>> d = defaultdict(set)
>>> for k, v in s:
...     d[k].add(v)
338
339>>> d.items()
340[('blue', set([2, 4])), ('red', set([1, 3]))]
341\end{verbatim}