# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org
4
5"""Header encoding and decoding functionality."""
6
# Public API of this module.
__all__ = [
    'Header',
    'decode_header',
    'make_header',
    ]
12
13import re
14import binascii
15
16import email.quoprimime
17import email.base64mime
18
19from email.errors import HeaderParseError
20from email.charset import Charset
21
NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
MAXLINELEN = 78

USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  (?=[ \t]|$)           # whitespace or the end of the string
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark (octal
# 041) to tilde (octal 176).  For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack.  (The misspelled name is kept because other code
# in this module refers to it.)
_embeded_header = re.compile(r'\n[^ \t]+:')



# Helpers
_max_append = email.quoprimime._max_append
57
58
59
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (decoded_part, charset) pairs containing each of the
    decoded parts of the header.  Charset is None for non-encoded parts of
    the header, otherwise a lower-case string containing the name of the
    character set specified in the encoded string.

    If the header contains no encoded words at all, the result is the
    single pair (header, None) with the header returned unchanged.
    Otherwise every decoded_part is a bytes object, and adjacent parts that
    share a charset are merged (unencoded runs are joined with one space).

    An email.errors.HeaderParseError may be raised when certain decoding
    errors occur (e.g. a base64 decoding exception).
    """
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset).  For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        # ecre has three groups, so split() yields the repeating pattern
        # [unencoded, charset, encoding, encoded, unencoded, ...].
        parts = ecre.split(line)
        while parts:
            unencoded = parts.pop(0).strip()
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation.  decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = email.quoprimime.header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            # Postel's law: add missing padding
            paderr = len(encoded_string) % 4
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = email.base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            # Adjacent unencoded words are rejoined with a single space.
            last_word += BSPACE + word
        else:
            # Adjacent encoded words of the same charset are concatenated
            # directly.
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed
132
133
134
def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Create a Header from a sequence of pairs as returned by decode_header()

    decode_header() takes a header value string and returns a sequence of
    pairs of the format (decoded_string, charset) where charset is the string
    name of the character set.

    This function takes one of those sequence of pairs and returns a Header
    instance.  Each pair is appended to the new Header in order; a charset
    given by name is first converted to a Charset instance, while a charset
    of None is passed through unchanged.  Optional maxlinelen, header_name,
    and continuation_ws are as in the Header constructor.
    """
    h = Header(maxlinelen=maxlinelen, header_name=header_name,
               continuation_ws=continuation_ws)
    for s, charset in decoded_seq:
        # None means us-ascii but we can simply pass it on to h.append()
        if charset is not None and not isinstance(charset, Charset):
            charset = Charset(charset)
        h.append(s, charset)
    return h
155
156
157
class Header:
    """A header value built from (string, Charset) chunks.

    Chunks are added with append(); str() gives the unencoded value and
    encode() gives the folded, encoded form.
    """

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen.  For
        splitting the first line to a shorter value (to account for the field
        header which isn't included in s, e.g. `Subject') pass in the name of
        the field in header_name.  The default maxlinelen is 78 as recommended
        by RFC 2822.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        # List of (str, Charset) pairs in the order they were appended.
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        self._maxlinelen = maxlinelen
        if header_name is None:
            self._headerlen = 0
        else:
            # Take the separating colon and space into account.
            self._headerlen = len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        uchunks = []
        lastcs = None
        for string, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            nextcs = charset
            if uchunks:
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii'):
                        uchunks.append(SPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii'):
                    uchunks.append(SPACE)
            lastcs = nextcs
            uchunks.append(string)
        return EMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        """Compare the unencoded string value of this header with other."""
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a unicode (of the unencoded header value), swap the
        # args and do another comparison.
        return other == str(self)

    def __ne__(self, other):
        """Return the negation of __eq__."""
        return not self == other

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is false), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In either case, when producing an RFC 2822 compliant
        header using RFC 2047 rules, the string will be encoded using the
        output codec of the charset.  If the string cannot be encoded to the
        output codec, a UnicodeError will be raised.

        Optional `errors' is passed as the errors argument to the decode
        call if s is a byte string.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            input_charset = charset.input_codec or 'us-ascii'
            s = s.decode(input_charset, errors)
        # Ensure that the bytes we're storing can be decoded to the output
        # character set, otherwise an early error is thrown.  The encoded
        # result itself is discarded; only the str form is stored.
        output_charset = charset.output_codec or 'us-ascii'
        s.encode(output_charset, errors)
        self._chunks.append((s, charset))

    def encode(self, splitchars=';, \t', maxlinelen=None):
        """Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string.  Individual lines may be longer
        than maxlinelen if a folding point cannot be found.  The first line
        will be shorter by the length of the header name plus ": " if a header
        name was specified at Header construction time.  The default value for
        maxlinelen is determined at header construction time.

        Optional splitchars is a string containing characters to split long
        ASCII lines on, in rough support of RFC 2822's `highest level
        syntactic breaks'.  This doesn't affect RFC 2047 encoded lines.

        Raises HeaderParseError if the encoded value contains a newline
        followed directly by what looks like another header field.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        for string, charset in self._chunks:
            lines = string.splitlines()
            # splitlines() returns [] for an empty string, hence the guard.
            formatter.feed(lines[0] if lines else '', charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, USASCII)
                    line = ' ' + line.lstrip()
                formatter.feed(line, charset)
            if len(lines) > 1:
                formatter.newline()
            formatter.add_transition()
        value = str(formatter)
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        """Merge adjacent chunks that share a charset, in place."""
        # Step 1: Normalize the chunks so that all runs of identical charsets
        # get collapsed into a single unicode string.
        chunks = []
        last_charset = None
        last_chunk = []
        for string, charset in self._chunks:
            if charset == last_charset:
                last_chunk.append(string)
            else:
                if last_charset is not None:
                    chunks.append((SPACE.join(last_chunk), last_charset))
                last_chunk = [string]
                last_charset = charset
        if last_chunk:
            chunks.append((SPACE.join(last_chunk), last_charset))
        self._chunks = chunks
340
341
342
class _ValueFormatter:
    """Accumulate header text into output lines no longer than maxlen.

    Finished lines are collected in self._lines; the line under construction
    is held in an _Accumulator.  str() flushes the line in progress and joins
    all lines with NL.
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        # A tab in the continuation whitespace is counted as eight columns.
        self._continuation_ws_len = len(continuation_ws.replace('\t', SPACE8))
        self._splitchars = splitchars
        self._lines = []
        # headerlen is counted against the first line only; every reset()
        # of the accumulator sets its initial size back to zero.
        self._current_line = _Accumulator(headerlen)

    def __str__(self):
        self.newline()
        return NL.join(self._lines)

    def newline(self):
        """Flush the current line to the output and start an empty one."""
        # Pop the last item and push it back unless it is None (either a
        # transition marker or an empty accumulator), so a trailing
        # transition marker is dropped.
        end_of_line = self._current_line.pop()
        if end_of_line is not None:
            self._current_line.push(end_of_line)
        if len(self._current_line) > 0:
            self._lines.append(str(self._current_line))
        self._current_line.reset()

    def add_transition(self):
        """Mark a chunk boundary on the current line."""
        self._current_line.push(None)

    def feed(self, string, charset):
        """Add string, encoded per charset, folding lines as needed."""
        # If the string itself fits on the current line in its encoded format,
        # then add it now and be done with it.
        encoded_string = charset.header_encode(string)
        if len(encoded_string) + len(self._current_line) <= self._maxlen:
            self._current_line.push(encoded_string)
            return
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible.  Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.  Eventually, this should be pluggable.
        if charset.header_encoding is None:
            # Pick the first split character, in priority order, that
            # actually occurs in the string.
            for ch in self._splitchars:
                if ch in string:
                    break
            else:
                ch = None
            # If there's no available split character then regardless of
            # whether the string fits on the line, we have to put it on a line
            # by itself.
            if ch is None:
                if not self._current_line.is_onlyws():
                    self._lines.append(str(self._current_line))
                    self._current_line.reset(self._continuation_ws)
                self._current_line.push(encoded_string)
            else:
                self._ascii_split(string, ch)
            return
        # Otherwise, we're doing either a Base64 or a quoted-printable
        # encoding which means we don't need to split the line on syntactic
        # breaks.  We can basically just find enough characters to fit on the
        # current line, minus the RFC 2047 chrome.  What makes this trickier
        # though is that we have to split at octet boundaries, not character
        # boundaries but it's only safe to split at character boundaries so at
        # best we can only get close.
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        try:
            first_line = encoded_lines.pop(0)
        except IndexError:
            # There are no encoded lines, so we're done.
            return
        if first_line is not None:
            self._current_line.push(first_line)
        self._lines.append(str(self._current_line))
        self._current_line.reset(self._continuation_ws)
        try:
            last_line = encoded_lines.pop()
        except IndexError:
            # There was only one line.
            return
        self._current_line.push(last_line)
        # Everything else are full lines in themselves.
        for line in encoded_lines:
            self._lines.append(self._continuation_ws + line)

    def _maxlengths(self):
        """Yield the room left on the first line, then on each later line."""
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, string, ch):
        """Fold an unencoded string across lines at occurrences of ch."""
        holding = _Accumulator()
        # Split the line on the split character, preserving it.  If the split
        # character is whitespace RFC 2822 $2.2.3 requires us to fold on the
        # whitespace, so that the line leads with the original whitespace we
        # split on.  However, if a higher syntactic break is used instead
        # (e.g. comma or semicolon), the folding should happen after the split
        # character.  But then in that case, we need to add our own
        # continuation whitespace -- although won't that break unfolding?
        for part, splitpart, nextpart in _spliterator(ch, string):
            if not splitpart:
                # No splitpart means this is the last chunk.  Put this part
                # either on the current line or the next line depending on
                # whether it fits.
                holding.push(part)
                if len(holding) + len(self._current_line) <= self._maxlen:
                    # It fits, but we're done.
                    self._current_line.push(str(holding))
                else:
                    # It doesn't fit, but we're done.  Before pushing a new
                    # line, watch out for the current line containing only
                    # whitespace.
                    holding.pop()
                    if self._current_line.is_onlyws() and holding.is_onlyws():
                        # Don't start a new line.
                        holding.push(part)
                        part = None
                    self._current_line.push(str(holding))
                    self._lines.append(str(self._current_line))
                    if part is None:
                        self._current_line.reset()
                    else:
                        holding.reset(part)
                        self._current_line.reset(str(holding))
                return
            elif not nextpart:
                # There must be some trailing or duplicated split characters
                # because we found a split character but no next part.  In
                # this case we must treat the thing to fit as the part +
                # splitpart because if splitpart is whitespace it's not
                # allowed to be the only thing on the line, and if it's not
                # whitespace we must split after the syntactic break.
                holding_prelen = len(holding)
                holding.push(part + splitpart)
                if len(holding) + len(self._current_line) <= self._maxlen:
                    self._current_line.push(str(holding))
                elif holding_prelen == 0:
                    # This is the only chunk left so it has to go on the
                    # current line.
                    self._current_line.push(str(holding))
                else:
                    save_part = holding.pop()
                    self._current_line.push(str(holding))
                    self._lines.append(str(self._current_line))
                    holding.reset(save_part)
                    self._current_line.reset(str(holding))
                # Everything held has been moved to the current line; clear
                # it so the loop can carry on with the remaining parts.
                holding.reset()
            elif not part:
                # We're leading with a split character.  See if the splitpart
                # and nextpart fits on the current line.
                holding.push(splitpart + nextpart)
                holding_len = len(holding)
                # We know we're not leaving the nextpart on the stack.
                holding.pop()
                if holding_len + len(self._current_line) <= self._maxlen:
                    holding.push(splitpart)
                else:
                    # It doesn't fit.  Since there's no current part really
                    # the best we can do is start a new line and push the
                    # split part onto it.
                    self._current_line.push(str(holding))
                    holding.reset()
                    if len(self._current_line) > 0 and self._lines:
                        self._lines.append(str(self._current_line))
                        self._current_line.reset()
                    holding.push(splitpart)
            else:
                # All three parts are present.  First let's see if all three
                # parts will fit on the current line.  If so, we don't need to
                # split it.
                holding.push(part + splitpart + nextpart)
                holding_len = len(holding)
                # Pop the part because we'll push nextpart on the next
                # iteration through the loop.
                holding.pop()
                if holding_len + len(self._current_line) <= self._maxlen:
                    holding.push(part + splitpart)
                else:
                    # The entire thing doesn't fit.  See if we need to split
                    # before or after the split characters.
                    if splitpart.isspace():
                        # Split before whitespace.  Remember that the
                        # whitespace becomes the continuation whitespace of
                        # the next line so it goes to current_line not holding.
                        holding.push(part)
                        self._current_line.push(str(holding))
                        holding.reset()
                        self._lines.append(str(self._current_line))
                        self._current_line.reset(splitpart)
                    else:
                        # Split after non-whitespace.  The continuation
                        # whitespace comes from the instance variable.
                        holding.push(part + splitpart)
                        self._current_line.push(str(holding))
                        holding.reset()
                        self._lines.append(str(self._current_line))
                        if nextpart[0].isspace():
                            self._current_line.reset()
                        else:
                            self._current_line.reset(self._continuation_ws)
        # Get the last of the holding part
        self._current_line.push(str(holding))
544
545
546
def _spliterator(character, string):
    """Yield (part, splitpart, nextpart) triples from splitting string.

    string is split on every occurrence of character, keeping the separators.
    Each triple holds a text part, the separator that follows it, and the
    text part after that separator.  The nextpart of one triple is the part
    of the following triple.  The final triple has None for both splitpart
    and nextpart.

    character is matched literally, so regular expression metacharacters
    such as '.' or '|' are safe to use as split characters.
    """
    # re.escape() keeps the separator from being read as regex syntax.  The
    # capturing group makes re.split() return the separators too.
    pattern = '(%s)' % re.escape(character)
    # Reversed so that pop() takes items from the front of the original list.
    parts = list(reversed(re.split(pattern, string)))
    while parts:
        part = parts.pop()
        splitparts = (parts.pop() if parts else None)
        nextpart = (parts.pop() if parts else None)
        yield (part, splitparts, nextpart)
        if nextpart is not None:
            # Put nextpart back so it leads the next triple.
            parts.append(nextpart)
556
557
class _Accumulator:
    """A stack of string fragments that make up one output line.

    A pushed None acts as a transition marker: it counts as one character
    in len() and is rendered as a single space by str().
    """

    def __init__(self, initial_size=0):
        # Extra length counted by len() on top of the pushed items.
        self._initial_size = initial_size
        self._current = []

    def push(self, string):
        """Append a fragment (or None as a transition marker)."""
        self._current.append(string)

    def pop(self):
        """Remove and return the last item, or None if there are none."""
        if not self._current:
            return None
        return self._current.pop()

    def __len__(self):
        return sum(((1 if string is None else len(string))
                    for string in self._current),
                   self._initial_size)

    def __str__(self):
        # A trailing transition marker is removed for good, so str() changes
        # the accumulator's contents and a later len() will be one less.
        if self._current and self._current[-1] is None:
            self._current.pop()
        return EMPTYSTRING.join((' ' if string is None else string)
                                for string in self._current)

    def reset(self, string=None):
        """Discard all items and the initial size; optionally push string."""
        self._current = []
        self._initial_size = 0
        if string is not None:
            self.push(string)

    def is_onlyws(self):
        """Return True if the length is zero or the text is all whitespace."""
        return len(self) == 0 or str(self).isspace()