blob: e33324ad38e1e2fb2ed64f51d9684a5e76443962 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2002-2007 Python Software Foundation
2# Author: Ben Gertzfield, Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Header encoding and decoding functionality."""
6
# Names exported by ``from <this module> import *``.
__all__ = [
    'Header',
    'decode_header',
    'make_header',
    ]
12
13import re
14import binascii
15
16import email.quoprimime
17import email.base64mime
18
19from email.errors import HeaderParseError
R. David Murray92532142011-01-07 23:25:30 +000020from email import charset as _charset
Charset = _charset.Charset

# Small string/bytes constants shared by the code below.
NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
# Line length Header falls back to when no maxlinelen is supplied.
MAXLINELEN = 78
# Characters treated as folding white space when splitting ASCII values.
FWS = ' \t'

# Charset instances used as defaults by Header.
USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
 =\? # literal =?
 (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
 \? # literal ?
 (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
 \? # literal ?
 (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
 \?= # literal ?=
 (?=[ \t]|$) # whitespace or the end of the string
 ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark (\041) to
# tilde (\176).  For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack: a newline followed by a run of characters other
# than space/tab that ends in a colon.
_embeded_header = re.compile(r'\n[^ \t]+:')



# Helpers
_max_append = email.quoprimime._max_append
59
60
61
def decode_header(header):
    """Decode a message header value without converting charset.

    header is either a string, which may or may not contain RFC 2047
    encoded words, or an object exposing a ``_chunks`` attribute (a Header).

    The result is a list of (decoded, charset) pairs.  For a string that
    contains no encoded word the single pair is (header, None).  Otherwise
    every decoded part is a bytes object; charset is None for the parts
    that were not encoded and the lower-cased charset name for the parts
    that were.  Adjacent parts sharing a charset are merged.

    Raises email.errors.HeaderParseError when a base64 encoded word cannot
    be decoded.
    """
    # Header objects already hold (string, charset) chunks: encode each chunk
    # with its own charset and report that charset by name.
    if hasattr(header, '_chunks'):
        pairs = []
        for text, cs in header._chunks:
            cs_name = str(cs)
            pairs.append((_charset._encode(text, cs_name), cs_name))
        return pairs
    # Nothing encoded at all: hand the value back untouched.
    if ecre.search(header) is None:
        return [(header, None)]
    # Pass 1: cut each line into (text, encoding, charset) triples; plain
    # text gets None for the last two members.
    words = []
    for line in header.splitlines():
        pieces = ecre.split(line)
        # ecre has three groups, so the split result is laid out as
        # [plain, charset, encoding, encoded, plain, ...] and always both
        # starts and ends with a plain piece.
        for start in range(0, len(pieces), 4):
            plain = pieces[start].strip()
            if plain:
                words.append((plain, None, None))
            if start + 1 < len(pieces):
                cs, enc, payload = pieces[start + 1:start + 4]
                words.append((payload, enc.lower(), cs.lower()))
    # Pass 2: undo the quoted-printable / base64 transformation, producing
    # (decoded_word, charset) pairs.
    decoded = []
    for payload, enc, cs in words:
        if enc is None:
            # Plain text, nothing to undo.
            decoded.append((payload, cs))
            continue
        if enc == 'q':
            decoded.append((email.quoprimime.header_decode(payload), cs))
            continue
        if enc != 'b':
            raise AssertionError('Unexpected encoding: ' + enc)
        # Postel's law: supply any padding the sender left off.
        shortfall = len(payload) % 4
        if shortfall:
            payload += '==='[:4 - shortfall]
        try:
            raw = email.base64mime.decode(payload)
        except binascii.Error:
            raise HeaderParseError('Base64 decoding error')
        decoded.append((raw, cs))
    # Pass 3: make everything bytes and merge neighbours that share a
    # charset.  Unencoded neighbours are joined with a space, encoded ones
    # are simply concatenated.
    collapsed = []
    run = run_cs = None
    for word, cs in decoded:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if run is None:
            run, run_cs = word, cs
        elif cs != run_cs:
            collapsed.append((run, run_cs))
            run, run_cs = word, cs
        elif run_cs is None:
            run += BSPACE + word
        else:
            run += word
    collapsed.append((run, run_cs))
    return collapsed
141
142
143
def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Build a Header from (decoded_string, charset) pairs.

    decoded_seq is a sequence of pairs in the format produced by
    decode_header(): the decoded value and its character set, where the
    character set is None, a charset name, or a Charset instance.

    maxlinelen, header_name and continuation_ws are handed to the Header
    constructor unchanged.  Returns the populated Header instance.
    """
    result = Header(header_name=header_name, maxlinelen=maxlinelen,
                    continuation_ws=continuation_ws)
    for text, cs in decoded_seq:
        # None (meaning us-ascii) and ready-made Charset objects go straight
        # to append(); anything else is a charset name to be wrapped.
        if not (cs is None or isinstance(cs, Charset)):
            cs = Charset(cs)
        result.append(text, cs)
    return result
164
165
166
class Header:
    """A header value kept as a list of (string, Charset) chunks."""

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        s, when given, is the initial value; it is added through .append(),
        so it may be a byte string or a Unicode string with the semantics
        documented there.  Without it the header starts out empty.

        charset is used for s and also becomes the default for every later
        .append() call that omits its own charset.  It may be a Charset
        instance or a charset name; when omitted, us-ascii is used.

        maxlinelen is the maximum line length used when encoding; it
        defaults to 78 as recommended by RFC 2822.  header_name is the name
        of the field (e.g. `Subject'), which is not part of s; when given,
        its length plus the separating ": " is reserved on the first line.

        continuation_ws must be RFC 2822 compliant folding whitespace
        (usually either a space or a hard tab) which will be prepended to
        continuation lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        self._maxlinelen = MAXLINELEN if maxlinelen is None else maxlinelen
        # Reserve room for the field name plus the separating colon and
        # space when a name is known.
        self._headerlen = 0 if header_name is None else len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        plain = (None, 'us-ascii')
        pieces = []
        previous = None
        for text, current in self._chunks:
            # Unknown 8-bit data is stored with surrogate escapes; show the
            # undecodable bytes as replacement characters instead.
            if current == _charset.UNKNOWN8BIT:
                raw = text.encode('ascii', 'surrogateescape')
                text = raw.decode('ascii', 'replace')
            # From the second chunk on, a space is inserted whenever we move
            # between a plain (None/us-ascii) chunk and a chunk in some
            # other charset, in either direction.
            if pieces:
                if previous not in plain:
                    if current in plain:
                        pieces.append(SPACE)
                        current = None
                elif current not in plain:
                    pieces.append(SPACE)
            previous = current
            pieces.append(text)
        return EMPTYSTRING.join(pieces)

    # Only equality comparisons are provided.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        # other may be a Header or a string.  Reduce ourselves to the
        # unencoded string value and let other do the comparing, with the
        # operands swapped.
        return other == str(self)

    def __ne__(self, other):
        return not (self == other)

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        charset may be a Charset instance or the name of a character set
        (which is converted to a Charset instance).  None, the default,
        selects the charset given to the constructor.

        s may be a byte string or a Unicode string.  A byte string (i.e.
        isinstance(s, str) is false) is decoded with the charset's input
        codec, and a UnicodeError is raised if that fails.  For a Unicode
        string the charset is a hint about the characters it contains.

        In both cases the resulting string must be encodable with the
        charset's output codec.  If it is not, a UnicodeError is raised,
        except when the output codec is us-ascii: then the chunk is stored
        with the utf-8 charset instead.

        errors is passed as the errors argument to the decode call if s is
        a byte string, and to the trial encode call.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            in_codec = charset.input_codec or 'us-ascii'
            if in_codec == _charset.UNKNOWN8BIT:
                # Keep the undecodable bytes around as surrogate escapes.
                in_codec, decode_errors = 'us-ascii', 'surrogateescape'
            else:
                decode_errors = errors
            s = s.decode(in_codec, decode_errors)
        # Trial-encode now so that a value which does not fit the output
        # character set is reported here rather than at encode time.
        out_codec = charset.output_codec or 'us-ascii'
        if out_codec != _charset.UNKNOWN8BIT:
            try:
                s.encode(out_codec, errors)
            except UnicodeEncodeError:
                if out_codec != 'us-ascii':
                    raise
                charset = UTF8
        self._chunks.append((s, charset))

    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        r"""Encode a message header into an RFC-compliant format.

        Header values may only contain a subset of 7-bit ASCII, so chunks
        whose charset has a header encoding are converted to RFC 2047
        encoded words (Base64 or quoted-printable), and the result is
        wrapped into lines.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string.  Individual lines may be
        longer than maxlinelen if a folding point cannot be found.  The
        first line will be shorter by the length of the header name plus
        ": " if a header name was specified at Header construction time.
        The default value for maxlinelen is determined at header
        construction time; a value of 0 means do not wrap.

        Optional splitchars is a string containing characters which should
        be given extra weight by the splitting algorithm during normal
        header wrapping.  This is in very rough support of RFC 2822's
        `higher level syntactic breaks': split points preceded by a
        splitchar are preferred during line splitting, with the characters
        preferred in the order in which they appear in the string.  Space
        and tab may be included in the string to indicate whether preference
        should be given to one over the other as a split point when other
        split chars do not appear in the line being split.  Splitchars does
        not affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value.  The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce
        RFC-compliant line separators when needed.

        Raises HeaderParseError if the encoded value appears to contain an
        embedded header.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  Using a huge limit gets the
        # same effect without special-casing the formatter.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        for text, cs in self._chunks:
            # An empty chunk is still fed once, as an empty first line.
            first, *rest = text.splitlines() or ['']
            formatter.feed('', first, cs)
            for line in rest:
                formatter.newline()
                stripped = line.lstrip()
                if cs.header_encoding is None:
                    # Unencoded text keeps its own leading whitespace.
                    indent = line[:len(line) - len(stripped)]
                    formatter.feed(indent, stripped, cs)
                else:
                    formatter.feed(self._continuation_ws, ' ' + stripped, cs)
            if rest:
                formatter.newline()
            formatter.add_transition()
        value = formatter._str(linesep)
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        # Collapse every run of consecutive chunks that share a charset into
        # one chunk whose strings are joined by single spaces.
        merged = []
        run_texts = []
        run_cs = None
        for text, cs in self._chunks:
            if cs == run_cs:
                run_texts.append(text)
                continue
            if run_cs is not None:
                merged.append((SPACE.join(run_texts), run_cs))
            run_texts = [text]
            run_cs = cs
        if run_texts:
            merged.append((SPACE.join(run_texts), run_cs))
        self._chunks = merged
378
379
380
381class _ValueFormatter:
382 def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
383 self._maxlen = maxlen
384 self._continuation_ws = continuation_ws
R David Murray01581ee2011-04-18 10:04:34 -0400385 self._continuation_ws_len = len(continuation_ws)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000386 self._splitchars = splitchars
387 self._lines = []
388 self._current_line = _Accumulator(headerlen)
389
R. David Murray8451c4b2010-10-23 22:19:56 +0000390 def _str(self, linesep):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000391 self.newline()
R. David Murray8451c4b2010-10-23 22:19:56 +0000392 return linesep.join(self._lines)
393
394 def __str__(self):
395 return self._str(NL)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000396
397 def newline(self):
Barry Warsaw00b34222007-08-31 02:35:00 +0000398 end_of_line = self._current_line.pop()
R David Murray01581ee2011-04-18 10:04:34 -0400399 if end_of_line != (' ', ''):
400 self._current_line.push(*end_of_line)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000401 if len(self._current_line) > 0:
R David Murray01581ee2011-04-18 10:04:34 -0400402 if self._current_line.is_onlyws():
403 self._lines[-1] += str(self._current_line)
404 else:
405 self._lines.append(str(self._current_line))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000406 self._current_line.reset()
407
Barry Warsaw00b34222007-08-31 02:35:00 +0000408 def add_transition(self):
R David Murray01581ee2011-04-18 10:04:34 -0400409 self._current_line.push(' ', '')
Barry Warsaw00b34222007-08-31 02:35:00 +0000410
R David Murray01581ee2011-04-18 10:04:34 -0400411 def feed(self, fws, string, charset):
Guido van Rossum9604e662007-08-30 03:46:43 +0000412 # If the charset has no header encoding (i.e. it is an ASCII encoding)
413 # then we must split the header at the "highest level syntactic break"
414 # possible. Note that we don't have a lot of smarts about field
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000415 # syntax; we just try to break on semi-colons, then commas, then
Guido van Rossum9604e662007-08-30 03:46:43 +0000416 # whitespace. Eventually, this should be pluggable.
417 if charset.header_encoding is None:
R David Murray01581ee2011-04-18 10:04:34 -0400418 self._ascii_split(fws, string, self._splitchars)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000419 return
Guido van Rossum9604e662007-08-30 03:46:43 +0000420 # Otherwise, we're doing either a Base64 or a quoted-printable
421 # encoding which means we don't need to split the line on syntactic
422 # breaks. We can basically just find enough characters to fit on the
423 # current line, minus the RFC 2047 chrome. What makes this trickier
424 # though is that we have to split at octet boundaries, not character
425 # boundaries but it's only safe to split at character boundaries so at
426 # best we can only get close.
427 encoded_lines = charset.header_encode_lines(string, self._maxlengths())
428 # The first element extends the current line, but if it's None then
429 # nothing more fit on the current line so start a new line.
430 try:
431 first_line = encoded_lines.pop(0)
432 except IndexError:
433 # There are no encoded lines, so we're done.
434 return
435 if first_line is not None:
R David Murray01581ee2011-04-18 10:04:34 -0400436 self._append_chunk(fws, first_line)
Guido van Rossum9604e662007-08-30 03:46:43 +0000437 try:
438 last_line = encoded_lines.pop()
439 except IndexError:
440 # There was only one line.
441 return
R David Murray01581ee2011-04-18 10:04:34 -0400442 self.newline()
443 self._current_line.push(self._continuation_ws, last_line)
Guido van Rossum9604e662007-08-30 03:46:43 +0000444 # Everything else are full lines in themselves.
445 for line in encoded_lines:
446 self._lines.append(self._continuation_ws + line)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000447
Guido van Rossum9604e662007-08-30 03:46:43 +0000448 def _maxlengths(self):
449 # The first line's length.
450 yield self._maxlen - len(self._current_line)
451 while True:
452 yield self._maxlen - self._continuation_ws_len
453
R David Murray01581ee2011-04-18 10:04:34 -0400454 def _ascii_split(self, fws, string, splitchars):
455 # The RFC 2822 header folding algorithm is simple in principle but
456 # complex in practice. Lines may be folded any place where "folding
457 # white space" appears by inserting a linesep character in front of the
458 # FWS. The complication is that not all spaces or tabs qualify as FWS,
459 # and we are also supposed to prefer to break at "higher level
460 # syntactic breaks". We can't do either of these without intimate
461 # knowledge of the structure of structured headers, which we don't have
462 # here. So the best we can do here is prefer to break at the specified
463 # splitchars, and hope that we don't choose any spaces or tabs that
464 # aren't legal FWS. (This is at least better than the old algorithm,
465 # where we would sometimes *introduce* FWS after a splitchar, or the
466 # algorithm before that, where we would turn all white space runs into
467 # single spaces or tabs.)
468 parts = re.split("(["+FWS+"]+)", fws+string)
469 if parts[0]:
470 parts[:0] = ['']
471 else:
472 parts.pop(0)
473 for fws, part in zip(*[iter(parts)]*2):
474 self._append_chunk(fws, part)
475
476 def _append_chunk(self, fws, string):
477 self._current_line.push(fws, string)
478 if len(self._current_line) > self._maxlen:
479 # Find the best split point, working backward from the end.
480 # There might be none, on a long first line.
481 for ch in self._splitchars:
482 for i in range(self._current_line.part_count()-1, 0, -1):
483 if ch.isspace():
484 fws = self._current_line[i][0]
485 if fws and fws[0]==ch:
486 break
487 prevpart = self._current_line[i-1][1]
488 if prevpart and prevpart[-1]==ch:
489 break
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000490 else:
R David Murray01581ee2011-04-18 10:04:34 -0400491 continue
492 break
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000493 else:
R David Murray01581ee2011-04-18 10:04:34 -0400494 fws, part = self._current_line.pop()
495 if self._current_line._initial_size > 0:
496 # There will be a header, so leave it on a line by itself.
497 self.newline()
498 if not fws:
499 # We don't use continuation_ws here because the whitespace
500 # after a header should always be a space.
501 fws = ' '
502 self._current_line.push(fws, part)
503 return
504 remainder = self._current_line.pop_from(i)
505 self._lines.append(str(self._current_line))
506 self._current_line.reset(remainder)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000507
508
R David Murray01581ee2011-04-18 10:04:34 -0400509class _Accumulator(list):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000510
Guido van Rossum9604e662007-08-30 03:46:43 +0000511 def __init__(self, initial_size=0):
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000512 self._initial_size = initial_size
R David Murray01581ee2011-04-18 10:04:34 -0400513 super().__init__()
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000514
R David Murray01581ee2011-04-18 10:04:34 -0400515 def push(self, fws, string):
516 self.append((fws, string))
517
518 def pop_from(self, i=0):
519 popped = self[i:]
520 self[i:] = []
521 return popped
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000522
523 def pop(self):
R David Murray01581ee2011-04-18 10:04:34 -0400524 if self.part_count()==0:
525 return ('', '')
526 return super().pop()
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000527
528 def __len__(self):
R David Murray01581ee2011-04-18 10:04:34 -0400529 return sum((len(fws)+len(part) for fws, part in self),
Guido van Rossum9604e662007-08-30 03:46:43 +0000530 self._initial_size)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000531
532 def __str__(self):
R David Murray01581ee2011-04-18 10:04:34 -0400533 return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
534 for fws, part in self))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000535
R David Murray01581ee2011-04-18 10:04:34 -0400536 def reset(self, startval=None):
537 if startval is None:
538 startval = []
539 self[:] = startval
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000540 self._initial_size = 0
Guido van Rossum9604e662007-08-30 03:46:43 +0000541
542 def is_onlyws(self):
R David Murray01581ee2011-04-18 10:04:34 -0400543 return self._initial_size==0 and (not self or str(self).isspace())
544
545 def part_count(self):
546 return super().__len__()