blob: 5e61a7b97a25ec4dfa10b857b1ae1bf3a6ea3253 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040023from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000024from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open a file from the ``data`` directory beside the email test package.

    ``filename`` is joined onto the ``data`` directory that sits next to
    ``landmark``; all remaining positional and keyword arguments are handed
    to ``open()`` untouched.  The caller is responsible for closing the
    returned file object.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
Guido van Rossum8b3febe2007-08-30 01:15:14 +000055# Base test class
class TestEmailBase(unittest.TestCase):
    """Shared helpers for the email test cases in this module."""

    # Let unittest print complete diffs on assertion failures.
    maxDiff = None

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output.

        Both values are converted with str(), split into lines, and each
        line is shown through repr() so whitespace differences are visible.
        Raises self.failureException when the values differ.
        """
        if first != second:
            sides = [
                [repr(line) for line in str(value).splitlines()]
                for value in (first, second)
            ]
            delta = difflib.ndiff(sides[0], sides[1])
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Return the message parsed from the named test data file."""
        path = findfile(filename)
        with openfile(path) as fp:
            return email.message_from_file(fp)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000072
Ezio Melottib3aedd42010-11-20 19:04:17 +000073
Guido van Rossum8b3febe2007-08-30 01:15:14 +000074# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Tests that call self._msgobj() parse one of the msg_NN.txt files from
    the test data directory; the others build their messages inline.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied failobj.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # The argument replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        # Flattening must reproduce the input text exactly.
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Message.__setitem__ appends instead of replacing, and header
            # lookup returns the first match.  Drop the previous header so
            # that every spelling is really the one get_payload() sees.
            # Deleting a header that is not present is a no-op.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # msg_17.txt holds the expected DecodedGenerator output.
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the envelope line comes first; the remainder
        # must match the plain rendering.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header in this message.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        # The header value deliberately repeats the "Content-Type:" prefix
        # and puts spaces around "=".
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        # The same header is spelled twice with different Python quoting.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # No assertion: the test passes as long as del_param() does not
        # raise when the header is absent.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        self.assertEqual(existing_header, 'text/plain; charset="utf-8"')
        msg.del_param('foobar', header='Content-Type')
        # Removing a parameter that is not there must leave the header as
        # it was.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a "/" is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicate headers only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The undecodable payload is expected back as raw bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        # A None value yields a bare parameter name; the underscore in the
        # keyword becomes a dash.
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))
635
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000636# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Check the Content-Transfer-Encoding applied to message payloads."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            image_bytes = fp.read()
        encoded_body = MIMEImage(image_bytes).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(map(len, encoded_body.split('\n')))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        msg = Message()
        msg.set_charset('us-ascii')
        self.assertEqual(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit['content-transfer-encoding'], '8bit')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        msg = MIMEText('文', _charset='euc-jp')
        self.assertEqual(msg['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """)
        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(msg), expected)

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """)
        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(msg), expected)
694
Ezio Melottib3aedd42010-11-20 19:04:17 +0000695
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000696# Test long header wrapping
697class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400698
699 maxDiff = None
700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000701 def test_split_long_continuation(self):
702 eq = self.ndiffAssertEqual
703 msg = email.message_from_string("""\
704Subject: bug demonstration
705\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
706\tmore text
707
708test
709""")
710 sfp = StringIO()
711 g = Generator(sfp)
712 g.flatten(msg)
713 eq(sfp.getvalue(), """\
714Subject: bug demonstration
715\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
716\tmore text
717
718test
719""")
720
721 def test_another_long_almost_unsplittable_header(self):
722 eq = self.ndiffAssertEqual
723 hstr = """\
724bug demonstration
725\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
726\tmore text"""
727 h = Header(hstr, continuation_ws='\t')
728 eq(h.encode(), """\
729bug demonstration
730\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
731\tmore text""")
732 h = Header(hstr.replace('\t', ' '))
733 eq(h.encode(), """\
734bug demonstration
735 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
736 more text""")
737
def test_long_nonstring(self):
    """Fold a Subject built from latin-1, latin-2 and utf-8 chunks.

    The header is checked twice: once as emitted by Generator.flatten()
    and once via Header.encode() with an explicit maxlinelen of 76.
    """
    check = self.ndiffAssertEqual
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
              b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              b'bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    # Assemble the header one charset chunk at a time.
    subject = Header(g_head, latin1, header_name='Subject')
    subject.append(cz_head, latin2)
    subject.append(utf8_head, utf8)
    msg = Message()
    msg['Subject'] = subject
    out = StringIO()
    Generator(out).flatten(msg)
    check(out.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000791
def test_long_header_encode(self):
    """Header.encode() folds a long parameter list after a semicolon."""
    value = ('wasnipoop; giraffes="very-long-necked-animals"; '
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value, header_name='X-Foobar-Spoink-Defrobnit')
    expected = '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"'''
    self.ndiffAssertEqual(header.encode(), expected)
800
def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
    """continuation_ws='\\t' does not change the whitespace already present.

    The value contains only spaces, so the folded line still starts with
    a space even though a tab was requested as continuation whitespace.
    """
    value = ('wasnipoop; giraffes="very-long-necked-animals"; '
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value,
                    header_name='X-Foobar-Spoink-Defrobnit',
                    continuation_ws='\t')
    expected = '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"'''
    self.ndiffAssertEqual(header.encode(), expected)
810
def test_long_header_encode_with_tab_continuation(self):
    """A tab that is already in the value is kept at the fold point."""
    value = ('wasnipoop; giraffes="very-long-necked-animals";\t'
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value,
                    header_name='X-Foobar-Spoink-Defrobnit',
                    continuation_ws='\t')
    expected = '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"'''
    self.ndiffAssertEqual(header.encode(), expected)
820
R David Murray3a6152f2011-03-14 21:13:03 -0400821 def test_header_encode_with_different_output_charset(self):
822 h = Header('文', 'euc-jp')
823 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
824
825 def test_long_header_encode_with_different_output_charset(self):
826 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
827 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
828 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
829 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
830 res = """\
831=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
832 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
833 self.assertEqual(h.encode(), res)
834
def test_header_splitter(self):
    """A long string header set on a message is folded by Generator."""
    msg = MIMEText('')
    # add_header() would be nicer here, but it does not guarantee the
    # order of the parameters, so the value is spelled out by hand.
    msg['X-Foobar-Spoink-Defrobnit'] = (
        'wasnipoop; giraffes="very-long-necked-animals"; '
        'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

'''
    self.ndiffAssertEqual(out.getvalue(), expected)
854
def test_no_semis_header_splitter(self):
    """A References header without semicolons folds on whitespace."""
    msg = Message()
    msg['From'] = 'test@dom.ain'
    message_ids = ['<%d@dom.ain>' % i for i in range(10)]
    msg['References'] = SPACE.join(message_ids)
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test"""
    self.ndiffAssertEqual(out.getvalue(), expected)
870
def test_last_split_chunk_does_not_fit(self):
    """An unsplittable tail longer than the limit gets a line of its own."""
    value = ('Subject: the first part of this is short, but_the_second'
             '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
             '_all_by_itself')
    expected = """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself"""
    self.ndiffAssertEqual(Header(value).encode(), expected)
879
def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
    """A leading ', ' stays on line one; the overlong word follows alone."""
    value = (', but_the_second'
             '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
             '_all_by_itself')
    expected = """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself"""
    self.ndiffAssertEqual(Header(value).encode(), expected)
888
def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
    """Two leading commas share line one; the overlong word follows alone."""
    value = (', , but_the_second'
             '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
             '_all_by_itself')
    expected = """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself"""
    self.ndiffAssertEqual(Header(value).encode(), expected)
897
def test_trailing_splitable_on_overlong_unsplitable(self):
    """An overlong word with a trailing ';' is emitted unchanged."""
    value = ('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself;')
    expected = ("this_part_does_not_fit_within_maxlinelen_and_thus_should_"
                "be_on_a_line_all_by_itself;")
    self.ndiffAssertEqual(Header(value).encode(), expected)
904
def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
    """A leading ';' is split off; the trailing '; ' stays with the word."""
    value = ('; '
             'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself; ')
    expected = """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """
    self.ndiffAssertEqual(Header(value).encode(), expected)
R David Murray7da4db12011-04-07 20:37:17 -0400913
def test_long_header_with_multiple_sequential_split_chars(self):
    """Folding must keep runs of consecutive whitespace intact.

    The value deliberately contains doubled spaces; per its own text,
    such runs used to cause truncation of the header when folded.  The
    expected output therefore repeats the doubled spaces verbatim.
    """
    # NOTE(review): the doubled spaces below were restored; the scraped
    # copy of this test had every whitespace run collapsed to one space,
    # which left nothing "sequential" to test.  Confirm against upstream.
    value = ('This is a long line that has two whitespaces  in a row.  '
             'This used to cause truncation of the header when folded')
    expected = """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded"""
    self.ndiffAssertEqual(Header(value).encode(), expected)
921
def test_splitter_split_on_punctuation_only_if_fws(self):
    """';' and ',' without following whitespace are not fold points."""
    value = ('thisverylongheaderhas;semicolons;and,commas,but'
             'they;arenotlegal;fold,points')
    expected = ("thisverylongheaderhas;semicolons;and,commas,butthey;"
                "arenotlegal;fold,points")
    self.ndiffAssertEqual(Header(value).encode(), expected)
928
def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
    """A ';;' right before an overlong last word stays on the prior line."""
    value = ('this is a test where we need to have more than one line '
             'before; our final line that is just too big to fit;; '
             'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself;')
    expected = """\
this is a test where we need to have more than one line before;
 our final line that is just too big to fit;;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;"""
    self.ndiffAssertEqual(Header(value).encode(), expected)
939
def test_overlong_last_part_followed_by_split_point(self):
    """A trailing space after an overlong word does not trigger a fold."""
    value = ('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself ')
    expected = ("this_part_does_not_fit_within_maxlinelen_and_thus_"
                "should_be_on_a_line_all_by_itself ")
    self.ndiffAssertEqual(Header(value).encode(), expected)
946
def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
    """Two overlong words separated by '; ; ' fold into three lines."""
    value = ('this_is_a__test_where_we_need_to_have_more_than_one_line_'
             'before_our_final_line_; ; '
             'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself; ')
    expected = """\
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """
    self.ndiffAssertEqual(Header(value).encode(), expected)
957
def test_multiline_with_overlong_last_part_followed_by_split_point(self):
    """Splittable text, a lone ';' and an overlong word take a line each."""
    value = ('this is a test where we need to have more than one line '
             'before our final line; ; '
             'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself; ')
    expected = """\
this is a test where we need to have more than one line before our final line;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """
    self.ndiffAssertEqual(Header(value).encode(), expected)
968
def test_long_header_with_whitespace_runs(self):
    """Runs of whitespace between message-ids survive header folding.

    Each item carries two trailing spaces, so joining with SPACE yields
    three-space separators and leaves two spaces at the very end (the
    ``\\x20\\x20`` in the expected text).
    """
    # NOTE(review): the whitespace runs were restored.  The scraped copy
    # had them collapsed to single spaces, which contradicted both the
    # two trailing spaces and the four-per-line folding in the expected
    # output.  Confirm against upstream.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test"""
    self.ndiffAssertEqual(out.getvalue(), expected)
985
def test_long_run_with_semi_header_splitter(self):
    """A long whitespace-separated run ending in '; abc' folds on spaces."""
    msg = Message()
    msg['From'] = 'test@dom.ain'
    references = SPACE.join(['<foo@dom.ain>'] * 10)
    msg['References'] = references + '; abc'
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test"""
    self.ndiffAssertEqual(out.getvalue(), expected)
1002
def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
    """Punctuation without whitespace is not a fold point in a message.

    This is the Message/Generator variant of the punctuation-only test.
    It has its own name because a second method called
    ``test_splitter_split_on_punctuation_only_if_fws`` in the same class
    would replace the first, so only one of the two would ever run.
    """
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
                         'they;arenotlegal;fold,points')
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    # XXX the space after the header should not be there.
    expected = """\
From: test@dom.ain
References:\x20
 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points

Test"""
    self.ndiffAssertEqual(out.getvalue(), expected)
1020
def test_no_split_long_header(self):
    """An 80-character unbreakable word is never split."""
    check = self.ndiffAssertEqual
    long_word = 'x' * 80
    # These come on two lines because Headers are really field value
    # classes and don't really know about their field names.
    with_name = Header('References: ' + long_word)
    check(with_name.encode(), 'References:\n ' + long_word)
    bare = Header(long_word)
    check(bare.encode(), long_word)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001032
def test_splitting_multiple_long_lines(self):
    """Each of three long, tab-continued lines is folded independently."""
    source = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
    header = Header(source, continuation_ws='\t')
    expected = """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)"""
    self.ndiffAssertEqual(header.encode(), expected)
1054
def test_splitting_first_line_only_is_long(self):
    """Only the overlong first line is folded; the short ones are kept."""
    source = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    header = Header(source, maxlinelen=78, header_name='Received',
                    continuation_ws='\t')
    expected = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    self.ndiffAssertEqual(header.encode(), expected)
1070
def test_long_8bit_header(self):
    """A latin-1 Subject folds at 76 columns and not at all with 0."""
    check = self.ndiffAssertEqual
    subject = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
    subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
    msg = Message()
    msg['Subject'] = subject
    check(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
    # maxheaderlen=0 yields a single, unfolded encoded word.
    check(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")
1090
def test_long_8bit_header_no_charset(self):
    """Non-ASCII text with no charset is emitted as utf-8 encoded words.

    The same output is expected whether the value is assigned as a plain
    string or wrapped in a Header first.
    """
    check = self.ndiffAssertEqual
    header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                     'f\xfcr Offshore-Windkraftprojekte '
                     '<a-very-long-address@example.com>')
    expected = """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

"""
    plain = Message()
    plain['Reply-To'] = header_string
    check(plain.as_string(maxheaderlen=78), expected)
    wrapped = Message()
    wrapped['Reply-To'] = Header(header_string, header_name='Reply-To')
    check(wrapped.as_string(maxheaderlen=78), expected)
1111
def test_long_to_header(self):
    """A long To: list folds after ', ' but not after a bare ','."""
    recipients = ('"Someone Test #A" <someone@eecs.umich.edu>,'
                  '<someone@eecs.umich.edu>, '
                  '"Someone Test #B" <someone@umich.edu>, '
                  '"Someone Test #C" <someone@eecs.umich.edu>, '
                  '"Someone Test #D" <someone@eecs.umich.edu>')
    msg = Message()
    msg['To'] = recipients
    expected = '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

'''
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1128
def test_long_line_after_append(self):
    """Text appended to a nearly full line lands on a continuation line."""
    first = 'This is an example of string which has almost the limit of header length.'
    header = Header(first)
    header.append('Add another line.')
    expected = """\
This is an example of string which has almost the limit of header length.
 Add another line."""
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
1137
def test_shorter_line_with_append(self):
    """Appended text that still fits is joined with a single space."""
    header = Header('This is a shorter line.')
    header.append('Add another sentence. (Surprise?)')
    expected = 'This is a shorter line. Add another sentence. (Surprise?)'
    self.ndiffAssertEqual(header.encode(), expected)
1145
def test_long_field_name(self):
    """A long header_name shortens the first encoded word of the value."""
    field_name = 'X-Very-Very-Very-Long-Header-Name'
    german = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
    header = Header(german, 'iso-8859-1', header_name=field_name)
    # BAW: this seems broken because the first line is too long
    expected = """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?="""
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001160
def test_long_received_header(self):
    """A Received value folds the same as a Header and as a plain str."""
    received = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
                'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
                'Wed, 05 Mar 2003 18:10:18 -0700')
    msg = Message()
    msg['Received-1'] = Header(received, continuation_ws='\t')
    msg['Received-2'] = received
    # This should be splitting on spaces not semicolons.
    expected = """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

"""
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1178
def test_string_headerinst_eq(self):
    """A str value and an equivalent Header instance serialize alike."""
    value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
    msg = Message()
    msg['Received-1'] = Header(value, header_name='Received-1',
                               continuation_ws='\t')
    msg['Received-2'] = value
    # XXX The space after the ':' should not be there.
    expected = """\
Received-1:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Received-2:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

"""
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1197
def test_long_unbreakable_lines_with_continuation(self):
    """Pre-folded unbreakable lines keep their existing line structure.

    The same value is stored as a str, as a Header, and as a str with a
    leading space; all three are expected to serialize identically.
    """
    face = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
    msg = Message()
    msg['Face-1'] = face
    msg['Face-2'] = Header(face, header_name='Face-2')
    msg['Face-3'] = ' ' + face
    # XXX This splitting is all wrong.  The first value line should be
    # snug against the field name or the space after the header not there.
    expected = """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

"""
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1221
def test_another_long_multiline_header(self):
    """A parsed one-line Received header is folded when re-serialized."""
    raw = ('Received: from siimage.com '
           '([172.25.1.3]) by zima.siliconimage.com with '
           'Microsoft SMTPSVC(5.0.2195.4905); '
           'Wed, 16 Oct 2002 07:41:11 -0700')
    parsed = email.message_from_string(raw)
    expected = '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

'''
    self.ndiffAssertEqual(parsed.as_string(maxheaderlen=78), expected)
1234
def test_long_lines_with_different_header(self):
    """A value that itself looks like 'Name: value' folds like any other.

    It is added twice under the 'List' field, once as a str and once as
    a Header, and both copies are expected to fold identically.
    """
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    msg = Message()
    msg['List'] = value
    msg['List'] = Header(value, header_name='List')
    expected = """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

"""
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1253
R. David Murray6f0022d2011-01-07 21:57:25 +00001254 def test_long_rfc2047_header_with_embedded_fws(self):
1255 h = Header(textwrap.dedent("""\
1256 We're going to pretend this header is in a non-ascii character set
1257 \tto see if line wrapping with encoded words and embedded
1258 folding white space works"""),
1259 charset='utf-8',
1260 header_name='Test')
1261 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1262 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1263 =?utf-8?q?cter_set?=
1264 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1265 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1266
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001267
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001268# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    """Check how generators treat body lines that start with 'From '."""

    def setUp(self):
        # A minimal message whose body begins with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        """mangle_from_=True prefixes the body's 'From ' line with '>'."""
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        """mangle_from_=False leaves the body untouched."""
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        """'From ' lines in the preamble and the epilogue are both mangled."""
        out = StringIO()
        gen = Generator(out, mangle_from_=True)
        msg = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        gen.flatten(msg)
        mangled = sum(1 for line in out.getvalue().split('\n')
                      if line.startswith('>From '))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        """BytesGenerator mangles a 'From ' line holding non-ASCII bytes."""
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(msg)
        self.assertEqual(out.getvalue(), source + b'>From R\xc3\xb6lli\n')
1336
Ezio Melottib3aedd42010-11-20 19:04:17 +00001337
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001338# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    """Exercise the basic behaviour of MIMEAudio on a real .au sample."""

    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        """The subtype is guessed from the audio data itself."""
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        """The payload decodes (base64) back to the original bytes."""
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        """An explicit subtype overrides the guessed one."""
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        """add_header() parameters are readable through get_param(s)."""
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A unique sentinel lets us tell "missing" from any real value;
        # assertIs reports both operands if the identity check fails.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1382
1383
Ezio Melottib3aedd42010-11-20 19:04:17 +00001384
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001385# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    """Exercise the basic behaviour of MIMEImage on a real GIF sample."""

    def setUp(self):
        # Load the sample image once per test via the module's openfile().
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        """The subtype is guessed from the image data itself."""
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        """The payload decodes (base64) back to the original bytes."""
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        """An explicit subtype overrides the guessed one."""
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        """add_header() parameters are readable through get_param(s)."""
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A unique sentinel lets us tell "missing" from any real value;
        # assertIs reports both operands if the identity check fails.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1423
1424
Ezio Melottib3aedd42010-11-20 19:04:17 +00001425
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001426# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Exercise MIMEApplication with the default and alternate encoders."""

    def test_headers(self):
        """Default content type is octet-stream with a base64 CTE."""
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        """The payload is base64 text that decodes to the original bytes."""
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        """Binary data with encode_7or8bit survives a bytes round trip."""
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        replaced = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), replaced)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        msg2 = email.message_from_bytes(wire.getvalue())
        # NOTE(review): this re-checks msg, not msg2 -- possibly meant to
        # show that flattening left msg unchanged; confirm the intent.
        self.assertEqual(msg.get_payload(), replaced)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        """Binary data with encode_noop survives a bytes round trip."""
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        replaced = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), replaced)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        msg2 = email.message_from_bytes(wire.getvalue())
        # NOTE(review): this re-checks msg, not msg2 -- possibly meant to
        # show that flattening left msg unchanged; confirm the intent.
        self.assertEqual(msg.get_payload(), replaced)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001475
Ezio Melottib3aedd42010-11-20 19:04:17 +00001476
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001477# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Check content type, charset and payload handling of MIMEText."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is used as the sentinel so that identity, not
        # equality, proves the failobj was handed back.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1528
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001529
Ezio Melottib3aedd42010-11-20 19:04:17 +00001530
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001531# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Generation and parsing of multipart/* messages."""

    def setUp(self):
        # Build a multipart/mixed container holding a text part and a GIF
        # attachment; the parts are kept on self for identity checks.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is the DST flag.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone / time.altzone count seconds *west* of UTC, so a
        # positive value means a negative zone offset.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Working from abs() keeps the sign
        # out of the digits, and divmod handles non-whole-hour zones.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        """Return an empty multipart/mixed MIMEBase with test headers set."""
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting indentation inside this literal (four
        # spaces per level) was reconstructed -- confirm against the output
        # of iterators._structure.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting indentation inside this literal (four
        # spaces per level) was reconstructed -- confirm against the output
        # of iterators._structure.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="    XXXX"

--    XXXX
Content-Type: text/plain


--    XXXX
Content-Type: text/plain

--    XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), '    XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001903
1904
Ezio Melottib3aedd42010-11-20 19:04:17 +00001905
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001906# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing and regeneration of badly formatted messages."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
2017
2018
Ezio Melottib3aedd42010-11-20 19:04:17 +00002019
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002020# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """RFC 2047 encoded-word decoding and re-encoding of header values."""

    def test_rfc2047_multiline(self):
        # The folded input is spelled with an explicit newline plus one
        # space so the continuation whitespace is visible in the source.
        raw = ('Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz\n'
               ' foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=')
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(decoded)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        expected = (
            'Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar '
            '=?mac-iceland?q?r=8Aksm?=\n'
            ' =?mac-iceland?q?=9Arg=8Cs?=')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        decoded = decode_header(raw)
        self.assertEqual(decoded, [(b'Andr\xe9', 'iso-8859-1'),
                                   (b'Pirard <pirard@dom.ain>', None)])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not delimited by whitespace are expected
        # to come back as the untouched input string.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'),
            ('dm=', b'v'),
            ('dm', b'v'),
            ('dmk=', b'vi'),
            ('dmk', b'vi'),
        ]
        for encoded, expected in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
2084
Ezio Melottib3aedd42010-11-20 19:04:17 +00002085
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002086# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """MIMEMessage construction, message/* parsing and default types."""

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_default_types(self, filename):
        """Assert the default/content types of both containers in filename."""
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_type_error(self):
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the two-space indentation of the field lines in the
        # literal below was reconstructed -- confirm against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed the subparts must still report
        # message/rfc822 as both content type and default type.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002385
Ezio Melottib3aedd42010-11-20 19:04:17 +00002386
# A general test of parser->model->generator idempotency.  In other words:
# read a message in, parse it into a message object tree, then, without
# touching the tree, regenerate the plain text.  The original text and the
# transformed text should be identical.  Note that we ignore the Unix-From
# line since that may contain a changed date.
class TestIdempotent(TestEmailBase):
    """Check that parsing a message and regenerating it is lossless.

    Most tests read one of the sample message files, parse it with
    email.message_from_string() and flatten the untouched object tree with
    a Generator; the regenerated text must equal the text that was read.
    """

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (message, text): the parsed message object together with
        # the raw text it was parsed from.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg and require the output to equal text.  Long headers
        # are not wrapped (maxheaderlen=0) since the comparison is verbatim.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def _roundtrip(self, filename, unixfrom=False):
        # Parse the named sample file and require lossless regeneration.
        msg, text = self._msgobj(filename)
        self._idempotent(msg, text, unixfrom=unixfrom)

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        # The content type is still reported as text/plain, but there are
        # no parameters to return.
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._roundtrip('msg_02.txt')

    def test_long_header(self):
        self._roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._roundtrip('msg_05.txt')

    def test_dsn(self):
        self._roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        # This is the one round trip that also emits the Unix-From line.
        self._roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._roundtrip('msg_45.txt')

    def test_content_type(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble,
           'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        # The message/rfc822 part wraps exactly one sub-message.
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and
        # that that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002549
2550
Ezio Melottib3aedd42010-11-20 19:04:17 +00002551
# Test various other bits of the package's functionality
class TestMiscellaneous(TestEmailBase):
    """Assorted checks of the email package's top-level API and helpers.

    Covers the message_from_* factories, email.__all__, the date and
    address helpers in email.utils, Charset comparison and body-encoding
    selection, and line-ending handling in Generator and BytesGenerator.
    """

    def test_message_from_string(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_file(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
            # Rewind so the parser sees the same data that was just read.
            fp.seek(0)
            msg = email.message_from_file(fp)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_string_with_class(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()

        # Create a subclass
        class MyMessage(Message):
            pass

        msg = email.message_from_string(text, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated: every part reached by walk()
        # must use the subclass too.
        with openfile('msg_02.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_message_from_file_with_class(self):
        # Create a subclass
        class MyMessage(Message):
            pass

        with openfile('msg_01.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated: every part reached by walk()
        # must use the subclass too.
        with openfile('msg_02.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test__all__(self):
        module = __import__('email')
        self.assertEqual(sorted(module.__all__), [
            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
            'generator', 'header', 'iterators', 'message',
            'message_from_binary_file', 'message_from_bytes',
            'message_from_file', 'message_from_string', 'mime', 'parser',
            'quoprimime', 'utils',
            ])

    def test_formatdate(self):
        # formatdate() followed by parsedate() must give back the UTC
        # broken-down time (compared down to the second).
        now = time.time()
        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
                         time.gmtime(now)[:6])

    def test_formatdate_localtime(self):
        now = time.time()
        self.assertEqual(
            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
            time.localtime(now)[:6])

    def test_formatdate_usegmt(self):
        now = time.time()
        self.assertEqual(
            utils.formatdate(now, localtime=False),
            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
        self.assertEqual(
            utils.formatdate(now, localtime=False, usegmt=True),
            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))

    def test_parsedate_none(self):
        self.assertIsNone(utils.parsedate(''))

    def test_parsedate_compact(self):
        # The FWS after the comma is optional
        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))

    def test_parsedate_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_compact_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_no_space_before_positive_offset(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))

    def test_parsedate_no_space_before_negative_offset(self):
        # Issue 1155362: we already handled '+' for this case.
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))

    def test_parsedate_acceptable_to_time_functions(self):
        eq = self.assertEqual
        # The 9-tuple from parsedate() can be passed straight to the time
        # module.
        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup)), 2003)
        # parsedate_tz() returns a 10-tuple; only the first nine items are
        # handed to the time module.
        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup[:9]))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup[:9])), 2003)

    def test_mktime_tz(self):
        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
                                          -1, -1, -1, 0)), 0)
        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
                                          -1, -1, -1, 1234)), -1234)

    def test_parsedate_y2k(self):
        """Test for parsing a date with a two-digit year.

        Parsing a date with a two-digit year should return the correct
        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
        obsoletes RFC822) requires four-digit years.

        """
        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))

    def test_parseaddr_empty(self):
        self.assertEqual(utils.parseaddr('<>'), ('', ''))
        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')

    def test_noquote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly Person', 'person@dom.ain')),
            'A Silly Person <person@dom.ain>')

    def test_escape_dump(self):
        self.assertEqual(
            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
            r'"A \(Very\) Silly Person" <person@dom.ain>')
        a = r'A \(Special\) Person'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_escape_backslashes(self):
        # Raw strings throughout: '\B' and '\ ' are not valid escape
        # sequences, so a plain (non-raw) literal would keep the backslashes
        # only by accident and draws a warning on newer interpreters.
        name = r'Arthur \Backslash\ Foobar'
        addr = 'person@dom.ain'
        self.assertEqual(
            utils.formataddr((name, addr)),
            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
        self.assertEqual(utils.parseaddr(utils.formataddr((name, addr))),
                         (name, addr))

    def test_name_with_dot(self):
        x = 'John X. Doe <jxd@example.com>'
        y = '"John X. Doe" <jxd@example.com>'
        a, b = ('John X. Doe', 'jxd@example.com')
        self.assertEqual(utils.parseaddr(x), (a, b))
        self.assertEqual(utils.parseaddr(y), (a, b))
        # formataddr() quotes the name if there's a dot in it
        self.assertEqual(utils.formataddr((a, b)), y)

    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
        # issue 10005.  Note that in the third test the second pair of
        # backslashes is not actually a quoted pair because it is not inside
        # a comment or quoted string: the address being parsed has a quoted
        # string containing a quoted backslash, followed by 'example' and two
        # backslashes, followed by another quoted string containing a space
        # and the word 'example'.  parseaddr copies those two backslashes
        # literally.  Per rfc5322 this is not technically correct since a \
        # may not appear in an address outside of a quoted string.  It is
        # probably a sensible Postel interpretation, though.
        eq = self.assertEqual
        eq(utils.parseaddr('""example" example"@example.com'),
           ('', '""example" example"@example.com'))
        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
           ('', '"\\"example\\" example"@example.com'))
        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
           ('', '"\\\\"example\\\\" example"@example.com'))

    def test_parseaddr_preserves_spaces_in_local_part(self):
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a
        # sequence of atoms, not dotatoms).  On the other hand we strip
        # whitespace from before the @ and around dots, on the assumption
        # that the whitespace around the punctuation is a mistake in what
        # would otherwise be an RFC5322 local part.  Leading whitespace is,
        # as usual, stripped as well.
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr(" merwok wok @xample.com"))
        self.assertEqual(('', 'merwok"wok" wok@xample.com'),
                         utils.parseaddr('merwok"wok" wok@xample.com'))
        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
                         utils.parseaddr('merwok. wok . wok@xample.com'))

    def test_multiline_from_comment(self):
        x = """\
Foo
\tBar <foo@example.com>"""
        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))

    def test_quote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
            r'"A Silly; Person" <person@dom.ain>')

    def test_charset_richcomparisons(self):
        eq = self.assertEqual
        ne = self.assertNotEqual
        cset1 = Charset()
        cset2 = Charset()
        # Comparison with a string ignores case and is checked with the
        # Charset on either side of the operator.
        for name in ('us-ascii', 'US-ASCII', 'Us-AsCiI'):
            eq(cset1, name)
            eq(name, cset1)
        for name in ('usascii', 'USASCII', 'UsAsCiI'):
            ne(cset1, name)
            ne(name, cset1)
        eq(cset1, cset2)
        eq(cset2, cset1)

    def test_getaddresses(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
                               'Bud Person <bperson@dom.ain>']),
           [('Al Person', 'aperson@dom.ain'),
            ('Bud Person', 'bperson@dom.ain')])

    def test_getaddresses_nasty(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['foo: ;']), [('', '')])
        eq(utils.getaddresses(
           ['[]*-- =~$']),
           [('', ''), ('', ''), ('', '*--')])
        eq(utils.getaddresses(
           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])

    def test_getaddresses_embedded_comment(self):
        """Test proper handling of a nested comment"""
        eq = self.assertEqual
        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
        eq(addrs[0][1], 'foo@bar.com')

    def test_utils_quote_unquote(self):
        # A filename containing a backslash and a double quote must come
        # back unchanged from get_filename().
        eq = self.assertEqual
        msg = Message()
        msg.add_header('content-disposition', 'attachment',
                       filename='foo\\wacky"name')
        eq(msg.get_filename(), 'foo\\wacky"name')

    def test_get_body_encoding_with_bogus_charset(self):
        charset = Charset('not a charset')
        self.assertEqual(charset.get_body_encoding(), 'base64')

    def test_get_body_encoding_with_uppercase_charset(self):
        eq = self.assertEqual
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset=UTF-8'
        eq(msg['content-type'], 'text/plain; charset=UTF-8')
        # get_charsets() reports the name lower-cased.
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'utf-8')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), 'base64')
        msg.set_payload(b'hello world', charset=charset)
        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
        eq(msg.get_payload(decode=True), b'hello world')
        eq(msg['content-transfer-encoding'], 'base64')
        # Try another one
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'us-ascii')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
        msg.set_payload('hello world', charset=charset)
        eq(msg.get_payload(), 'hello world')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_charsets_case_insensitive(self):
        lc = Charset('us-ascii')
        uc = Charset('US-ASCII')
        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())

    def test_partial_falls_inside_message_delivery_status(self):
        eq = self.ndiffAssertEqual
        # The Parser interface provides chunks of data to FeedParser in 8192
        # byte gulps.  SF bug #1076485 found one of those chunks inside
        # message/delivery-status header block, which triggered an
        # unreadline() of NeedMoreData.
        msg = self._msgobj('msg_43.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # Expected tree, indented four spaces per nesting level.  The
        # indentation is written out explicitly so it cannot be lost to
        # whitespace normalization of this file.
        expected = ('multipart/report\n'
                    '    text/plain\n'
                    '    message/delivery-status\n'
                    + '        text/plain\n' * 26 +
                    '    text/rfc822-headers\n')
        eq(sfp.getvalue(), expected)

    def test_make_msgid_domain(self):
        # The last 19 characters are '@' + domain + '>'.
        self.assertEqual(
            email.utils.make_msgid(domain='testdomain-string')[-19:],
            '@testdomain-string>')

    def test_Generator_linend(self):
        # Issue 14645.
        # newline='\n' is passed so the CRLFs in the file are read as-is.
        with openfile('msg_26.txt', newline='\n') as f:
            msgtxt = f.read()
        msgtxt_nl = msgtxt.replace('\r\n', '\n')
        msg = email.message_from_string(msgtxt)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), msgtxt_nl)

    def test_BytesGenerator_linend(self):
        # Issue 14645.
        with openfile('msg_26.txt', newline='\n') as f:
            msgtxt = f.read()
        msgtxt_nl = msgtxt.replace('\r\n', '\n')
        # Parse the LF-only text, then ask for CRLF on output: the result
        # must equal the original CRLF text.
        msg = email.message_from_string(msgtxt_nl)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue().decode('ascii'), msgtxt)

    def test_BytesGenerator_linend_with_non_ascii(self):
        # Issue 14645.
        with openfile('msg_26.txt', 'rb') as f:
            msgtxt = f.read()
        # Put a non-ASCII byte into the message before the round trip.
        msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
        msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
        msg = email.message_from_bytes(msgtxt_nl)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), msgtxt)
2944
Ezio Melottib3aedd42010-11-20 19:04:17 +00002945
# Test the iterators/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and for BufferedSubFile line splitting."""

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart; the joined body lines are
        # compared with the contents of msg_19.txt.
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of complete lines that must
        # become readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or added: the lines read back must join to
        # exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3033
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003034
Ezio Melottib3aedd42010-11-20 19:04:17 +00003035
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and email.message_from_string()."""

    # Show complete diffs when an assertion on long text fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is not parsed into parts: the payload stays a string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has
        # only whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            part = msg.get_payload(index)
            eq(part.get_content_type(), 'message/rfc822')
            self.assertTrue(part.is_multipart())
            eq(len(part.get_payload()), 1)
            inner = part.get_payload(0)
            self.assertFalse(inner.is_multipart())
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # All three lines, including the ones with unusual field names,
        # must be accepted as headers.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(msg.keys()), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line is not a valid header, and no headers at all are
        # expected from this input.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part
        # ending with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003198
Ezio Melottib3aedd42010-11-20 19:04:17 +00003199
R. David Murray96fd54e2010-10-08 15:55:28 +00003200class Test8BitBytesHandling(unittest.TestCase):
3201 # In Python3 all input is string, but that doesn't work if the actual input
3202 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3203 # decode byte streams using the surrogateescape error handler, and
3204 # reconvert to binary at appropriate places if we detect surrogates. This
3205 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3206 # but it does allow us to parse and preserve them, and to decode body
3207 # parts that use an 8bit CTE.
3208
3209 bodytest_msg = textwrap.dedent("""\
3210 From: foo@bar.com
3211 To: baz
3212 Mime-Version: 1.0
3213 Content-Type: text/plain; charset={charset}
3214 Content-Transfer-Encoding: {cte}
3215
3216 {bodyline}
3217 """)
3218
3219 def test_known_8bit_CTE(self):
3220 m = self.bodytest_msg.format(charset='utf-8',
3221 cte='8bit',
3222 bodyline='pöstal').encode('utf-8')
3223 msg = email.message_from_bytes(m)
3224 self.assertEqual(msg.get_payload(), "pöstal\n")
3225 self.assertEqual(msg.get_payload(decode=True),
3226 "pöstal\n".encode('utf-8'))
3227
3228 def test_unknown_8bit_CTE(self):
3229 m = self.bodytest_msg.format(charset='notavalidcharset',
3230 cte='8bit',
3231 bodyline='pöstal').encode('utf-8')
3232 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003233 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003234 self.assertEqual(msg.get_payload(decode=True),
3235 "pöstal\n".encode('utf-8'))
3236
3237 def test_8bit_in_quopri_body(self):
3238 # This is non-RFC compliant data...without 'decode' the library code
3239 # decodes the body using the charset from the headers, and because the
3240 # source byte really is utf-8 this works. This is likely to fail
3241 # against real dirty data (ie: produce mojibake), but the data is
3242 # invalid anyway so it is as good a guess as any. But this means that
3243 # this test just confirms the current behavior; that behavior is not
3244 # necessarily the best possible behavior. With 'decode' it is
3245 # returning the raw bytes, so that test should be of correct behavior,
3246 # or at least produce the same result that email4 did.
3247 m = self.bodytest_msg.format(charset='utf-8',
3248 cte='quoted-printable',
3249 bodyline='p=C3=B6stál').encode('utf-8')
3250 msg = email.message_from_bytes(m)
3251 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3252 self.assertEqual(msg.get_payload(decode=True),
3253 'pöstál\n'.encode('utf-8'))
3254
3255 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3256 # This is similar to the previous test, but proves that if the 8bit
3257 # byte is undecodeable in the specified charset, it gets replaced
3258 # by the unicode 'unknown' character. Again, this may or may not
3259 # be the ideal behavior. Note that if decode=False none of the
3260 # decoders will get involved, so this is the only test we need
3261 # for this behavior.
3262 m = self.bodytest_msg.format(charset='ascii',
3263 cte='quoted-printable',
3264 bodyline='p=C3=B6stál').encode('utf-8')
3265 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003266 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003267 self.assertEqual(msg.get_payload(decode=True),
3268 'pöstál\n'.encode('utf-8'))
3269
3270 def test_8bit_in_base64_body(self):
3271 # Sticking an 8bit byte in a base64 block makes it undecodable by
3272 # normal means, so the block is returned undecoded, but as bytes.
3273 m = self.bodytest_msg.format(charset='utf-8',
3274 cte='base64',
3275 bodyline='cMO2c3RhbAá=').encode('utf-8')
3276 msg = email.message_from_bytes(m)
3277 self.assertEqual(msg.get_payload(decode=True),
3278 'cMO2c3RhbAá=\n'.encode('utf-8'))
3279
3280 def test_8bit_in_uuencode_body(self):
3281 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3282 # normal means, so the block is returned undecoded, but as bytes.
3283 m = self.bodytest_msg.format(charset='utf-8',
3284 cte='uuencode',
3285 bodyline='<,.V<W1A; á ').encode('utf-8')
3286 msg = email.message_from_bytes(m)
3287 self.assertEqual(msg.get_payload(decode=True),
3288 '<,.V<W1A; á \n'.encode('utf-8'))
3289
3290
R. David Murray92532142011-01-07 23:25:30 +00003291 headertest_headers = (
3292 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3293 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3294 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3295 '\tJean de Baddie',
3296 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3297 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3298 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3299 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3300 )
3301 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3302 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003303
3304 def test_get_8bit_header(self):
3305 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003306 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3307 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003308
3309 def test_print_8bit_headers(self):
3310 msg = email.message_from_bytes(self.headertest_msg)
3311 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003312 textwrap.dedent("""\
3313 From: {}
3314 To: {}
3315 Subject: {}
3316 From: {}
3317
3318 Yes, they are flying.
3319 """).format(*[expected[1] for (_, expected) in
3320 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003321
3322 def test_values_with_8bit_headers(self):
3323 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003324 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003325 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003326 'b\uFFFD\uFFFDz',
3327 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3328 'coll\uFFFD\uFFFDgue, le pouf '
3329 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003330 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003331 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003332
3333 def test_items_with_8bit_headers(self):
3334 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003335 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003336 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003337 ('To', 'b\uFFFD\uFFFDz'),
3338 ('Subject', 'Maintenant je vous '
3339 'pr\uFFFD\uFFFDsente '
3340 'mon coll\uFFFD\uFFFDgue, le pouf '
3341 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3342 '\tJean de Baddie'),
3343 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003344
3345 def test_get_all_with_8bit_headers(self):
3346 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003347 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003348 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003349 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003350
R David Murraya2150232011-03-16 21:11:23 -04003351 def test_get_content_type_with_8bit(self):
3352 msg = email.message_from_bytes(textwrap.dedent("""\
3353 Content-Type: text/pl\xA7in; charset=utf-8
3354 """).encode('latin-1'))
3355 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3356 self.assertEqual(msg.get_content_maintype(), "text")
3357 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3358
3359 def test_get_params_with_8bit(self):
3360 msg = email.message_from_bytes(
3361 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3362 self.assertEqual(msg.get_params(header='x-header'),
3363 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3364 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3365 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3366 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3367
3368 def test_get_rfc2231_params_with_8bit(self):
3369 msg = email.message_from_bytes(textwrap.dedent("""\
3370 Content-Type: text/plain; charset=us-ascii;
3371 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3372 ).encode('latin-1'))
3373 self.assertEqual(msg.get_param('title'),
3374 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3375
3376 def test_set_rfc2231_params_with_8bit(self):
3377 msg = email.message_from_bytes(textwrap.dedent("""\
3378 Content-Type: text/plain; charset=us-ascii;
3379 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3380 ).encode('latin-1'))
3381 msg.set_param('title', 'test')
3382 self.assertEqual(msg.get_param('title'), 'test')
3383
3384 def test_del_rfc2231_params_with_8bit(self):
3385 msg = email.message_from_bytes(textwrap.dedent("""\
3386 Content-Type: text/plain; charset=us-ascii;
3387 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3388 ).encode('latin-1'))
3389 msg.del_param('title')
3390 self.assertEqual(msg.get_param('title'), None)
3391 self.assertEqual(msg.get_content_maintype(), 'text')
3392
3393 def test_get_payload_with_8bit_cte_header(self):
3394 msg = email.message_from_bytes(textwrap.dedent("""\
3395 Content-Transfer-Encoding: b\xa7se64
3396 Content-Type: text/plain; charset=latin-1
3397
3398 payload
3399 """).encode('latin-1'))
3400 self.assertEqual(msg.get_payload(), 'payload\n')
3401 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3402
R. David Murray96fd54e2010-10-08 15:55:28 +00003403 non_latin_bin_msg = textwrap.dedent("""\
3404 From: foo@bar.com
3405 To: báz
3406 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3407 \tJean de Baddie
3408 Mime-Version: 1.0
3409 Content-Type: text/plain; charset="utf-8"
3410 Content-Transfer-Encoding: 8bit
3411
3412 Да, они летят.
3413 """).encode('utf-8')
3414
3415 def test_bytes_generator(self):
3416 msg = email.message_from_bytes(self.non_latin_bin_msg)
3417 out = BytesIO()
3418 email.generator.BytesGenerator(out).flatten(msg)
3419 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3420
R. David Murray7372a072011-01-26 21:21:32 +00003421 def test_bytes_generator_handles_None_body(self):
3422 #Issue 11019
3423 msg = email.message.Message()
3424 out = BytesIO()
3425 email.generator.BytesGenerator(out).flatten(msg)
3426 self.assertEqual(out.getvalue(), b"\n")
3427
R. David Murray92532142011-01-07 23:25:30 +00003428 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003429 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003430 To: =?unknown-8bit?q?b=C3=A1z?=
3431 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3432 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3433 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003434 Mime-Version: 1.0
3435 Content-Type: text/plain; charset="utf-8"
3436 Content-Transfer-Encoding: base64
3437
3438 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3439 """)
3440
3441 def test_generator_handles_8bit(self):
3442 msg = email.message_from_bytes(self.non_latin_bin_msg)
3443 out = StringIO()
3444 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003445 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003446
3447 def test_bytes_generator_with_unix_from(self):
3448 # The unixfrom contains a current date, so we can't check it
3449 # literally. Just make sure the first word is 'From' and the
3450 # rest of the message matches the input.
3451 msg = email.message_from_bytes(self.non_latin_bin_msg)
3452 out = BytesIO()
3453 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3454 lines = out.getvalue().split(b'\n')
3455 self.assertEqual(lines[0].split()[0], b'From')
3456 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3457
R. David Murray92532142011-01-07 23:25:30 +00003458 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3459 non_latin_bin_msg_as7bit[2:4] = [
3460 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3461 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3462 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3463
R. David Murray96fd54e2010-10-08 15:55:28 +00003464 def test_message_from_binary_file(self):
3465 fn = 'test.msg'
3466 self.addCleanup(unlink, fn)
3467 with open(fn, 'wb') as testfile:
3468 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003469 with open(fn, 'rb') as testfile:
3470 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003471 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3472
3473 latin_bin_msg = textwrap.dedent("""\
3474 From: foo@bar.com
3475 To: Dinsdale
3476 Subject: Nudge nudge, wink, wink
3477 Mime-Version: 1.0
3478 Content-Type: text/plain; charset="latin-1"
3479 Content-Transfer-Encoding: 8bit
3480
3481 oh là là, know what I mean, know what I mean?
3482 """).encode('latin-1')
3483
3484 latin_bin_msg_as7bit = textwrap.dedent("""\
3485 From: foo@bar.com
3486 To: Dinsdale
3487 Subject: Nudge nudge, wink, wink
3488 Mime-Version: 1.0
3489 Content-Type: text/plain; charset="iso-8859-1"
3490 Content-Transfer-Encoding: quoted-printable
3491
3492 oh l=E0 l=E0, know what I mean, know what I mean?
3493 """)
3494
3495 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3496 m = email.message_from_bytes(self.latin_bin_msg)
3497 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3498
3499 def test_decoded_generator_emits_unicode_body(self):
3500 m = email.message_from_bytes(self.latin_bin_msg)
3501 out = StringIO()
3502 email.generator.DecodedGenerator(out).flatten(m)
3503 #DecodedHeader output contains an extra blank line compared
3504 #to the input message. RDM: not sure if this is a bug or not,
3505 #but it is not specific to the 8bit->7bit conversion.
3506 self.assertEqual(out.getvalue(),
3507 self.latin_bin_msg.decode('latin-1')+'\n')
3508
3509 def test_bytes_feedparser(self):
3510 bfp = email.feedparser.BytesFeedParser()
3511 for i in range(0, len(self.latin_bin_msg), 10):
3512 bfp.feed(self.latin_bin_msg[i:i+10])
3513 m = bfp.close()
3514 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3515
R. David Murray8451c4b2010-10-23 22:19:56 +00003516 def test_crlf_flatten(self):
3517 with openfile('msg_26.txt', 'rb') as fp:
3518 text = fp.read()
3519 msg = email.message_from_bytes(text)
3520 s = BytesIO()
3521 g = email.generator.BytesGenerator(s)
3522 g.flatten(msg, linesep='\r\n')
3523 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003524
3525 def test_8bit_multipart(self):
3526 # Issue 11605
3527 source = textwrap.dedent("""\
3528 Date: Fri, 18 Mar 2011 17:15:43 +0100
3529 To: foo@example.com
3530 From: foodwatch-Newsletter <bar@example.com>
3531 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3532 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3533 MIME-Version: 1.0
3534 Content-Type: multipart/alternative;
3535 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3536
3537 --b1_76a486bee62b0d200f33dc2ca08220ad
3538 Content-Type: text/plain; charset="utf-8"
3539 Content-Transfer-Encoding: 8bit
3540
3541 Guten Tag, ,
3542
3543 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3544 Nachrichten aus Japan.
3545
3546
3547 --b1_76a486bee62b0d200f33dc2ca08220ad
3548 Content-Type: text/html; charset="utf-8"
3549 Content-Transfer-Encoding: 8bit
3550
3551 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3552 "http://www.w3.org/TR/html4/loose.dtd">
3553 <html lang="de">
3554 <head>
3555 <title>foodwatch - Newsletter</title>
3556 </head>
3557 <body>
3558 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3559 die Nachrichten aus Japan.</p>
3560 </body>
3561 </html>
3562 --b1_76a486bee62b0d200f33dc2ca08220ad--
3563
3564 """).encode('utf-8')
3565 msg = email.message_from_bytes(source)
3566 s = BytesIO()
3567 g = email.generator.BytesGenerator(s)
3568 g.flatten(msg)
3569 self.assertEqual(s.getvalue(), source)
3570
R David Murray9fd170e2012-03-14 14:05:03 -04003571 def test_bytes_generator_b_encoding_linesep(self):
3572 # Issue 14062: b encoding was tacking on an extra \n.
3573 m = Message()
3574 # This has enough non-ascii that it should always end up b encoded.
3575 m['Subject'] = Header('žluťoučký kůň')
3576 s = BytesIO()
3577 g = email.generator.BytesGenerator(s)
3578 g.flatten(m, linesep='\r\n')
3579 self.assertEqual(
3580 s.getvalue(),
3581 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3582
3583 def test_generator_b_encoding_linesep(self):
3584 # Since this broke in ByteGenerator, test Generator for completeness.
3585 m = Message()
3586 # This has enough non-ascii that it should always end up b encoded.
3587 m['Subject'] = Header('žluťoučký kůň')
3588 s = StringIO()
3589 g = email.generator.Generator(s)
3590 g.flatten(m, linesep='\r\n')
3591 self.assertEqual(
3592 s.getvalue(),
3593 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3594
R. David Murray8451c4b2010-10-23 22:19:56 +00003595 maxDiff = None
3596
Ezio Melottib3aedd42010-11-20 19:04:17 +00003597
class BaseTestBytesGeneratorIdempotent:
    """Mixin that round-trips messages through BytesGenerator.

    The methods read ``linesep``, ``blinesep`` and
    ``normalize_linesep_regex`` from ``self`` and call ``assertListEqual``;
    none of these are defined here.
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return (message, data) for a test file with normalized line ends."""
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Assert that flattening msg with unlimited header length gives data."""
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        """Compare two byte strings as lists of lines split on LF."""
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3619
3620
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Run the idempotency tests with LF line endings."""

    # Input files have every CRLF rewritten to the bytes separator below.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3626
3627
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Run the idempotency tests with CRLF line endings."""

    # Only an LF not already preceded by CR is rewritten, so existing CRLF
    # pairs in the input are left alone.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3633
Ezio Melottib3aedd42010-11-20 19:04:17 +00003634
class TestBase64(unittest.TestCase):
    """Tests for the helpers in email.base64mime."""

    def test_len(self):
        """header_length() matches the length of the base64 text."""
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Four output characters per started group of three input
            # characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ..., 13..14 -> 20.
            expected = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        """decode() returns bytes for empty and non-empty input."""
        for encoded, decoded in (('', b''), ('aGVsbG8=', b'hello')):
            self.assertEqual(base64mime.decode(encoded), decoded)

    def test_encode(self):
        """body_encode() honours the maxlinelen and eol arguments."""
        eq = self.assertEqual
        encode = base64mime.body_encode
        eq(encode(b''), b'')
        eq(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(encode(b'hello\n'), 'aGVsbG8K\n')
        payload = b'xxxx ' * 20
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        wrapped = [full_line, full_line, full_line, last_line]
        # Test the maxlinelen arg
        eq(encode(payload, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(encode(payload, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        """header_encode() produces a b-encoded word in the given charset."""
        eq = self.assertEqual
        he = base64mime.header_encode
        newline_word = '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), newline_word)
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld'), newline_word)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003685
3686
Ezio Melottib3aedd42010-11-20 19:04:17 +00003687
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003688class TestQuopri(unittest.TestCase):
3689 def setUp(self):
3690 # Set of characters (as byte integers) that don't need to be encoded
3691 # in headers.
3692 self.hlit = list(chain(
3693 range(ord('a'), ord('z') + 1),
3694 range(ord('A'), ord('Z') + 1),
3695 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003696 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003697 # Set of characters (as byte integers) that do need to be encoded in
3698 # headers.
3699 self.hnon = [c for c in range(256) if c not in self.hlit]
3700 assert len(self.hlit) + len(self.hnon) == 256
3701 # Set of characters (as byte integers) that don't need to be encoded
3702 # in bodies.
3703 self.blit = list(range(ord(' '), ord('~') + 1))
3704 self.blit.append(ord('\t'))
3705 self.blit.remove(ord('='))
3706 # Set of characters (as byte integers) that do need to be encoded in
3707 # bodies.
3708 self.bnon = [c for c in range(256) if c not in self.blit]
3709 assert len(self.blit) + len(self.bnon) == 256
3710
Guido van Rossum9604e662007-08-30 03:46:43 +00003711 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003712 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003713 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003714 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003715 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003716 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003717 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003718
Guido van Rossum9604e662007-08-30 03:46:43 +00003719 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003720 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003721 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003722 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003723 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003724 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003725 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003726
3727 def test_header_quopri_len(self):
3728 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003729 eq(quoprimime.header_length(b'hello'), 5)
3730 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003731 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003732 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003733 # =?xxx?q?...?= means 10 extra characters
3734 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003735 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3736 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003737 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003738 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003739 # =?xxx?q?...?= means 10 extra characters
3740 10)
3741 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003742 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003743 'expected length 1 for %r' % chr(c))
3744 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003745 # Space is special; it's encoded to _
3746 if c == ord(' '):
3747 continue
3748 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003749 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003750 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003751
3752 def test_body_quopri_len(self):
3753 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003754 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003755 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003756 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003757 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003758
3759 def test_quote_unquote_idempotent(self):
3760 for x in range(256):
3761 c = chr(x)
3762 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3763
R David Murrayec1b5b82011-03-23 14:19:05 -04003764 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3765 if charset is None:
3766 encoded_header = quoprimime.header_encode(header)
3767 else:
3768 encoded_header = quoprimime.header_encode(header, charset)
3769 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003770
R David Murraycafd79d2011-03-23 15:25:55 -04003771 def test_header_encode_null(self):
3772 self._test_header_encode(b'', '')
3773
R David Murrayec1b5b82011-03-23 14:19:05 -04003774 def test_header_encode_one_word(self):
3775 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3776
3777 def test_header_encode_two_lines(self):
3778 self._test_header_encode(b'hello\nworld',
3779 '=?iso-8859-1?q?hello=0Aworld?=')
3780
3781 def test_header_encode_non_ascii(self):
3782 self._test_header_encode(b'hello\xc7there',
3783 '=?iso-8859-1?q?hello=C7there?=')
3784
3785 def test_header_encode_alt_charset(self):
3786 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3787 charset='iso-8859-2')
3788
3789 def _test_header_decode(self, encoded_header, expected_decoded_header):
3790 decoded_header = quoprimime.header_decode(encoded_header)
3791 self.assertEqual(decoded_header, expected_decoded_header)
3792
3793 def test_header_decode_null(self):
3794 self._test_header_decode('', '')
3795
3796 def test_header_decode_one_word(self):
3797 self._test_header_decode('hello', 'hello')
3798
3799 def test_header_decode_two_lines(self):
3800 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3801
3802 def test_header_decode_non_ascii(self):
3803 self._test_header_decode('hello=C7there', 'hello\xc7there')
3804
3805 def _test_decode(self, encoded, expected_decoded, eol=None):
3806 if eol is None:
3807 decoded = quoprimime.decode(encoded)
3808 else:
3809 decoded = quoprimime.decode(encoded, eol=eol)
3810 self.assertEqual(decoded, expected_decoded)
3811
3812 def test_decode_null_word(self):
3813 self._test_decode('', '')
3814
3815 def test_decode_null_line_null_word(self):
3816 self._test_decode('\r\n', '\n')
3817
3818 def test_decode_one_word(self):
3819 self._test_decode('hello', 'hello')
3820
3821 def test_decode_one_word_eol(self):
3822 self._test_decode('hello', 'hello', eol='X')
3823
3824 def test_decode_one_line(self):
3825 self._test_decode('hello\r\n', 'hello\n')
3826
3827 def test_decode_one_line_lf(self):
3828 self._test_decode('hello\n', 'hello\n')
3829
R David Murraycafd79d2011-03-23 15:25:55 -04003830 def test_decode_one_line_cr(self):
3831 self._test_decode('hello\r', 'hello\n')
3832
3833 def test_decode_one_line_nl(self):
3834 self._test_decode('hello\n', 'helloX', eol='X')
3835
3836 def test_decode_one_line_crnl(self):
3837 self._test_decode('hello\r\n', 'helloX', eol='X')
3838
R David Murrayec1b5b82011-03-23 14:19:05 -04003839 def test_decode_one_line_one_word(self):
3840 self._test_decode('hello\r\nworld', 'hello\nworld')
3841
3842 def test_decode_one_line_one_word_eol(self):
3843 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3844
3845 def test_decode_two_lines(self):
3846 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3847
R David Murraycafd79d2011-03-23 15:25:55 -04003848 def test_decode_two_lines_eol(self):
3849 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3850
R David Murrayec1b5b82011-03-23 14:19:05 -04003851 def test_decode_one_long_line(self):
3852 self._test_decode('Spam' * 250, 'Spam' * 250)
3853
3854 def test_decode_one_space(self):
3855 self._test_decode(' ', '')
3856
3857 def test_decode_multiple_spaces(self):
3858 self._test_decode(' ' * 5, '')
3859
3860 def test_decode_one_line_trailing_spaces(self):
3861 self._test_decode('hello \r\n', 'hello\n')
3862
3863 def test_decode_two_lines_trailing_spaces(self):
3864 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3865
3866 def test_decode_quoted_word(self):
3867 self._test_decode('=22quoted=20words=22', '"quoted words"')
3868
3869 def test_decode_uppercase_quoting(self):
3870 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3871
def test_decode_lowercase_quoting(self):
    # Escapes written with lower-case hex digits give the same result as
    # their upper-case spelling.
    encoded = 'ab=cd=ef'
    decoded = 'ab\xcd\xef'
    self._test_decode(encoded, decoded)
3874
def test_decode_soft_line_break(self):
    # '=' directly followed by CRLF is a soft break: both halves are
    # expected to be joined with nothing in between.
    encoded = 'soft line=\r\nbreak'
    decoded = 'soft linebreak'
    self._test_decode(encoded, decoded)
3877
def test_decode_false_quoting(self):
    # '=' signs that do not form a valid escape are expected to be left
    # exactly as they are.
    text = 'A=1,B=A ==> A+B==2'
    self._test_decode(text, 'A=1,B=A ==> A+B==2')
3880
3881 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3882 kwargs = {}
3883 if maxlinelen is None:
3884 # Use body_encode's default.
3885 maxlinelen = 76
3886 else:
3887 kwargs['maxlinelen'] = maxlinelen
3888 if eol is None:
3889 # Use body_encode's default.
3890 eol = '\n'
3891 else:
3892 kwargs['eol'] = eol
3893 encoded_body = quoprimime.body_encode(body, **kwargs)
3894 self.assertEqual(encoded_body, expected_encoded_body)
3895 if eol == '\n' or eol == '\r\n':
3896 # We know how to split the result back into lines, so maxlinelen
3897 # can be checked.
3898 for line in encoded_body.splitlines():
3899 self.assertLessEqual(len(line), maxlinelen)
3900
def test_encode_null(self):
    # The empty body is expected to encode to the empty string.
    body, encoded = '', ''
    self._test_encode(body, encoded)
3903
def test_encode_null_lines(self):
    # Two empty lines are expected back unchanged.
    blank_lines = '\n\n'
    self._test_encode(blank_lines, '\n\n')
3906
def test_encode_one_line(self):
    # A plain LF-terminated line needs no encoding.
    line = 'hello\n'
    self._test_encode(line, 'hello\n')
3909
def test_encode_one_line_crlf(self):
    # CRLF in the input is expected to come out as the default eol (LF).
    body = 'hello\r\n'
    encoded = 'hello\n'
    self._test_encode(body, encoded)
3912
def test_encode_one_line_eol(self):
    # With eol='\r\n' an LF-terminated line is expected to end in CRLF.
    body = 'hello\n'
    encoded = 'hello\r\n'
    self._test_encode(body, encoded, eol='\r\n')
3915
def test_encode_one_space(self):
    # A body consisting of one space is expected to be escaped as =20.
    body, encoded = ' ', '=20'
    self._test_encode(body, encoded)
3918
def test_encode_one_line_one_space(self):
    # A line holding only a space: the space is escaped, the LF is kept.
    body = ' \n'
    encoded = '=20\n'
    self._test_encode(body, encoded)
3921
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3926
def test_encode_two_lines_one_space(self):
    # Two lines that each hold a single space: both spaces are escaped.
    body = ' \n \n'
    encoded = '=20\n=20\n'
    self._test_encode(body, encoded)
3929
R David Murrayec1b5b82011-03-23 14:19:05 -04003930 def test_encode_one_word_trailing_spaces(self):
3931 self._test_encode('hello ', 'hello =20')
3932
3933 def test_encode_one_line_trailing_spaces(self):
3934 self._test_encode('hello \n', 'hello =20\n')
3935
def test_encode_one_word_trailing_tab(self):
    # A trailing tab is expected to be escaped as =09 while the space in
    # front of it stays literal.
    body = 'hello \t'
    encoded = 'hello =09'
    self._test_encode(body, encoded)
3938
def test_encode_one_line_trailing_tab(self):
    # Terminated-line variant of the trailing-tab case.
    body = 'hello \t\n'
    encoded = 'hello =09\n'
    self._test_encode(body, encoded)
3941
def test_encode_trailing_space_before_maxlinelen(self):
    # First line is 5 characters long and ends in a space, limit is 6: the
    # expected output keeps the space and follows it with a soft break.
    body = 'abcd \n1234'
    encoded = 'abcd =\n\n1234'
    self._test_encode(body, encoded, maxlinelen=6)
3944
def test_encode_trailing_space_at_maxlinelen(self):
    # Same body with limit 5: the expected output breaks in front of the
    # space and escapes it on a line of its own.
    body = 'abcd \n1234'
    encoded = 'abcd=\n=20\n1234'
    self._test_encode(body, encoded, maxlinelen=5)
3947
def test_encode_trailing_space_beyond_maxlinelen(self):
    # Same body with limit 4: the expected output breaks after 'abc' and
    # puts 'd' plus the escaped space on the continuation line.
    body = 'abcd \n1234'
    encoded = 'abc=\nd=20\n1234'
    self._test_encode(body, encoded, maxlinelen=4)
3950
def test_encode_whitespace_lines(self):
    # Five whitespace-only lines; each one is expected to be escaped.
    line_count = 5
    body = ' \n' * line_count
    encoded = '=20\n' * line_count
    self._test_encode(body, encoded)
R David Murrayec1b5b82011-03-23 14:19:05 -04003953
def test_encode_quoted_equals(self):
    # A literal '=' is expected to be escaped as =3D.
    body = 'a = b'
    encoded = 'a =3D b'
    self._test_encode(body, encoded)
3956
def test_encode_one_long_string(self):
    # 100 characters without a line ending: a soft break is expected after
    # 75 of them, the remaining 25 follow on the next line.
    body = 'x' * 100
    encoded = 'x' * 75 + '=\n' + 'x' * 25
    self._test_encode(body, encoded)
3959
3960 def test_encode_one_long_line(self):
3961 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3962
3963 def test_encode_one_very_long_line(self):
3964 self._test_encode('x' * 200 + '\n',
3965 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3966
3967 def test_encode_one_long_line(self):
3968 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3969
def test_encode_shortest_maxlinelen(self):
    # With a limit of 4, every escaped '=' (=3D) is expected on a line of
    # its own, followed by a soft break on all but the last line.
    body = '=' * 5
    encoded = '=3D=\n' * 4 + '=3D'
    self._test_encode(body, encoded, maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003972
def test_encode_maxlinelen_too_small(self):
    # A limit of 3 is expected to be rejected with ValueError.
    with self.assertRaises(ValueError):
        self._test_encode('', '', maxlinelen=3)
3975
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003976 def test_encode(self):
3977 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003978 eq(quoprimime.body_encode(''), '')
3979 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003980 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003981 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003982 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003983 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003984xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3985 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3986x xxxx xxxx xxxx xxxx=20""")
3987 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003988 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3989 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003990xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3991 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3992x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003993 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003994one line
3995
3996two line"""), """\
3997one line
3998
3999two line""")
4000
4001
Ezio Melottib3aedd42010-11-20 19:04:17 +00004002
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004003# Test the Charset class
class TestCharset(unittest.TestCase):
    # Checks how Charset encodes headers and bodies for a few charsets and
    # that invalid charset names are rejected.

    def tearDown(self):
        # test_body_encode registers a temporary 'fake' charset.  Remove it
        # after every test; nothing happens if it was never registered.
        from email import charset as charset_module
        charset_module.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        self.assertEqual('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        self.assertEqual('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        self.assertEqual('hello world',
                         ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        # Only the construction is exercised while the checks below stay
        # disabled.
        euc_jp = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as charset_module
        charset_module.add_charset('fake', charset_module.QP, None, 'utf-8')
        fake = Charset('fake')
        self.assertEqual('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # A valid name round-trips through str(); a name containing a
        # non-ASCII character raises CharsetError.
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4058
4059
Ezio Melottib3aedd42010-11-20 19:04:17 +00004060
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004061# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    # Exercises email.header.Header: construction, appending chunks in
    # different charsets, folding at maxlinelen, and round-tripping through
    # decode_header() / make_header().
    #
    # NOTE(review): this text was recovered from a rendering that collapses
    # runs of whitespace.  Literals whose whitespace is significant were
    # restored where the visible code made the intent clear (test_simple,
    # test_encode_preserves_leading_ws_on_value); confirm the remaining
    # literals against the upstream file.

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # The appended chunk brings its own leading space and chunks are
        # joined with a space as well (see test_simple_surprise, which
        # appends the same text without the space), hence two spaces.
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # Appending a chunk without a leading space still yields a space
        # between the chunks.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        # A plain string comes back from decode_header() as one chunk with
        # no charset.
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        # Every folded line of a long header must fit into maxlinelen.
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            # assertLessEqual reports both values when the check fails.
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        # One header built from three chunks in three charsets: check the
        # folded encoding, the decoded chunks, str() and make_header().
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        # A Header created without any content encodes to ''.
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        # Header objects compare equal to their plain-string value.
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        # The fold position depends on header_name and on an explicit
        # maxlinelen; str() always gives back the unfolded text.
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        # A q-encoded header is split into encoded words that fit into
        # maxlinelen and decodes back to the original text.
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        # Same as test_quopri_splittable for a charset whose headers are
        # base64-encoded.
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        # A plain ASCII header survives decode_header() / make_header().
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # append() accepts the charset as a plain string.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # For utf-8 the shorter of the q and b encodings is used.
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        # Undecodable bytes raise UnicodeError unless errors='replace' is
        # given, in which case they are replaced.
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        # A surrogate-escaped string with the unknown-8bit charset shows the
        # bad byte as U+FFFD in str() but decodes back to the original bytes.
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        # Same as above, but passing the raw bytes to Header directly.
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        # The unknown-8bit chunk also survives a decode_header() /
        # make_header() round trip.
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        # Mutating the list returned by decode_header() must not affect the
        # Header it was obtained from.
        h = Header('test')
        chunks = email.header.decode_header(h)
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        # An encoded chunk followed by a plain chunk round-trips.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        # Whitespace between two adjacent encoded words is dropped and the
        # words are merged into a single chunk.
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        # Malformed base64 in an encoded word raises HeaderParseError.
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # shift_jis input is emitted using the iso-2022-jp output charset.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        # The value's own leading whitespace must appear in the output in
        # addition to the single space that follows the colon.
        msg = Message()
        msg['SomeHeader'] = '   value with leading ws'
        self.assertEqual(str(msg), "SomeHeader:    value with leading ws\n\n")
4413
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004414
Ezio Melottib3aedd42010-11-20 19:04:17 +00004415
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004416# Test RFC 2231 header parameters (en/de)coding
4417class TestRFC2231(TestEmailBase):
def test_get_param(self):
    # The 'title' parameter of msg_29.txt comes back as a
    # (charset, language, value) triple, with and without unquoting.
    msg = self._msgobj('msg_29.txt')
    unquoted = msg.get_param('title')
    self.assertEqual(
        unquoted,
        ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    still_quoted = msg.get_param('title', unquote=False)
    self.assertEqual(
        still_quoted,
        ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4425
def test_set_param(self):
    # set_param() with a charset (and optionally a language) stores an
    # RFC 2231 parameter; the flattened message shows it percent-encoded.
    eq = self.ndiffAssertEqual
    msg = Message()
    # Without a language the middle element of the triple is ''.
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii')
    eq(msg.get_param('title'),
       ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    eq(msg.get_param('title'),
       ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    # Same parameter on a message loaded from msg_01.txt.
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    # NOTE(review): the expected text must match msg_01.txt byte for byte;
    # runs of spaces (e.g. in the Received: date) may have been collapsed
    # by text extraction - confirm against the data file.
    eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4462
R David Murraya2860e82011-04-16 09:20:30 -04004463 def test_set_param_requote(self):
4464 msg = Message()
4465 msg.set_param('title', 'foo')
4466 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4467 msg.set_param('title', 'bar', requote=False)
4468 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4469 # tspecial is still quoted.
4470 msg.set_param('title', "(bar)bell", requote=False)
4471 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4472
def test_del_param(self):
    # After setting two RFC 2231 parameters on msg_01.txt and deleting
    # 'foo' again, only 'title' remains in the flattened Content-Type.
    eq = self.ndiffAssertEqual
    msg = self._msgobj('msg_01.txt')
    msg.set_param('foo', 'bar', charset='us-ascii', language='en')
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    msg.del_param('foo', header='Content-Type')
    # NOTE(review): the expected text must match msg_01.txt byte for byte;
    # runs of spaces (e.g. in the Received: date) may have been collapsed
    # by text extraction - confirm against the data file.
    eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4502
def test_rfc2231_get_content_charset(self):
    # msg_32.txt is expected to report 'us-ascii' as its content charset.
    msg = self._msgobj('msg_32.txt')
    charset = msg.get_content_charset()
    self.assertEqual(charset, 'us-ascii')
4507
R. David Murraydfd7eb02010-12-24 22:36:49 +00004508 def test_rfc2231_parse_rfc_quoting(self):
4509 m = textwrap.dedent('''\
4510 Content-Disposition: inline;
4511 \tfilename*0*=''This%20is%20even%20more%20;
4512 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4513 \tfilename*2="is it not.pdf"
4514
4515 ''')
4516 msg = email.message_from_string(m)
4517 self.assertEqual(msg.get_filename(),
4518 'This is even more ***fun*** is it not.pdf')
4519 self.assertEqual(m, msg.as_string())
4520
4521 def test_rfc2231_parse_extra_quoting(self):
4522 m = textwrap.dedent('''\
4523 Content-Disposition: inline;
4524 \tfilename*0*="''This%20is%20even%20more%20";
4525 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4526 \tfilename*2="is it not.pdf"
4527
4528 ''')
4529 msg = email.message_from_string(m)
4530 self.assertEqual(msg.get_filename(),
4531 'This is even more ***fun*** is it not.pdf')
4532 self.assertEqual(m, msg.as_string())
4533
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004534 def test_rfc2231_no_language_or_charset(self):
4535 m = '''\
4536Content-Transfer-Encoding: 8bit
4537Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4538Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4539
4540'''
4541 msg = email.message_from_string(m)
4542 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004543 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004544 self.assertEqual(
4545 param,
4546 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4547
4548 def test_rfc2231_no_language_or_charset_in_filename(self):
4549 m = '''\
4550Content-Disposition: inline;
4551\tfilename*0*="''This%20is%20even%20more%20";
4552\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4553\tfilename*2="is it not.pdf"
4554
4555'''
4556 msg = email.message_from_string(m)
4557 self.assertEqual(msg.get_filename(),
4558 'This is even more ***fun*** is it not.pdf')
4559
4560 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4561 m = '''\
4562Content-Disposition: inline;
4563\tfilename*0*="''This%20is%20even%20more%20";
4564\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4565\tfilename*2="is it not.pdf"
4566
4567'''
4568 msg = email.message_from_string(m)
4569 self.assertEqual(msg.get_filename(),
4570 'This is even more ***fun*** is it not.pdf')
4571
4572 def test_rfc2231_partly_encoded(self):
4573 m = '''\
4574Content-Disposition: inline;
4575\tfilename*0="''This%20is%20even%20more%20";
4576\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4577\tfilename*2="is it not.pdf"
4578
4579'''
4580 msg = email.message_from_string(m)
4581 self.assertEqual(
4582 msg.get_filename(),
4583 'This%20is%20even%20more%20***fun*** is it not.pdf')
4584
4585 def test_rfc2231_partly_nonencoded(self):
4586 m = '''\
4587Content-Disposition: inline;
4588\tfilename*0="This%20is%20even%20more%20";
4589\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4590\tfilename*2="is it not.pdf"
4591
4592'''
4593 msg = email.message_from_string(m)
4594 self.assertEqual(
4595 msg.get_filename(),
4596 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4597
4598 def test_rfc2231_no_language_or_charset_in_boundary(self):
4599 m = '''\
4600Content-Type: multipart/alternative;
4601\tboundary*0*="''This%20is%20even%20more%20";
4602\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4603\tboundary*2="is it not.pdf"
4604
4605'''
4606 msg = email.message_from_string(m)
4607 self.assertEqual(msg.get_boundary(),
4608 'This is even more ***fun*** is it not.pdf')
4609
4610 def test_rfc2231_no_language_or_charset_in_charset(self):
4611 # This is a nonsensical charset value, but tests the code anyway
4612 m = '''\
4613Content-Type: text/plain;
4614\tcharset*0*="This%20is%20even%20more%20";
4615\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4616\tcharset*2="is it not.pdf"
4617
4618'''
4619 msg = email.message_from_string(m)
4620 self.assertEqual(msg.get_content_charset(),
4621 'this is even more ***fun*** is it not.pdf')
4622
4623 def test_rfc2231_bad_encoding_in_filename(self):
4624 m = '''\
4625Content-Disposition: inline;
4626\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4627\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4628\tfilename*2="is it not.pdf"
4629
4630'''
4631 msg = email.message_from_string(m)
4632 self.assertEqual(msg.get_filename(),
4633 'This is even more ***fun*** is it not.pdf')
4634
4635 def test_rfc2231_bad_encoding_in_charset(self):
4636 m = """\
4637Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4638
4639"""
4640 msg = email.message_from_string(m)
4641 # This should return None because non-ascii characters in the charset
4642 # are not allowed.
4643 self.assertEqual(msg.get_content_charset(), None)
4644
4645 def test_rfc2231_bad_character_in_charset(self):
4646 m = """\
4647Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4648
4649"""
4650 msg = email.message_from_string(m)
4651 # This should return None because non-ascii characters in the charset
4652 # are not allowed.
4653 self.assertEqual(msg.get_content_charset(), None)
4654
4655 def test_rfc2231_bad_character_in_filename(self):
4656 m = '''\
4657Content-Disposition: inline;
4658\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4659\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4660\tfilename*2*="is it not.pdf%E2"
4661
4662'''
4663 msg = email.message_from_string(m)
4664 self.assertEqual(msg.get_filename(),
4665 'This is even more ***fun*** is it not.pdf\ufffd')
4666
4667 def test_rfc2231_unknown_encoding(self):
4668 m = """\
4669Content-Transfer-Encoding: 8bit
4670Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4671
4672"""
4673 msg = email.message_from_string(m)
4674 self.assertEqual(msg.get_filename(), 'myfile.txt')
4675
4676 def test_rfc2231_single_tick_in_filename_extended(self):
4677 eq = self.assertEqual
4678 m = """\
4679Content-Type: application/x-foo;
4680\tname*0*=\"Frank's\"; name*1*=\" Document\"
4681
4682"""
4683 msg = email.message_from_string(m)
4684 charset, language, s = msg.get_param('name')
4685 eq(charset, None)
4686 eq(language, None)
4687 eq(s, "Frank's Document")
4688
4689 def test_rfc2231_single_tick_in_filename(self):
4690 m = """\
4691Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4692
4693"""
4694 msg = email.message_from_string(m)
4695 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004696 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004697 self.assertEqual(param, "Frank's Document")
4698
4699 def test_rfc2231_tick_attack_extended(self):
4700 eq = self.assertEqual
4701 m = """\
4702Content-Type: application/x-foo;
4703\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4704
4705"""
4706 msg = email.message_from_string(m)
4707 charset, language, s = msg.get_param('name')
4708 eq(charset, 'us-ascii')
4709 eq(language, 'en-us')
4710 eq(s, "Frank's Document")
4711
4712 def test_rfc2231_tick_attack(self):
4713 m = """\
4714Content-Type: application/x-foo;
4715\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4716
4717"""
4718 msg = email.message_from_string(m)
4719 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004720 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004721 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4722
4723 def test_rfc2231_no_extended_values(self):
4724 eq = self.assertEqual
4725 m = """\
4726Content-Type: application/x-foo; name=\"Frank's Document\"
4727
4728"""
4729 msg = email.message_from_string(m)
4730 eq(msg.get_param('name'), "Frank's Document")
4731
4732 def test_rfc2231_encoded_then_unencoded_segments(self):
4733 eq = self.assertEqual
4734 m = """\
4735Content-Type: application/x-foo;
4736\tname*0*=\"us-ascii'en-us'My\";
4737\tname*1=\" Document\";
4738\tname*2*=\" For You\"
4739
4740"""
4741 msg = email.message_from_string(m)
4742 charset, language, s = msg.get_param('name')
4743 eq(charset, 'us-ascii')
4744 eq(language, 'en-us')
4745 eq(s, 'My Document For You')
4746
4747 def test_rfc2231_unencoded_then_encoded_segments(self):
4748 eq = self.assertEqual
4749 m = """\
4750Content-Type: application/x-foo;
4751\tname*0=\"us-ascii'en-us'My\";
4752\tname*1*=\" Document\";
4753\tname*2*=\" For You\"
4754
4755"""
4756 msg = email.message_from_string(m)
4757 charset, language, s = msg.get_param('name')
4758 eq(charset, 'us-ascii')
4759 eq(language, 'en-us')
4760 eq(s, 'My Document For You')
4761
4762
Ezio Melottib3aedd42010-11-20 19:04:17 +00004763
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Matches from a '--<boundary>' line to the next line consisting of the
    # same '--<boundary>' text; group 2 is everything in between, i.e. the
    # first MIME part.  Compiled once here using the module-level 're'.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text, parsed Message) for the named test data file.
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the text of the first MIME part of 'text'.  Fail the test
        # with a readable message if no delimited part can be found, rather
        # than raising AttributeError on a None match object.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(match, 'no delimited MIME part found in message')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to be identical to the one in the source text.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same check, but with header wrapping requested at 60 columns.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check, going through Generator.flatten() directly.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4799
4800
Ezio Melottib3aedd42010-11-20 19:04:17 +00004801
def _testclasses():
    # Collect every module-level object of this module whose name starts
    # with 'Test'.  dir() returns names sorted, so the order is alphabetical.
    mod = sys.modules[__name__]
    return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
4805
4806
def suite():
    # Build one TestSuite holding the tests of every Test* class in this
    # module.  TestLoader.loadTestsFromTestCase() is used instead of the
    # deprecated unittest.makeSuite(), and the local is not called 'suite'
    # so that it no longer shadows this function's own name.
    all_tests = unittest.TestSuite()
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    for testclass in _testclasses():
        all_tests.addTest(load(testclass))
    return all_tests
4812
4813
def test_main():
    # Entry point for the regression-test driver: hand each Test* class of
    # this module to run_unittest() in turn.
    for testclass in _testclasses():
        run_unittest(testclass)
4817
4818
Ezio Melottib3aedd42010-11-20 19:04:17 +00004819
# When run as a script, execute the aggregate suite built by suite().
if __name__ == '__main__':
    unittest.main(defaultTest='suite')