blob: 632e0a9bb2522cbc932c2ae8e374743f29989c83 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R David Murray28346b82011-03-31 11:40:20 -040039from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040040from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
# Small string constants shared by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
45
46
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Fixture messages are loaded by name through self._msgobj() and
    # openfile(); both helpers come from the test package, not this class.

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # For 'xx' the supplied fallback object is expected back unchanged.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # With a failobj, the placeholder replaces the None entries.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        # Flattening must reproduce the input text exactly.
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        # Every spelling of the uuencode CTE must decode the same payload.
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must match the original text.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup gives None.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        # The same header value is written twice, once with each quoting
        # style for the Python literal.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter appends it after the remaining ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a '/' is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type without a slash falls back to the text/plain default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The undecodable payload is expected back as the raw bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

# Test the email.encoders module
class TestEncoders(unittest.TestCase):

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], '8bit')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')


Guido van Rossum8b3febe2007-08-30 01:15:14 +0000600# Test long header wrapping
601class TestLongHeaders(TestEmailBase):
602 def test_split_long_continuation(self):
603 eq = self.ndiffAssertEqual
604 msg = email.message_from_string("""\
605Subject: bug demonstration
606\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
607\tmore text
608
609test
610""")
611 sfp = StringIO()
612 g = Generator(sfp)
613 g.flatten(msg)
614 eq(sfp.getvalue(), """\
615Subject: bug demonstration
616\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
617\tmore text
618
619test
620""")
621
622 def test_another_long_almost_unsplittable_header(self):
623 eq = self.ndiffAssertEqual
624 hstr = """\
625bug demonstration
626\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
627\tmore text"""
628 h = Header(hstr, continuation_ws='\t')
629 eq(h.encode(), """\
630bug demonstration
631\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
632\tmore text""")
633 h = Header(hstr.replace('\t', ' '))
634 eq(h.encode(), """\
635bug demonstration
636 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
637 more text""")
638
639 def test_long_nonstring(self):
640 eq = self.ndiffAssertEqual
641 g = Charset("iso-8859-1")
642 cz = Charset("iso-8859-2")
643 utf8 = Charset("utf-8")
644 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
645 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
646 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
647 b'bef\xf6rdert. ')
648 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
649 b'd\xf9vtipu.. ')
650 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
651 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
652 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
653 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
654 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
655 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
656 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
657 '\u3044\u307e\u3059\u3002')
658 h = Header(g_head, g, header_name='Subject')
659 h.append(cz_head, cz)
660 h.append(utf8_head, utf8)
661 msg = Message()
662 msg['Subject'] = h
663 sfp = StringIO()
664 g = Generator(sfp)
665 g.flatten(msg)
666 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000667Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
668 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
669 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
670 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
671 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
672 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
673 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
674 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
675 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
676 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
677 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000678
679""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000680 eq(h.encode(maxlinelen=76), """\
681=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
682 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
683 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
684 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
685 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
686 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
687 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
688 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
689 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
690 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
691 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000692
693 def test_long_header_encode(self):
694 eq = self.ndiffAssertEqual
695 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
696 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
697 header_name='X-Foobar-Spoink-Defrobnit')
698 eq(h.encode(), '''\
699wasnipoop; giraffes="very-long-necked-animals";
700 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
701
702 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
703 eq = self.ndiffAssertEqual
704 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
705 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
706 header_name='X-Foobar-Spoink-Defrobnit',
707 continuation_ws='\t')
708 eq(h.encode(), '''\
709wasnipoop; giraffes="very-long-necked-animals";
710 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
711
712 def test_long_header_encode_with_tab_continuation(self):
713 eq = self.ndiffAssertEqual
714 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
715 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
716 header_name='X-Foobar-Spoink-Defrobnit',
717 continuation_ws='\t')
718 eq(h.encode(), '''\
719wasnipoop; giraffes="very-long-necked-animals";
720\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
721
R David Murray3a6152f2011-03-14 21:13:03 -0400722 def test_header_encode_with_different_output_charset(self):
723 h = Header('文', 'euc-jp')
724 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
725
726 def test_long_header_encode_with_different_output_charset(self):
727 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
728 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
729 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
730 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
731 res = """\
732=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
733 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
734 self.assertEqual(h.encode(), res)
735
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000736 def test_header_splitter(self):
737 eq = self.ndiffAssertEqual
738 msg = MIMEText('')
739 # It'd be great if we could use add_header() here, but that doesn't
740 # guarantee an order of the parameters.
741 msg['X-Foobar-Spoink-Defrobnit'] = (
742 'wasnipoop; giraffes="very-long-necked-animals"; '
743 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
744 sfp = StringIO()
745 g = Generator(sfp)
746 g.flatten(msg)
747 eq(sfp.getvalue(), '''\
748Content-Type: text/plain; charset="us-ascii"
749MIME-Version: 1.0
750Content-Transfer-Encoding: 7bit
751X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
752 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
753
754''')
755
756 def test_no_semis_header_splitter(self):
757 eq = self.ndiffAssertEqual
758 msg = Message()
759 msg['From'] = 'test@dom.ain'
760 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
761 msg.set_payload('Test')
762 sfp = StringIO()
763 g = Generator(sfp)
764 g.flatten(msg)
765 eq(sfp.getvalue(), """\
766From: test@dom.ain
767References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
768 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
769
770Test""")
771
R David Murray7da4db12011-04-07 20:37:17 -0400772 def test_last_split_chunk_does_not_fit(self):
773 eq = self.ndiffAssertEqual
774 h = Header('Subject: the first part of this is short, but_the_second'
775 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
776 '_all_by_itself')
777 eq(h.encode(), """\
778Subject: the first part of this is short,
779 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
780
781 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
782 eq = self.ndiffAssertEqual
783 h = Header(', but_the_second'
784 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
785 '_all_by_itself')
786 eq(h.encode(), """\
787,
788 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
789
790 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
791 eq = self.ndiffAssertEqual
792 h = Header(', , but_the_second'
793 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
794 '_all_by_itself')
795 eq(h.encode(), """\
796, ,
797 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
798
799 def test_trailing_splitable_on_overlong_unsplitable(self):
800 eq = self.ndiffAssertEqual
801 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
802 'be_on_a_line_all_by_itself;')
803 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
804 "be_on_a_line_all_by_itself;")
805
806 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
807 eq = self.ndiffAssertEqual
808 h = Header('; '
809 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
810 'be_on_a_line_all_by_itself;')
811 eq(h.encode(), """\
812;
813 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
814
R David Murraye1292a22011-04-07 20:54:03 -0400815 def test_long_header_with_multiple_sequential_split_chars(self):
816 # Issue 11492
817
818 eq = self.ndiffAssertEqual
819 h = Header('This is a long line that has two whitespaces in a row. '
820 'This used to cause truncation of the header when folded')
821 eq(h.encode(), """\
822This is a long line that has two whitespaces in a row. This used to cause
823 truncation of the header when folded""")
824
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000825 def test_no_split_long_header(self):
826 eq = self.ndiffAssertEqual
827 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000828 h = Header(hstr)
829 # These come on two lines because Headers are really field value
830 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000831 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000832References:
833 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
834 h = Header('x' * 80)
835 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000836
837 def test_splitting_multiple_long_lines(self):
838 eq = self.ndiffAssertEqual
839 hstr = """\
840from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
841\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
842\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
843"""
844 h = Header(hstr, continuation_ws='\t')
845 eq(h.encode(), """\
846from babylon.socal-raves.org (localhost [127.0.0.1]);
847 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
848 for <mailman-admin@babylon.socal-raves.org>;
849 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
850\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
851 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
852 for <mailman-admin@babylon.socal-raves.org>;
853 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
854\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
855 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
856 for <mailman-admin@babylon.socal-raves.org>;
857 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
858
859 def test_splitting_first_line_only_is_long(self):
860 eq = self.ndiffAssertEqual
861 hstr = """\
862from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
863\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
864\tid 17k4h5-00034i-00
865\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
866 h = Header(hstr, maxlinelen=78, header_name='Received',
867 continuation_ws='\t')
868 eq(h.encode(), """\
869from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
870 helo=cthulhu.gerg.ca)
871\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
872\tid 17k4h5-00034i-00
873\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
874
875 def test_long_8bit_header(self):
876 eq = self.ndiffAssertEqual
877 msg = Message()
878 h = Header('Britische Regierung gibt', 'iso-8859-1',
879 header_name='Subject')
880 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000881 eq(h.encode(maxlinelen=76), """\
882=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
883 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000884 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000885 eq(msg.as_string(maxheaderlen=76), """\
886Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
887 =?iso-8859-1?q?hore-Windkraftprojekte?=
888
889""")
890 eq(msg.as_string(maxheaderlen=0), """\
891Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000892
893""")
894
895 def test_long_8bit_header_no_charset(self):
896 eq = self.ndiffAssertEqual
897 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000898 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
899 'f\xfcr Offshore-Windkraftprojekte '
900 '<a-very-long-address@example.com>')
901 msg['Reply-To'] = header_string
902 self.assertRaises(UnicodeEncodeError, msg.as_string)
903 msg = Message()
904 msg['Reply-To'] = Header(header_string, 'utf-8',
905 header_name='Reply-To')
906 eq(msg.as_string(maxheaderlen=78), """\
907Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
908 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000909
910""")
911
912 def test_long_to_header(self):
913 eq = self.ndiffAssertEqual
914 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
915 '<someone@eecs.umich.edu>,'
916 '"Someone Test #B" <someone@umich.edu>, '
917 '"Someone Test #C" <someone@eecs.umich.edu>, '
918 '"Someone Test #D" <someone@eecs.umich.edu>')
919 msg = Message()
920 msg['To'] = to
921 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000922To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000923 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000924 "Someone Test #C" <someone@eecs.umich.edu>,
925 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000926
927''')
928
929 def test_long_line_after_append(self):
930 eq = self.ndiffAssertEqual
931 s = 'This is an example of string which has almost the limit of header length.'
932 h = Header(s)
933 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000934 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000935This is an example of string which has almost the limit of header length.
936 Add another line.""")
937
938 def test_shorter_line_with_append(self):
939 eq = self.ndiffAssertEqual
940 s = 'This is a shorter line.'
941 h = Header(s)
942 h.append('Add another sentence. (Surprise?)')
943 eq(h.encode(),
944 'This is a shorter line. Add another sentence. (Surprise?)')
945
946 def test_long_field_name(self):
947 eq = self.ndiffAssertEqual
948 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000949 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
950 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
951 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
952 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000953 h = Header(gs, 'iso-8859-1', header_name=fn)
954 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000955 eq(h.encode(maxlinelen=76), """\
956=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
957 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
958 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
959 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000960
961 def test_long_received_header(self):
962 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
963 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
964 'Wed, 05 Mar 2003 18:10:18 -0700')
965 msg = Message()
966 msg['Received-1'] = Header(h, continuation_ws='\t')
967 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000968 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000969 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000970Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
971 Wed, 05 Mar 2003 18:10:18 -0700
972Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
973 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000974
975""")
976
977 def test_string_headerinst_eq(self):
978 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
979 'tu-muenchen.de> (David Bremner\'s message of '
980 '"Thu, 6 Mar 2003 13:58:21 +0100")')
981 msg = Message()
982 msg['Received-1'] = Header(h, header_name='Received-1',
983 continuation_ws='\t')
984 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000985 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000986 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000987Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
988 6 Mar 2003 13:58:21 +0100\")
989Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
990 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000991
992""")
993
994 def test_long_unbreakable_lines_with_continuation(self):
995 eq = self.ndiffAssertEqual
996 msg = Message()
997 t = """\
998iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
999 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1000 msg['Face-1'] = t
1001 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +00001002 # XXX This splitting is all wrong. It the first value line should be
1003 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001004 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001005Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001006 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001007 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001008Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001009 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001010 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1011
1012""")
1013
1014 def test_another_long_multiline_header(self):
1015 eq = self.ndiffAssertEqual
1016 m = ('Received: from siimage.com '
1017 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001018 'Microsoft SMTPSVC(5.0.2195.4905); '
1019 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001020 msg = email.message_from_string(m)
1021 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +00001022Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
1023 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001024
1025''')
1026
1027 def test_long_lines_with_different_header(self):
1028 eq = self.ndiffAssertEqual
1029 h = ('List-Unsubscribe: '
1030 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1031 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1032 '?subject=unsubscribe>')
1033 msg = Message()
1034 msg['List'] = h
1035 msg['List'] = Header(h, header_name='List')
1036 eq(msg.as_string(maxheaderlen=78), """\
1037List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001038 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001039List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001040 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001041
1042""")
1043
R. David Murray6f0022d2011-01-07 21:57:25 +00001044 def test_long_rfc2047_header_with_embedded_fws(self):
1045 h = Header(textwrap.dedent("""\
1046 We're going to pretend this header is in a non-ascii character set
1047 \tto see if line wrapping with encoded words and embedded
1048 folding white space works"""),
1049 charset='utf-8',
1050 header_name='Test')
1051 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1052 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1053 =?utf-8?q?cter_set?=
1054 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1055 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1056
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001057
Ezio Melottib3aedd42010-11-20 19:04:17 +00001058
# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A message whose body starts with a line beginning with "From ".
        self.msg = Message()
        self.msg['From'] = 'aaa@bbb.org'
        self.msg.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangled_from(self):
        # With mangle_from_=True the body line is expected to gain a '>'.
        s = StringIO()
        g = Generator(s, mangle_from_=True)
        g.flatten(self.msg)
        self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangle_from_=False the body is expected to be left untouched.
        s = StringIO()
        g = Generator(s, mangle_from_=False)
        g.flatten(self.msg)
        self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
1090
1091
Ezio Melottib3aedd42010-11-20 19:04:17 +00001092
# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Load the sample audio file and wrap it in a MIMEAudio part.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is expected to base64-decode back to the raw data.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # Unique sentinel so "not found" can be checked by identity.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1131
1132
Ezio Melottib3aedd42010-11-20 19:04:17 +00001133
# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the sample GIF and wrap it in a MIMEImage part.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is expected to base64-decode back to the raw data.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # Unique sentinel so "not found" can be checked by identity.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1172
1173
Ezio Melottib3aedd42010-11-20 19:04:17 +00001174
# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001190
1191
Ezio Melottib3aedd42010-11-20 19:04:17 +00001192
# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # Unique sentinel so "not found" can be checked by identity.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1244
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001245
Ezio Melottib3aedd42010-11-20 19:04:17 +00001246
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001247# Test complicated multipart/* messages
1248class TestMultipart(TestEmailBase):
1249 def setUp(self):
1250 with openfile('PyBanner048.gif', 'rb') as fp:
1251 data = fp.read()
1252 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1253 image = MIMEImage(data, name='dingusfish.gif')
1254 image.add_header('content-disposition', 'attachment',
1255 filename='dingusfish.gif')
1256 intro = MIMEText('''\
1257Hi there,
1258
1259This is the dingus fish.
1260''')
1261 container.attach(intro)
1262 container.attach(image)
1263 container['From'] = 'Barry <barry@digicool.com>'
1264 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1265 container['Subject'] = 'Here is your dingus fish'
1266
1267 now = 987809702.54848599
1268 timetuple = time.localtime(now)
1269 if timetuple[-1] == 0:
1270 tzsecs = time.timezone
1271 else:
1272 tzsecs = time.altzone
1273 if tzsecs > 0:
1274 sign = '-'
1275 else:
1276 sign = '+'
1277 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1278 container['Date'] = time.strftime(
1279 '%a, %d %b %Y %H:%M:%S',
1280 time.localtime(now)) + tzoffset
1281 self._msg = container
1282 self._im = image
1283 self._txt = intro
1284
1285 def test_hierarchy(self):
1286 # convenience
1287 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001288 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001289 raises = self.assertRaises
1290 # tests
1291 m = self._msg
1292 unless(m.is_multipart())
1293 eq(m.get_content_type(), 'multipart/mixed')
1294 eq(len(m.get_payload()), 2)
1295 raises(IndexError, m.get_payload, 2)
1296 m0 = m.get_payload(0)
1297 m1 = m.get_payload(1)
1298 unless(m0 is self._txt)
1299 unless(m1 is self._im)
1300 eq(m.get_payload(), [m0, m1])
1301 unless(not m0.is_multipart())
1302 unless(not m1.is_multipart())
1303
1304 def test_empty_multipart_idempotent(self):
1305 text = """\
1306Content-Type: multipart/mixed; boundary="BOUNDARY"
1307MIME-Version: 1.0
1308Subject: A subject
1309To: aperson@dom.ain
1310From: bperson@dom.ain
1311
1312
1313--BOUNDARY
1314
1315
1316--BOUNDARY--
1317"""
1318 msg = Parser().parsestr(text)
1319 self.ndiffAssertEqual(text, msg.as_string())
1320
1321 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1322 outer = MIMEBase('multipart', 'mixed')
1323 outer['Subject'] = 'A subject'
1324 outer['To'] = 'aperson@dom.ain'
1325 outer['From'] = 'bperson@dom.ain'
1326 outer.set_boundary('BOUNDARY')
1327 self.ndiffAssertEqual(outer.as_string(), '''\
1328Content-Type: multipart/mixed; boundary="BOUNDARY"
1329MIME-Version: 1.0
1330Subject: A subject
1331To: aperson@dom.ain
1332From: bperson@dom.ain
1333
1334--BOUNDARY
1335
1336--BOUNDARY--''')
1337
1338 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1339 outer = MIMEBase('multipart', 'mixed')
1340 outer['Subject'] = 'A subject'
1341 outer['To'] = 'aperson@dom.ain'
1342 outer['From'] = 'bperson@dom.ain'
1343 outer.preamble = ''
1344 outer.epilogue = ''
1345 outer.set_boundary('BOUNDARY')
1346 self.ndiffAssertEqual(outer.as_string(), '''\
1347Content-Type: multipart/mixed; boundary="BOUNDARY"
1348MIME-Version: 1.0
1349Subject: A subject
1350To: aperson@dom.ain
1351From: bperson@dom.ain
1352
1353
1354--BOUNDARY
1355
1356--BOUNDARY--
1357''')
1358
1359 def test_one_part_in_a_multipart(self):
1360 eq = self.ndiffAssertEqual
1361 outer = MIMEBase('multipart', 'mixed')
1362 outer['Subject'] = 'A subject'
1363 outer['To'] = 'aperson@dom.ain'
1364 outer['From'] = 'bperson@dom.ain'
1365 outer.set_boundary('BOUNDARY')
1366 msg = MIMEText('hello world')
1367 outer.attach(msg)
1368 eq(outer.as_string(), '''\
1369Content-Type: multipart/mixed; boundary="BOUNDARY"
1370MIME-Version: 1.0
1371Subject: A subject
1372To: aperson@dom.ain
1373From: bperson@dom.ain
1374
1375--BOUNDARY
1376Content-Type: text/plain; charset="us-ascii"
1377MIME-Version: 1.0
1378Content-Transfer-Encoding: 7bit
1379
1380hello world
1381--BOUNDARY--''')
1382
1383 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1384 eq = self.ndiffAssertEqual
1385 outer = MIMEBase('multipart', 'mixed')
1386 outer['Subject'] = 'A subject'
1387 outer['To'] = 'aperson@dom.ain'
1388 outer['From'] = 'bperson@dom.ain'
1389 outer.preamble = ''
1390 msg = MIMEText('hello world')
1391 outer.attach(msg)
1392 outer.set_boundary('BOUNDARY')
1393 eq(outer.as_string(), '''\
1394Content-Type: multipart/mixed; boundary="BOUNDARY"
1395MIME-Version: 1.0
1396Subject: A subject
1397To: aperson@dom.ain
1398From: bperson@dom.ain
1399
1400
1401--BOUNDARY
1402Content-Type: text/plain; charset="us-ascii"
1403MIME-Version: 1.0
1404Content-Transfer-Encoding: 7bit
1405
1406hello world
1407--BOUNDARY--''')
1408
1409
1410 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1411 eq = self.ndiffAssertEqual
1412 outer = MIMEBase('multipart', 'mixed')
1413 outer['Subject'] = 'A subject'
1414 outer['To'] = 'aperson@dom.ain'
1415 outer['From'] = 'bperson@dom.ain'
1416 outer.preamble = None
1417 msg = MIMEText('hello world')
1418 outer.attach(msg)
1419 outer.set_boundary('BOUNDARY')
1420 eq(outer.as_string(), '''\
1421Content-Type: multipart/mixed; boundary="BOUNDARY"
1422MIME-Version: 1.0
1423Subject: A subject
1424To: aperson@dom.ain
1425From: bperson@dom.ain
1426
1427--BOUNDARY
1428Content-Type: text/plain; charset="us-ascii"
1429MIME-Version: 1.0
1430Content-Transfer-Encoding: 7bit
1431
1432hello world
1433--BOUNDARY--''')
1434
1435
1436 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1437 eq = self.ndiffAssertEqual
1438 outer = MIMEBase('multipart', 'mixed')
1439 outer['Subject'] = 'A subject'
1440 outer['To'] = 'aperson@dom.ain'
1441 outer['From'] = 'bperson@dom.ain'
1442 outer.epilogue = None
1443 msg = MIMEText('hello world')
1444 outer.attach(msg)
1445 outer.set_boundary('BOUNDARY')
1446 eq(outer.as_string(), '''\
1447Content-Type: multipart/mixed; boundary="BOUNDARY"
1448MIME-Version: 1.0
1449Subject: A subject
1450To: aperson@dom.ain
1451From: bperson@dom.ain
1452
1453--BOUNDARY
1454Content-Type: text/plain; charset="us-ascii"
1455MIME-Version: 1.0
1456Content-Transfer-Encoding: 7bit
1457
1458hello world
1459--BOUNDARY--''')
1460
1461
1462 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1463 eq = self.ndiffAssertEqual
1464 outer = MIMEBase('multipart', 'mixed')
1465 outer['Subject'] = 'A subject'
1466 outer['To'] = 'aperson@dom.ain'
1467 outer['From'] = 'bperson@dom.ain'
1468 outer.epilogue = ''
1469 msg = MIMEText('hello world')
1470 outer.attach(msg)
1471 outer.set_boundary('BOUNDARY')
1472 eq(outer.as_string(), '''\
1473Content-Type: multipart/mixed; boundary="BOUNDARY"
1474MIME-Version: 1.0
1475Subject: A subject
1476To: aperson@dom.ain
1477From: bperson@dom.ain
1478
1479--BOUNDARY
1480Content-Type: text/plain; charset="us-ascii"
1481MIME-Version: 1.0
1482Content-Transfer-Encoding: 7bit
1483
1484hello world
1485--BOUNDARY--
1486''')
1487
1488
1489 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1490 eq = self.ndiffAssertEqual
1491 outer = MIMEBase('multipart', 'mixed')
1492 outer['Subject'] = 'A subject'
1493 outer['To'] = 'aperson@dom.ain'
1494 outer['From'] = 'bperson@dom.ain'
1495 outer.epilogue = '\n'
1496 msg = MIMEText('hello world')
1497 outer.attach(msg)
1498 outer.set_boundary('BOUNDARY')
1499 eq(outer.as_string(), '''\
1500Content-Type: multipart/mixed; boundary="BOUNDARY"
1501MIME-Version: 1.0
1502Subject: A subject
1503To: aperson@dom.ain
1504From: bperson@dom.ain
1505
1506--BOUNDARY
1507Content-Type: text/plain; charset="us-ascii"
1508MIME-Version: 1.0
1509Content-Transfer-Encoding: 7bit
1510
1511hello world
1512--BOUNDARY--
1513
1514''')
1515
1516 def test_message_external_body(self):
1517 eq = self.assertEqual
1518 msg = self._msgobj('msg_36.txt')
1519 eq(len(msg.get_payload()), 2)
1520 msg1 = msg.get_payload(1)
1521 eq(msg1.get_content_type(), 'multipart/alternative')
1522 eq(len(msg1.get_payload()), 2)
1523 for subpart in msg1.get_payload():
1524 eq(subpart.get_content_type(), 'message/external-body')
1525 eq(len(subpart.get_payload()), 1)
1526 subsubpart = subpart.get_payload(0)
1527 eq(subsubpart.get_content_type(), 'text/plain')
1528
1529 def test_double_boundary(self):
1530 # msg_37.txt is a multipart that contains two dash-boundary's in a
1531 # row. Our interpretation of RFC 2046 calls for ignoring the second
1532 # and subsequent boundaries.
1533 msg = self._msgobj('msg_37.txt')
1534 self.assertEqual(len(msg.get_payload()), 3)
1535
1536 def test_nested_inner_contains_outer_boundary(self):
1537 eq = self.ndiffAssertEqual
1538 # msg_38.txt has an inner part that contains outer boundaries. My
1539 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1540 # these are illegal and should be interpreted as unterminated inner
1541 # parts.
1542 msg = self._msgobj('msg_38.txt')
1543 sfp = StringIO()
1544 iterators._structure(msg, sfp)
1545 eq(sfp.getvalue(), """\
1546multipart/mixed
1547 multipart/mixed
1548 multipart/alternative
1549 text/plain
1550 text/plain
1551 text/plain
1552 text/plain
1553""")
1554
1555 def test_nested_with_same_boundary(self):
1556 eq = self.ndiffAssertEqual
1557 # msg 39.txt is similarly evil in that it's got inner parts that use
1558 # the same boundary as outer parts. Again, I believe the way this is
1559 # parsed is closest to the spirit of RFC 2046
1560 msg = self._msgobj('msg_39.txt')
1561 sfp = StringIO()
1562 iterators._structure(msg, sfp)
1563 eq(sfp.getvalue(), """\
1564multipart/mixed
1565 multipart/mixed
1566 multipart/alternative
1567 application/octet-stream
1568 application/octet-stream
1569 text/plain
1570""")
1571
1572 def test_boundary_in_non_multipart(self):
1573 msg = self._msgobj('msg_40.txt')
1574 self.assertEqual(msg.as_string(), '''\
1575MIME-Version: 1.0
1576Content-Type: text/html; boundary="--961284236552522269"
1577
1578----961284236552522269
1579Content-Type: text/html;
1580Content-Transfer-Encoding: 7Bit
1581
1582<html></html>
1583
1584----961284236552522269--
1585''')
1586
1587 def test_boundary_with_leading_space(self):
1588 eq = self.assertEqual
1589 msg = email.message_from_string('''\
1590MIME-Version: 1.0
1591Content-Type: multipart/mixed; boundary=" XXXX"
1592
1593-- XXXX
1594Content-Type: text/plain
1595
1596
1597-- XXXX
1598Content-Type: text/plain
1599
1600-- XXXX--
1601''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001602 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001603 eq(msg.get_boundary(), ' XXXX')
1604 eq(len(msg.get_payload()), 2)
1605
1606 def test_boundary_without_trailing_newline(self):
1607 m = Parser().parsestr("""\
1608Content-Type: multipart/mixed; boundary="===============0012394164=="
1609MIME-Version: 1.0
1610
1611--===============0012394164==
1612Content-Type: image/file1.jpg
1613MIME-Version: 1.0
1614Content-Transfer-Encoding: base64
1615
1616YXNkZg==
1617--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001618 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001619
1620
Ezio Melottib3aedd42010-11-20 19:04:17 +00001621
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    # These tests hand the package non-conformant input and check both the
    # values it falls back to and the defects it records on the message.
    # Type checks use assertIsInstance so that a failure names the object
    # that was actually found.

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # The payload is expected to be left as one plain string.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body, boundary lines included, must come back as the
        # payload string.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        # The stray continuation line yields no header; it is recorded as a
        # defect that carries the offending line.
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1733
1734
Ezio Melottib3aedd42010-11-20 19:04:17 +00001735
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Encoded-words are pushed through decode_header(), make_header() and
    # Header.encode(), and the resulting chunks and strings are compared.

    def test_rfc2047_multiline(self):
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        word = (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')
        self.assertEqual(chunks, [
            (b'Re:', None),
            word,
            (b'baz foo bar', None),
            word])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded-words glued to their neighbours are not decoded at all.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # Only complete bytes are tested: (base64 text, decoded bytes).
        cases = (
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        )
        for encoded, decoded in cases:
            chunks = decode_header(template % encoded)
            self.assertEqual(chunks, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1800
Ezio Melottib3aedd42010-11-20 19:04:17 +00001801
# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Covers building, parsing and flattening of message/* containers.
    # Type and identity checks use assertIsInstance/assertIs so that a
    # failure names the object that was actually found.

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored itself, not copied.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # A message/* container already holding a message refuses another.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the two-space indentation of the field lines below
        # was reconstructed -- confirm it against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A message built by hand with a preamble and an epilogue must
        # flatten to exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def _check_container_default_types(self, filename):
        # Parse `filename` and require that its first two parts are
        # message/rfc822 containers whose first sub-message is text/plain,
        # for the default type and the effective content type alike.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = (msg.get_payload(0), msg.get_payload(1))
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            enclosed = container.get_payload(0)
            eq(enclosed.get_default_type(), 'text/plain')
            eq(enclosed.get_content_type(), 'text/plain')

    def test_default_type(self):
        self._check_container_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_container_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the content type must still be
        # reported as message/rfc822 and the parts flatten header-less.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002101
Ezio Melottib3aedd42010-11-20 19:04:17 +00002102
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    linesep = '\n'

    def _msgobj(self, filename):
        # Unlike the base-class helper, hand back the raw text as well so
        # the regenerated output can be compared against it.
        with openfile(filename) as fp:
            data = fp.read()
        return email.message_from_string(data), data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled and require that the
        # result equals the original text.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        check = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        check(msg.get_content_type(), 'text/plain')
        check(msg.get_content_maintype(), 'text')
        check(msg.get_content_subtype(), 'plain')
        check(msg.get_params()[1], ('charset', 'us-ascii'))
        check(msg.get_param('charset'), 'us-ascii')
        check(msg.preamble, None)
        check(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        check = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        check(msg.get_content_type(), 'text/plain')
        check(msg.get_params(), None)
        check(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This one is regenerated with its Unix-From line included.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        check = self.assertEqual
        body = 'Yadda yadda yadda' + self.linesep
        msg, text = self._msgobj('msg_05.txt')
        check(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        check(params['report-type'], 'delivery-status')
        check(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        check(msg.preamble,
              'This is a MIME-encapsulated message.' + self.linesep)
        check(msg.epilogue, self.linesep)
        check(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect: two text parts ...
        for index in (0, 1):
            part = msg.get_payload(index)
            check(part.get_content_type(), 'text/plain')
            check(part.get_payload(), body)
        # ... followed by a message/rfc822 wrapping one more message.
        wrapper = msg.get_payload(2)
        check(wrapper.get_content_type(), 'message/rfc822')
        self.assertTrue(isinstance(wrapper, Message))
        payload = wrapper.get_payload()
        self.assertTrue(isinstance(payload, list))
        check(len(payload), 1)
        enclosed = payload[0]
        self.assertTrue(isinstance(enclosed, Message))
        check(enclosed.get_payload(), body)

    def test_parser(self):
        check = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        check(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertTrue(isinstance(payload, list))
        check(len(payload), 1)
        enclosed = payload[0]
        self.assertTrue(isinstance(enclosed, Message))
        check(enclosed.get_content_type(), 'text/plain')
        body = enclosed.get_payload()
        self.assertTrue(isinstance(body, str))
        check(enclosed.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002265
2266
Ezio Melottib3aedd42010-11-20 19:04:17 +00002267
# Test various other bits of the package's functionality
2269class TestMiscellaneous(TestEmailBase):
2270 def test_message_from_string(self):
2271 with openfile('msg_01.txt') as fp:
2272 text = fp.read()
2273 msg = email.message_from_string(text)
2274 s = StringIO()
2275 # Don't wrap/continue long headers since we're trying to test
2276 # idempotency.
2277 g = Generator(s, maxheaderlen=0)
2278 g.flatten(msg)
2279 self.assertEqual(text, s.getvalue())
2280
2281 def test_message_from_file(self):
2282 with openfile('msg_01.txt') as fp:
2283 text = fp.read()
2284 fp.seek(0)
2285 msg = email.message_from_file(fp)
2286 s = StringIO()
2287 # Don't wrap/continue long headers since we're trying to test
2288 # idempotency.
2289 g = Generator(s, maxheaderlen=0)
2290 g.flatten(msg)
2291 self.assertEqual(text, s.getvalue())
2292
2293 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002294 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002295 with openfile('msg_01.txt') as fp:
2296 text = fp.read()
2297
2298 # Create a subclass
2299 class MyMessage(Message):
2300 pass
2301
2302 msg = email.message_from_string(text, MyMessage)
2303 unless(isinstance(msg, MyMessage))
2304 # Try something more complicated
2305 with openfile('msg_02.txt') as fp:
2306 text = fp.read()
2307 msg = email.message_from_string(text, MyMessage)
2308 for subpart in msg.walk():
2309 unless(isinstance(subpart, MyMessage))
2310
2311 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002312 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002313 # Create a subclass
2314 class MyMessage(Message):
2315 pass
2316
2317 with openfile('msg_01.txt') as fp:
2318 msg = email.message_from_file(fp, MyMessage)
2319 unless(isinstance(msg, MyMessage))
2320 # Try something more complicated
2321 with openfile('msg_02.txt') as fp:
2322 msg = email.message_from_file(fp, MyMessage)
2323 for subpart in msg.walk():
2324 unless(isinstance(subpart, MyMessage))
2325
2326 def test__all__(self):
2327 module = __import__('email')
2328 # Can't use sorted() here due to Python 2.3 compatibility
2329 all = module.__all__[:]
2330 all.sort()
2331 self.assertEqual(all, [
2332 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002333 'header', 'iterators', 'message', 'message_from_binary_file',
2334 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002335 'message_from_string', 'mime', 'parser',
2336 'quoprimime', 'utils',
2337 ])
2338
2339 def test_formatdate(self):
2340 now = time.time()
2341 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2342 time.gmtime(now)[:6])
2343
2344 def test_formatdate_localtime(self):
2345 now = time.time()
2346 self.assertEqual(
2347 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2348 time.localtime(now)[:6])
2349
2350 def test_formatdate_usegmt(self):
2351 now = time.time()
2352 self.assertEqual(
2353 utils.formatdate(now, localtime=False),
2354 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2355 self.assertEqual(
2356 utils.formatdate(now, localtime=False, usegmt=True),
2357 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2358
2359 def test_parsedate_none(self):
2360 self.assertEqual(utils.parsedate(''), None)
2361
2362 def test_parsedate_compact(self):
2363 # The FWS after the comma is optional
2364 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2365 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2366
2367 def test_parsedate_no_dayofweek(self):
2368 eq = self.assertEqual
2369 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2370 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2371
2372 def test_parsedate_compact_no_dayofweek(self):
2373 eq = self.assertEqual
2374 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2375 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2376
R. David Murray4a62e892010-12-23 20:35:46 +00002377 def test_parsedate_no_space_before_positive_offset(self):
2378 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2379 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2380
2381 def test_parsedate_no_space_before_negative_offset(self):
2382 # Issue 1155362: we already handled '+' for this case.
2383 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2384 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2385
2386
R David Murrayaccd1c02011-03-13 20:06:23 -04002387 def test_parsedate_accepts_time_with_dots(self):
2388 eq = self.assertEqual
2389 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2390 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2391 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2392 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2393
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002394 def test_parsedate_acceptable_to_time_functions(self):
2395 eq = self.assertEqual
2396 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2397 t = int(time.mktime(timetup))
2398 eq(time.localtime(t)[:6], timetup[:6])
2399 eq(int(time.strftime('%Y', timetup)), 2003)
2400 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2401 t = int(time.mktime(timetup[:9]))
2402 eq(time.localtime(t)[:6], timetup[:6])
2403 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2404
R. David Murray219d1c82010-08-25 00:45:55 +00002405 def test_parsedate_y2k(self):
2406 """Test for parsing a date with a two-digit year.
2407
2408 Parsing a date with a two-digit year should return the correct
2409 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2410 obsoletes RFC822) requires four-digit years.
2411
2412 """
2413 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2414 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2415 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2416 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2417
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002418 def test_parseaddr_empty(self):
2419 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2420 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2421
2422 def test_noquote_dump(self):
2423 self.assertEqual(
2424 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2425 'A Silly Person <person@dom.ain>')
2426
2427 def test_escape_dump(self):
2428 self.assertEqual(
2429 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2430 r'"A \(Very\) Silly Person" <person@dom.ain>')
2431 a = r'A \(Special\) Person'
2432 b = 'person@dom.ain'
2433 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2434
2435 def test_escape_backslashes(self):
2436 self.assertEqual(
2437 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2438 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2439 a = r'Arthur \Backslash\ Foobar'
2440 b = 'person@dom.ain'
2441 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2442
R David Murray8debacb2011-04-06 09:35:57 -04002443 def test_quotes_unicode_names(self):
2444 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2445 name = "H\u00e4ns W\u00fcrst"
2446 addr = 'person@dom.ain'
2447 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2448 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2449 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2450 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2451 latin1_quopri)
2452
2453 def test_accepts_any_charset_like_object(self):
2454 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2455 name = "H\u00e4ns W\u00fcrst"
2456 addr = 'person@dom.ain'
2457 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2458 foobar = "FOOBAR"
2459 class CharsetMock:
2460 def header_encode(self, string):
2461 return foobar
2462 mock = CharsetMock()
2463 mock_expected = "%s <%s>" % (foobar, addr)
2464 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2465 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2466 utf8_base64)
2467
2468 def test_invalid_charset_like_object_raises_error(self):
2469 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2470 name = "H\u00e4ns W\u00fcrst"
2471 addr = 'person@dom.ain'
2472 # A object without a header_encode method:
2473 bad_charset = object()
2474 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2475 bad_charset)
2476
2477 def test_unicode_address_raises_error(self):
2478 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2479 addr = 'pers\u00f6n@dom.in'
2480 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2481 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2482
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002483 def test_name_with_dot(self):
2484 x = 'John X. Doe <jxd@example.com>'
2485 y = '"John X. Doe" <jxd@example.com>'
2486 a, b = ('John X. Doe', 'jxd@example.com')
2487 self.assertEqual(utils.parseaddr(x), (a, b))
2488 self.assertEqual(utils.parseaddr(y), (a, b))
2489 # formataddr() quotes the name if there's a dot in it
2490 self.assertEqual(utils.formataddr((a, b)), y)
2491
R. David Murray5397e862010-10-02 15:58:26 +00002492 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2493 # issue 10005. Note that in the third test the second pair of
2494 # backslashes is not actually a quoted pair because it is not inside a
2495 # comment or quoted string: the address being parsed has a quoted
2496 # string containing a quoted backslash, followed by 'example' and two
2497 # backslashes, followed by another quoted string containing a space and
2498 # the word 'example'. parseaddr copies those two backslashes
2499 # literally. Per rfc5322 this is not technically correct since a \ may
2500 # not appear in an address outside of a quoted string. It is probably
2501 # a sensible Postel interpretation, though.
2502 eq = self.assertEqual
2503 eq(utils.parseaddr('""example" example"@example.com'),
2504 ('', '""example" example"@example.com'))
2505 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2506 ('', '"\\"example\\" example"@example.com'))
2507 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2508 ('', '"\\\\"example\\\\" example"@example.com'))
2509
    def test_parseaddr_preserves_spaces_in_local_part(self):
        """Check how parseaddr() treats white space inside a local part."""
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a sequence
        # of atoms, not dotatoms).  On the other hand we strip whitespace from
        # before the @ and around dots, on the assumption that the whitespace
        # around the punctuation is a mistake in what would otherwise be
        # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
        # well.
        # NOTE(review): the first two assertions are textually identical in
        # this copy; presumably one of them used a longer run of spaces that
        # was collapsed -- confirm against the upstream file.
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr("merwok wok@xample.com"))
        # Leading space and the space before '@' are both dropped.
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr(" merwok wok @xample.com"))
        self.assertEqual(('', 'merwok"wok" wok@xample.com'),
            utils.parseaddr('merwok"wok" wok@xample.com'))
        # White space around the dots is removed.
        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
            utils.parseaddr('merwok. wok . wok@xample.com'))
2527
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002528 def test_multiline_from_comment(self):
2529 x = """\
2530Foo
2531\tBar <foo@example.com>"""
2532 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2533
2534 def test_quote_dump(self):
2535 self.assertEqual(
2536 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2537 r'"A Silly; Person" <person@dom.ain>')
2538
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002539 def test_charset_richcomparisons(self):
2540 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002541 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002542 cset1 = Charset()
2543 cset2 = Charset()
2544 eq(cset1, 'us-ascii')
2545 eq(cset1, 'US-ASCII')
2546 eq(cset1, 'Us-AsCiI')
2547 eq('us-ascii', cset1)
2548 eq('US-ASCII', cset1)
2549 eq('Us-AsCiI', cset1)
2550 ne(cset1, 'usascii')
2551 ne(cset1, 'USASCII')
2552 ne(cset1, 'UsAsCiI')
2553 ne('usascii', cset1)
2554 ne('USASCII', cset1)
2555 ne('UsAsCiI', cset1)
2556 eq(cset1, cset2)
2557 eq(cset2, cset1)
2558
2559 def test_getaddresses(self):
2560 eq = self.assertEqual
2561 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2562 'Bud Person <bperson@dom.ain>']),
2563 [('Al Person', 'aperson@dom.ain'),
2564 ('Bud Person', 'bperson@dom.ain')])
2565
    def test_getaddresses_nasty(self):
        """Check getaddresses() output for empty-group and garbage input."""
        eq = self.assertEqual
        # An empty group yields a single empty (name, address) pair.
        eq(utils.getaddresses(['foo: ;']), [('', '')])
        # Garbage input; the expected list records what the parser returns
        # for it.
        eq(utils.getaddresses(
           ['[]*-- =~$']),
           [('', ''), ('', ''), ('', '*--')])
        # An empty group followed by a normal quoted-name address.
        eq(utils.getaddresses(
           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2575
2576 def test_getaddresses_embedded_comment(self):
2577 """Test proper handling of a nested comment"""
2578 eq = self.assertEqual
2579 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2580 eq(addrs[0][1], 'foo@bar.com')
2581
2582 def test_utils_quote_unquote(self):
2583 eq = self.assertEqual
2584 msg = Message()
2585 msg.add_header('content-disposition', 'attachment',
2586 filename='foo\\wacky"name')
2587 eq(msg.get_filename(), 'foo\\wacky"name')
2588
2589 def test_get_body_encoding_with_bogus_charset(self):
2590 charset = Charset('not a charset')
2591 self.assertEqual(charset.get_body_encoding(), 'base64')
2592
2593 def test_get_body_encoding_with_uppercase_charset(self):
2594 eq = self.assertEqual
2595 msg = Message()
2596 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2597 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2598 charsets = msg.get_charsets()
2599 eq(len(charsets), 1)
2600 eq(charsets[0], 'utf-8')
2601 charset = Charset(charsets[0])
2602 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002603 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002604 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2605 eq(msg.get_payload(decode=True), b'hello world')
2606 eq(msg['content-transfer-encoding'], 'base64')
2607 # Try another one
2608 msg = Message()
2609 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2610 charsets = msg.get_charsets()
2611 eq(len(charsets), 1)
2612 eq(charsets[0], 'us-ascii')
2613 charset = Charset(charsets[0])
2614 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2615 msg.set_payload('hello world', charset=charset)
2616 eq(msg.get_payload(), 'hello world')
2617 eq(msg['content-transfer-encoding'], '7bit')
2618
2619 def test_charsets_case_insensitive(self):
2620 lc = Charset('us-ascii')
2621 uc = Charset('US-ASCII')
2622 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2623
2624 def test_partial_falls_inside_message_delivery_status(self):
2625 eq = self.ndiffAssertEqual
2626 # The Parser interface provides chunks of data to FeedParser in 8192
2627 # byte gulps. SF bug #1076485 found one of those chunks inside
2628 # message/delivery-status header block, which triggered an
2629 # unreadline() of NeedMoreData.
2630 msg = self._msgobj('msg_43.txt')
2631 sfp = StringIO()
2632 iterators._structure(msg, sfp)
2633 eq(sfp.getvalue(), """\
2634multipart/report
2635 text/plain
2636 message/delivery-status
2637 text/plain
2638 text/plain
2639 text/plain
2640 text/plain
2641 text/plain
2642 text/plain
2643 text/plain
2644 text/plain
2645 text/plain
2646 text/plain
2647 text/plain
2648 text/plain
2649 text/plain
2650 text/plain
2651 text/plain
2652 text/plain
2653 text/plain
2654 text/plain
2655 text/plain
2656 text/plain
2657 text/plain
2658 text/plain
2659 text/plain
2660 text/plain
2661 text/plain
2662 text/plain
2663 text/rfc822-headers
2664""")
2665
R. David Murraya0b44b52010-12-02 21:47:19 +00002666 def test_make_msgid_domain(self):
2667 self.assertEqual(
2668 email.utils.make_msgid(domain='testdomain-string')[-19:],
2669 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002670
Ezio Melottib3aedd42010-11-20 19:04:17 +00002671
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002672# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and the feed parser's line buffer."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields every payload line, in order."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        # msg_19.txt holds the expected concatenated body lines.
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator() finds both text parts of msg_04.txt."""
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """A message without subparts is matched by main and sub type."""
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk pushed, number of complete lines that become
        # readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object; test identity so a
                # line can never be mistaken for it.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values on failure.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # The lines read back must reassemble to exactly what was pushed.
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
2759
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002760
Ezio Melottib3aedd42010-11-20 19:04:17 +00002761
class TestParsers(TestEmailBase):
    """Parser tests: header-only parsing, continuation lines, line endings."""

    # Show complete diffs when a whole-message comparison fails.
    maxDiff = None

    def test_header_parser(self):
        """HeaderParser reads the headers and leaves the body as a string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        """BytesHeaderParser behaves like HeaderParser on binary input."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        """A whitespace-only line after a header is a continuation line."""
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """Same as above, with the continued header last in the block."""
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """A CRLF-delimited multipart message splits into its two parts."""
        eq = self.assertEqual
        # newline='\n' keeps the file's CRLF line endings intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Flattening with linesep='\\r\\n' reproduces the CRLF input."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        """Each digest part of msg_28.txt wraps one text/plain message."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        """The last of three headers parses without a trailing newline."""
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """CRLF line endings are not left attached to header values."""
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Field names made of unusual printable characters are accepted."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        # Iterating a message yields its field names.
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space in the field name yields no headers."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character field names are valid headers."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() does not depend on keys() returning a mutable list.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part body ending in CRLF keeps it before the closing boundary."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002937
Ezio Melottib3aedd42010-11-20 19:04:17 +00002938
R. David Murray96fd54e2010-10-08 15:55:28 +00002939class Test8BitBytesHandling(unittest.TestCase):
2940 # In Python3 all input is string, but that doesn't work if the actual input
2941 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2942 # decode byte streams using the surrogateescape error handler, and
2943 # reconvert to binary at appropriate places if we detect surrogates. This
2944 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2945 # but it does allow us to parse and preserve them, and to decode body
2946 # parts that use an 8bit CTE.
2947
    # Message template shared by the body tests in this class; each test
    # fills in {charset}, {cte} and {bodyline} with str.format().
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)
2957
2958 def test_known_8bit_CTE(self):
2959 m = self.bodytest_msg.format(charset='utf-8',
2960 cte='8bit',
2961 bodyline='pöstal').encode('utf-8')
2962 msg = email.message_from_bytes(m)
2963 self.assertEqual(msg.get_payload(), "pöstal\n")
2964 self.assertEqual(msg.get_payload(decode=True),
2965 "pöstal\n".encode('utf-8'))
2966
2967 def test_unknown_8bit_CTE(self):
2968 m = self.bodytest_msg.format(charset='notavalidcharset',
2969 cte='8bit',
2970 bodyline='pöstal').encode('utf-8')
2971 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002972 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00002973 self.assertEqual(msg.get_payload(decode=True),
2974 "pöstal\n".encode('utf-8'))
2975
2976 def test_8bit_in_quopri_body(self):
2977 # This is non-RFC compliant data...without 'decode' the library code
2978 # decodes the body using the charset from the headers, and because the
2979 # source byte really is utf-8 this works. This is likely to fail
2980 # against real dirty data (ie: produce mojibake), but the data is
2981 # invalid anyway so it is as good a guess as any. But this means that
2982 # this test just confirms the current behavior; that behavior is not
2983 # necessarily the best possible behavior. With 'decode' it is
2984 # returning the raw bytes, so that test should be of correct behavior,
2985 # or at least produce the same result that email4 did.
2986 m = self.bodytest_msg.format(charset='utf-8',
2987 cte='quoted-printable',
2988 bodyline='p=C3=B6stál').encode('utf-8')
2989 msg = email.message_from_bytes(m)
2990 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2991 self.assertEqual(msg.get_payload(decode=True),
2992 'pöstál\n'.encode('utf-8'))
2993
2994 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2995 # This is similar to the previous test, but proves that if the 8bit
2996 # byte is undecodeable in the specified charset, it gets replaced
2997 # by the unicode 'unknown' character. Again, this may or may not
2998 # be the ideal behavior. Note that if decode=False none of the
2999 # decoders will get involved, so this is the only test we need
3000 # for this behavior.
3001 m = self.bodytest_msg.format(charset='ascii',
3002 cte='quoted-printable',
3003 bodyline='p=C3=B6stál').encode('utf-8')
3004 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003005 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003006 self.assertEqual(msg.get_payload(decode=True),
3007 'pöstál\n'.encode('utf-8'))
3008
    def test_8bit_in_base64_body(self):
        """Check get_payload(decode=True) for a base64 body with an 8bit byte."""
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='base64',
                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
        msg = email.message_from_bytes(m)
        # The expected value is the body line itself, utf-8 encoded.
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))
3018
3019 def test_8bit_in_uuencode_body(self):
3020 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3021 # normal means, so the block is returned undecoded, but as bytes.
3022 m = self.bodytest_msg.format(charset='utf-8',
3023 cte='uuencode',
3024 bodyline='<,.V<W1A; á ').encode('utf-8')
3025 msg = email.message_from_bytes(m)
3026 self.assertEqual(msg.get_payload(decode=True),
3027 '<,.V<W1A; á \n'.encode('utf-8'))
3028
3029
    # Each entry pairs a raw header source line with a (name, value) tuple;
    # test_print_8bit_headers uses the value as the header's printed form.
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                        ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    # The raw source lines joined into one utf-8 encoded message; the final
    # line is not a header and is printed as the body in
    # test_print_8bit_headers.
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003042
3043 def test_get_8bit_header(self):
3044 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003045 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3046 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003047
3048 def test_print_8bit_headers(self):
3049 msg = email.message_from_bytes(self.headertest_msg)
3050 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003051 textwrap.dedent("""\
3052 From: {}
3053 To: {}
3054 Subject: {}
3055 From: {}
3056
3057 Yes, they are flying.
3058 """).format(*[expected[1] for (_, expected) in
3059 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003060
3061 def test_values_with_8bit_headers(self):
3062 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003063 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003064 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003065 'b\uFFFD\uFFFDz',
3066 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3067 'coll\uFFFD\uFFFDgue, le pouf '
3068 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003069 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003070 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003071
3072 def test_items_with_8bit_headers(self):
3073 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003074 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003075 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003076 ('To', 'b\uFFFD\uFFFDz'),
3077 ('Subject', 'Maintenant je vous '
3078 'pr\uFFFD\uFFFDsente '
3079 'mon coll\uFFFD\uFFFDgue, le pouf '
3080 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3081 '\tJean de Baddie'),
3082 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003083
3084 def test_get_all_with_8bit_headers(self):
3085 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003086 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003087 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003088 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003089
R David Murraya2150232011-03-16 21:11:23 -04003090 def test_get_content_type_with_8bit(self):
3091 msg = email.message_from_bytes(textwrap.dedent("""\
3092 Content-Type: text/pl\xA7in; charset=utf-8
3093 """).encode('latin-1'))
3094 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3095 self.assertEqual(msg.get_content_maintype(), "text")
3096 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3097
3098 def test_get_params_with_8bit(self):
3099 msg = email.message_from_bytes(
3100 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3101 self.assertEqual(msg.get_params(header='x-header'),
3102 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3103 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3104 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3105 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3106
3107 def test_get_rfc2231_params_with_8bit(self):
3108 msg = email.message_from_bytes(textwrap.dedent("""\
3109 Content-Type: text/plain; charset=us-ascii;
3110 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3111 ).encode('latin-1'))
3112 self.assertEqual(msg.get_param('title'),
3113 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3114
3115 def test_set_rfc2231_params_with_8bit(self):
3116 msg = email.message_from_bytes(textwrap.dedent("""\
3117 Content-Type: text/plain; charset=us-ascii;
3118 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3119 ).encode('latin-1'))
3120 msg.set_param('title', 'test')
3121 self.assertEqual(msg.get_param('title'), 'test')
3122
3123 def test_del_rfc2231_params_with_8bit(self):
3124 msg = email.message_from_bytes(textwrap.dedent("""\
3125 Content-Type: text/plain; charset=us-ascii;
3126 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3127 ).encode('latin-1'))
3128 msg.del_param('title')
3129 self.assertEqual(msg.get_param('title'), None)
3130 self.assertEqual(msg.get_content_maintype(), 'text')
3131
3132 def test_get_payload_with_8bit_cte_header(self):
3133 msg = email.message_from_bytes(textwrap.dedent("""\
3134 Content-Transfer-Encoding: b\xa7se64
3135 Content-Type: text/plain; charset=latin-1
3136
3137 payload
3138 """).encode('latin-1'))
3139 self.assertEqual(msg.get_payload(), 'payload\n')
3140 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3141
    # A utf-8 encoded message with non-ASCII header values and an 8bit body,
    # used as input by the generator tests that follow.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')
3153
3154 def test_bytes_generator(self):
3155 msg = email.message_from_bytes(self.non_latin_bin_msg)
3156 out = BytesIO()
3157 email.generator.BytesGenerator(out).flatten(msg)
3158 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3159
R. David Murray7372a072011-01-26 21:21:32 +00003160 def test_bytes_generator_handles_None_body(self):
3161 #Issue 11019
3162 msg = email.message.Message()
3163 out = BytesIO()
3164 email.generator.BytesGenerator(out).flatten(msg)
3165 self.assertEqual(out.getvalue(), b"\n")
3166
# Text that test_generator_handles_8bit expects the str Generator to emit for
# non_latin_bin_msg: the 8-bit headers appear as unknown-8bit encoded words
# (Subject folded over three lines) and the body is base64.
R. David Murray92532142011-01-07 23:25:30 +00003167 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003168 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003169 To: =?unknown-8bit?q?b=C3=A1z?=
3170 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3171 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3172 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003173 Mime-Version: 1.0
3174 Content-Type: text/plain; charset="utf-8"
3175 Content-Transfer-Encoding: base64
3176
3177 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3178 """)
3179
3180 def test_generator_handles_8bit(self):
3181 msg = email.message_from_bytes(self.non_latin_bin_msg)
3182 out = StringIO()
3183 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003184 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003185
3186 def test_bytes_generator_with_unix_from(self):
3187 # The unixfrom contains a current date, so we can't check it
3188 # literally. Just make sure the first word is 'From' and the
3189 # rest of the message matches the input.
3190 msg = email.message_from_bytes(self.non_latin_bin_msg)
3191 out = BytesIO()
3192 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3193 lines = out.getvalue().split(b'\n')
3194 self.assertEqual(lines[0].split()[0], b'From')
3195 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3196
R. David Murray92532142011-01-07 23:25:30 +00003197 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3198 non_latin_bin_msg_as7bit[2:4] = [
3199 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3200 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3201 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3202
R. David Murray96fd54e2010-10-08 15:55:28 +00003203 def test_message_from_binary_file(self):
3204 fn = 'test.msg'
3205 self.addCleanup(unlink, fn)
3206 with open(fn, 'wb') as testfile:
3207 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003208 with open(fn, 'rb') as testfile:
3209 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003210 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3211
# Latin-1 encoded message bytes with an 8bit body (declared charset
# "latin-1"); input for the re-encoding, DecodedGenerator and feed-parser
# tests that read self.latin_bin_msg.
3212 latin_bin_msg = textwrap.dedent("""\
3213 From: foo@bar.com
3214 To: Dinsdale
3215 Subject: Nudge nudge, wink, wink
3216 Mime-Version: 1.0
3217 Content-Type: text/plain; charset="latin-1"
3218 Content-Transfer-Encoding: 8bit
3219
3220 oh là là, know what I mean, know what I mean?
3221 """).encode('latin-1')
3222
# Text the tests expect from str() of the message parsed from latin_bin_msg:
# the charset is spelled iso-8859-1 and the body is quoted-printable.
3223 latin_bin_msg_as7bit = textwrap.dedent("""\
3224 From: foo@bar.com
3225 To: Dinsdale
3226 Subject: Nudge nudge, wink, wink
3227 Mime-Version: 1.0
3228 Content-Type: text/plain; charset="iso-8859-1"
3229 Content-Transfer-Encoding: quoted-printable
3230
3231 oh l=E0 l=E0, know what I mean, know what I mean?
3232 """)
3233
3234 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3235 m = email.message_from_bytes(self.latin_bin_msg)
3236 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3237
3238 def test_decoded_generator_emits_unicode_body(self):
3239 m = email.message_from_bytes(self.latin_bin_msg)
3240 out = StringIO()
3241 email.generator.DecodedGenerator(out).flatten(m)
3242 #DecodedHeader output contains an extra blank line compared
3243 #to the input message. RDM: not sure if this is a bug or not,
3244 #but it is not specific to the 8bit->7bit conversion.
3245 self.assertEqual(out.getvalue(),
3246 self.latin_bin_msg.decode('latin-1')+'\n')
3247
3248 def test_bytes_feedparser(self):
3249 bfp = email.feedparser.BytesFeedParser()
3250 for i in range(0, len(self.latin_bin_msg), 10):
3251 bfp.feed(self.latin_bin_msg[i:i+10])
3252 m = bfp.close()
3253 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3254
R. David Murray8451c4b2010-10-23 22:19:56 +00003255 def test_crlf_flatten(self):
3256 with openfile('msg_26.txt', 'rb') as fp:
3257 text = fp.read()
3258 msg = email.message_from_bytes(text)
3259 s = BytesIO()
3260 g = email.generator.BytesGenerator(s)
3261 g.flatten(msg, linesep='\r\n')
3262 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003263
3264 def test_8bit_multipart(self):
3265 # Issue 11605
# Round-trip check: a multipart/alternative message whose two parts declare
# charset utf-8 with an 8bit transfer encoding is parsed from bytes and
# flattened again with BytesGenerator; the output must equal the input bytes.
3266 source = textwrap.dedent("""\
3267 Date: Fri, 18 Mar 2011 17:15:43 +0100
3268 To: foo@example.com
3269 From: foodwatch-Newsletter <bar@example.com>
3270 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3271 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3272 MIME-Version: 1.0
3273 Content-Type: multipart/alternative;
3274 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3275
3276 --b1_76a486bee62b0d200f33dc2ca08220ad
3277 Content-Type: text/plain; charset="utf-8"
3278 Content-Transfer-Encoding: 8bit
3279
3280 Guten Tag, ,
3281
3282 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3283 Nachrichten aus Japan.
3284
3285
3286 --b1_76a486bee62b0d200f33dc2ca08220ad
3287 Content-Type: text/html; charset="utf-8"
3288 Content-Transfer-Encoding: 8bit
3289
3290 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3291 "http://www.w3.org/TR/html4/loose.dtd">
3292 <html lang="de">
3293 <head>
3294 <title>foodwatch - Newsletter</title>
3295 </head>
3296 <body>
3297 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3298 die Nachrichten aus Japan.</p>
3299 </body>
3300 </html>
3301 --b1_76a486bee62b0d200f33dc2ca08220ad--
3302
3303 """).encode('utf-8')
3304 msg = email.message_from_bytes(source)
3305 s = BytesIO()
3306 g = email.generator.BytesGenerator(s)
3307 g.flatten(msg)
3308 self.assertEqual(s.getvalue(), source)
3309
# unittest setting: None removes the length limit on diffs shown when an
# equality assertion in this class fails.
R. David Murray8451c4b2010-10-23 22:19:56 +00003310 maxDiff = None
3311
Ezio Melottib3aedd42010-11-20 19:04:17 +00003312
class BaseTestBytesGeneratorIdempotent:
    """Mixin providing bytes-based ``_msgobj`` and ``_idempotent`` helpers.

    The concrete class must define ``linesep`` (str passed to flatten()),
    ``blinesep`` (bytes replacement) and ``normalize_linesep_regex``
    (compiled bytes pattern), and must offer ``assertEqual``.
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return ``(message, data)`` for the named test file.

        Line endings in the file are rewritten with ``blinesep`` before parsing.
        """
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Assert that flattening *msg* with BytesGenerator yields *data*."""
        sink = BytesIO()
        # maxheaderlen=0 turns off header re-wrapping.
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003329
3330
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Run the TestIdempotent cases through BytesGenerator with LF line ends."""

    # Rewrites every CRLF in the test data to the target separator.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3336
3337
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Run the TestIdempotent cases through BytesGenerator with CRLF line ends."""

    # Matches only a LF that is not already preceded by CR, so existing
    # CRLF pairs in the test data are left alone.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3343
Ezio Melottib3aedd42010-11-20 19:04:17 +00003344
class TestBase64(unittest.TestCase):
    """Checks for the helpers in email.base64mime."""

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        # Expected encoded length for inputs of 0 through 14 characters.
        expected_sizes = (0, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16, 20, 20)
        for size, bsize in enumerate(expected_sizes):
            check(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        for encoded, raw in (('', b''), ('aGVsbG8=', b'hello')):
            self.assertEqual(base64mime.decode(encoded), raw)

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Input that already ends with a newline
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        repeated = b'xxxx ' * 20
        # The maxlinelen argument
        check(encode(repeated, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # The eol argument
        check(encode(repeated, maxlinelen=40, eol='\r\n'),
              """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        encode = base64mime.header_encode
        default_charset_cases = (
            ('hello', '=?iso-8859-1?b?aGVsbG8=?='),
            ('hello\r\nworld', '=?iso-8859-1?b?aGVsbG8NCndvcmxk?='),
            ('hello\nworld', '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
        )
        for text, expected in default_charset_cases:
            self.assertEqual(encode(text), expected)
        # The charset option
        self.assertEqual(encode('hello', charset='iso-8859-2'),
                         '=?iso-8859-2?b?aGVsbG8=?=')
        self.assertEqual(encode('hello\nworld'),
                         '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003395
3396
Ezio Melottib3aedd42010-11-20 19:04:17 +00003397
class TestQuopri(unittest.TestCase):
    """Tests for the quoted-printable helpers in email.quoprimime."""

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Real assertions rather than bare ``assert`` statements, so the
        # sanity check on the fixture still runs under ``python -O``.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        """Check header_encode(); *charset* None means use the function default."""
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        """Check that header_decode() maps the first argument to the second."""
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        """Check decode(); *eol* None means use the function default."""
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello    \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        """Check body_encode() and, where lines can be split, the line limit.

        *maxlinelen* and *eol* are forwarded only when given; otherwise the
        function defaults apply and the local values mirror those defaults.
        """
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values
    # in body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last whitespace character of a line is quoted; the
        # spaces before it stay literal.
        self._test_encode('hello    ', 'hello   =20')

    def test_encode_one_line_trailing_spaces(self):
        self._test_encode('hello   \n', 'hello  =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello  \t', 'hello  =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input comes out as the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
3710
3711
Ezio Melottib3aedd42010-11-20 19:04:17 +00003712
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003713# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for email.charset.Charset."""

    def tearDown(self):
        # Drop the 'fake' charset entry if a test registered one.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        ascii_charset = Charset('us-ascii')
        # Make sure us-ascii = no Unicode conversion
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        charset = Charset('iso-8859-1')
        check('hello w=F6rld', charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        charset = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        charset = Charset('us-ascii')
        check('hello world', charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        charset = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        charset = Charset('fake')
        check('hello world', charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        # A name that cannot be encoded as ASCII is rejected.
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3768
3769
Ezio Melottib3aedd42010-11-20 19:04:17 +00003770
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003771# Test multilingual MIME headers.
3772class TestHeader(TestEmailBase):
3773 def test_simple(self):
3774 eq = self.ndiffAssertEqual
3775 h = Header('Hello World!')
3776 eq(h.encode(), 'Hello World!')
3777 h.append(' Goodbye World!')
3778 eq(h.encode(), 'Hello World! Goodbye World!')
3779
3780 def test_simple_surprise(self):
3781 eq = self.ndiffAssertEqual
3782 h = Header('Hello World!')
3783 eq(h.encode(), 'Hello World!')
3784 h.append('Goodbye World!')
3785 eq(h.encode(), 'Hello World! Goodbye World!')
3786
3787 def test_header_needs_no_decoding(self):
3788 h = 'no decoding needed'
3789 self.assertEqual(decode_header(h), [(h, None)])
3790
3791 def test_long(self):
3792 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
3793 maxlinelen=76)
3794 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003795 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003796
3797 def test_multilingual(self):
3798 eq = self.ndiffAssertEqual
3799 g = Charset("iso-8859-1")
3800 cz = Charset("iso-8859-2")
3801 utf8 = Charset("utf-8")
3802 g_head = (b'Die Mieter treten hier ein werden mit einem '
3803 b'Foerderband komfortabel den Korridor entlang, '
3804 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
3805 b'gegen die rotierenden Klingen bef\xf6rdert. ')
3806 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
3807 b'd\xf9vtipu.. ')
3808 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
3809 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
3810 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
3811 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
3812 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
3813 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
3814 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
3815 '\u3044\u307e\u3059\u3002')
3816 h = Header(g_head, g)
3817 h.append(cz_head, cz)
3818 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00003819 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003820 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00003821=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
3822 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
3823 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
3824 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003825 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
3826 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
3827 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
3828 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00003829 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
3830 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
3831 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
3832 decoded = decode_header(enc)
3833 eq(len(decoded), 3)
3834 eq(decoded[0], (g_head, 'iso-8859-1'))
3835 eq(decoded[1], (cz_head, 'iso-8859-2'))
3836 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003837 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00003838 eq(ustr,
3839 (b'Die Mieter treten hier ein werden mit einem Foerderband '
3840 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
3841 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
3842 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
3843 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
3844 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
3845 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
3846 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
3847 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
3848 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
3849 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
3850 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
3851 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
3852 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
3853 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
3854 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
3855 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003856 # Test make_header()
3857 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00003858 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003859
3860 def test_empty_header_encode(self):
3861 h = Header()
3862 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003863
def test_header_ctor_default_args(self):
    """A default-constructed Header compares equal to plain strings.

    The header is compared directly against a str, first while empty and
    then after one chunk has been appended with an explicit Charset object.
    """
    check = self.ndiffAssertEqual
    header = Header()
    check(header, '')
    latin1 = Charset('iso-8859-1')
    header.append('foo', latin1)
    check(header, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003870
def test_explicit_maxlinelen(self):
    """Check where a long ASCII header folds under different settings.

    The same text is wrapped three ways: with no extra arguments, with
    header_name='Subject', and with header_name='Subject' plus
    maxlinelen=1024.  In every case str() must give back the unfolded text.
    """
    check = self.ndiffAssertEqual
    text = ('A very long line that must get split to something other '
            'than at the 76th character boundary to test the non-default '
            'behavior')

    # No header name, no explicit maximum line length.
    bare = Header(text)
    check(bare.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
    check(str(bare), text)

    # Naming the header moves the expected fold one word earlier.
    named = Header(text, header_name='Subject')
    check(named.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
    check(str(named), text)

    # With maxlinelen=1024 the encoded form is expected to be unfolded.
    wide = Header(text, maxlinelen=1024, header_name='Subject')
    check(wide.encode(), text)
    check(str(wide), text)
3889
def test_quopri_splittable(self):
    """Check splitting of a q-encoded iso-8859-1 header at small widths.

    The same 100-character payload is encoded with maxlinelen=20 and then
    maxlinelen=40; each result is compared with the expected folded text
    and then decoded again to confirm the payload survives the round trip.
    """
    check = self.ndiffAssertEqual
    payload = 'xxxx ' * 20

    # maxlinelen=20
    narrow = Header(charset='iso-8859-1', maxlinelen=20)
    narrow.append(payload)
    encoded = narrow.encode()
    check(encoded, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
    check(payload, str(make_header(decode_header(encoded))))

    # maxlinelen=40
    wide = Header(charset='iso-8859-1', maxlinelen=40)
    wide.append(payload)
    encoded = wide.encode()
    check(encoded, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
    check(payload, str(make_header(decode_header(encoded))))
3958
def test_base64_splittable(self):
    """Check splitting of a b-encoded koi8-r header at small widths.

    The same 100-character payload is encoded with maxlinelen=20 and then
    maxlinelen=40; each result is compared with the expected folded text
    and then decoded again to confirm the payload survives the round trip.
    """
    check = self.ndiffAssertEqual
    payload = 'xxxx ' * 20

    # maxlinelen=20
    narrow = Header(charset='koi8-r', maxlinelen=20)
    narrow.append(payload)
    encoded = narrow.encode()
    check(encoded, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
    check(payload, str(make_header(decode_header(encoded))))

    # maxlinelen=40
    wide = Header(charset='koi8-r', maxlinelen=40)
    wide.append(payload)
    encoded = wide.encode()
    check(encoded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
    check(payload, str(make_header(decode_header(encoded))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004012
4013 def test_us_ascii_header(self):
4014 eq = self.assertEqual
4015 s = 'hello'
4016 x = decode_header(s)
4017 eq(x, [('hello', None)])
4018 h = make_header(x)
4019 eq(s, h.encode())
4020
4021 def test_string_charset(self):
4022 eq = self.assertEqual
4023 h = Header()
4024 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004025 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004026
4027## def test_unicode_error(self):
4028## raises = self.assertRaises
4029## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4030## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4031## h = Header()
4032## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4033## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4034## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4035
4036 def test_utf8_shortest(self):
4037 eq = self.assertEqual
4038 h = Header('p\xf6stal', 'utf-8')
4039 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4040 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4041 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4042
4043 def test_bad_8bit_header(self):
4044 raises = self.assertRaises
4045 eq = self.assertEqual
4046 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4047 raises(UnicodeError, Header, x)
4048 h = Header()
4049 raises(UnicodeError, h.append, x)
4050 e = x.decode('utf-8', 'replace')
4051 eq(str(Header(x, errors='replace')), e)
4052 h.append(x, errors='replace')
4053 eq(str(h), e)
4054
def test_escaped_8bit_header(self):
    """A surrogate-escaped header with the unknown-8bit charset.

    str() of the header is expected to show U+FFFD where the escaped byte
    was, while decode_header() is expected to hand back the escaped text
    together with the 'unknown-8bit' charset name.
    """
    raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
    escaped = raw.decode('ascii', 'surrogateescape')
    header = Header(escaped, charset=email.charset.UNKNOWN8BIT)
    self.assertEqual(
        str(header),
        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
    chunks = email.header.decode_header(header)
    self.assertEqual(chunks, [(escaped, 'unknown-8bit')])
4062
4063 def test_modify_returned_list_does_not_change_header(self):
4064 h = Header('test')
4065 chunks = email.header.decode_header(h)
4066 chunks.append(('ascii', 'test2'))
4067 self.assertEqual(str(h), 'test')
4068
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004069 def test_encoded_adjacent_nonencoded(self):
4070 eq = self.assertEqual
4071 h = Header()
4072 h.append('hello', 'iso-8859-1')
4073 h.append('world')
4074 s = h.encode()
4075 eq(s, '=?iso-8859-1?q?hello?= world')
4076 h = make_header(decode_header(s))
4077 eq(h.encode(), s)
4078
def test_whitespace_eater(self):
    """Adjacent encoded words with the same charset decode as one chunk.

    The two koi8-r encoded words in the input are expected to come back as
    a single chunk, and re-encoding the parsed parts is expected to produce
    one combined encoded word.
    """
    check = self.assertEqual
    raw = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
    expected_parts = [
        (b'Subject:', None),
        (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
        (b'zz.', None),
    ]
    parts = decode_header(raw)
    check(parts, expected_parts)
    rebuilt = make_header(parts)
    check(rebuilt.encode(),
          'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4087
4088 def test_broken_base64_header(self):
4089 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004090 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004091 raises(errors.HeaderParseError, decode_header, s)
4092
R. David Murray477efb32011-01-05 01:39:32 +00004093 def test_shift_jis_charset(self):
4094 h = Header('文', charset='shift_jis')
4095 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4096
R David Murrayde912762011-03-16 18:26:23 -04004097 def test_flatten_header_with_no_value(self):
4098 # Issue 11401 (regression from email 4.x) Note that the space after
4099 # the header doesn't reflect the input, but this is also the way
4100 # email 4.x behaved. At some point it would be nice to fix that.
4101 msg = email.message_from_string("EmptyHeader:")
4102 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4103
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004104
Ezio Melottib3aedd42010-11-20 19:04:17 +00004105
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004106# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 parameter handling on Message objects.

    Covers reading, setting and deleting extended parameters as well as
    parsing of continued ("name*0", "name*1*", ...) parameter segments.

    NOTE(review): the expected multi-line strings below were reconstructed
    from a rendered view of this file; confirm runs of spaces inside them
    against the msg_*.txt fixtures before relying on exact whitespace.
    """

    # Parameter value shared by the get/set/del parameter tests.
    _TITLE = "This is even more ***fun*** isn't it!"
    # Decoded value expected by most of the continued-segment tests.
    _FILENAME = 'This is even more ***fun*** is it not.pdf'

    def test_get_param(self):
        """get_param() returns a (charset, language, value) 3-tuple."""
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', self._TITLE))
        # With unquote=False the surrounding double quotes are kept.
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"' + self._TITLE + '"'))

    def test_set_param(self):
        """set_param() stores charset/language and flattens as title*=..."""
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', self._TITLE, charset='us-ascii')
        # No language given: the language slot is the empty string.
        eq(msg.get_param('title'),
           ('us-ascii', '', self._TITLE))
        msg.set_param('title', self._TITLE,
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', self._TITLE))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', self._TITLE,
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        """del_param() removes one parameter and leaves the others in place."""
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', self._TITLE,
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        """get_content_charset() reports 'us-ascii' for msg_32.txt."""
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        """Unquoted encoded segments parse and flatten back unchanged."""
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), self._FILENAME)
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        """Quoted encoded segments parse and flatten back unchanged."""
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), self._FILENAME)
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        """Plain continued segments are joined into a str, not a tuple."""
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        """Empty charset and language fields in a continued filename."""
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), self._FILENAME)

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        """Same input and expectation as the preceding filename test."""
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), self._FILENAME)

    def test_rfc2231_partly_encoded(self):
        """Only the segment marked with a trailing '*' is percent-decoded."""
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        """Segments without a trailing '*' are left percent-encoded."""
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        """get_boundary() decodes a continued, encoded boundary parameter."""
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(), self._FILENAME)

    def test_rfc2231_no_language_or_charset_in_charset(self):
        """get_content_charset() decodes and lower-cases a continued value."""
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        """An unknown charset name ('bogus') still yields the filename."""
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), self._FILENAME)

    def test_rfc2231_bad_encoding_in_charset(self):
        """A charset parameter declared with an unknown encoding gives None."""
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        """A charset parameter containing non-ASCII characters gives None."""
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        """A byte invalid for the declared charset becomes U+FFFD."""
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), self._FILENAME + '\ufffd')

    def test_rfc2231_unknown_encoding(self):
        """An unknown charset label does not prevent reading the filename."""
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        """A lone apostrophe in encoded segments is not a charset delimiter."""
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        """A lone apostrophe in plain segments yields a plain str value."""
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        """Only the first two apostrophes delimit charset and language."""
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        """Apostrophes in non-encoded segments are kept as literal text."""
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        """An ordinary quoted parameter is returned as a plain string."""
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        """Charset/language come from segment 0 when it is the encoded one."""
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        """Charset/language are still extracted when segment 0 is unencoded."""
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4441
4442
Ezio Melottib3aedd42010-11-20 19:04:17 +00004443
R. David Murraya8f480f2010-01-16 18:30:03 +00004444# Tests to ensure that signed parts of an email are completely preserved, as
4445# required by RFC1847 section 2.1. Note that these are incomplete, because the
4446# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of a signed message is preserved.

    Each test parses a message file, regenerates it, and compares the text
    of the first MIME part in the original and in the regenerated output.
    """

    # Matches "--<boundary>" at the start of a line, captures everything up
    # to the next line that consists of "--<same boundary>", and exposes that
    # text as group 2.  Compiled once for the class instead of on every call.
    _FIRST_PART_RE = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        """Return (raw_text, parsed_message) for the named test data file."""
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        """Return the text of the first MIME part found in *text*.

        Fails the test with a clear message when no part can be located,
        rather than raising AttributeError on a None match object.
        """
        match = self._FIRST_PART_RE.search(text)
        self.assertIsNotNone(match, 'no MIME part delimiters found in message')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert that *result* carries the same first MIME part as *original*."""
        # Extract the first mime part of each message
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        """The first part survives as_string() with default settings."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        """The first part survives as_string() with maxheaderlen=60."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        """The first part survives flattening through a Generator."""
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4479
4480
Ezio Melottib3aedd42010-11-20 19:04:17 +00004481
# When this file is executed as a script, hand control to unittest's
# command-line runner.
if __name__ == '__main__':
    unittest.main()