blob: e5eece2720ce984b5eaa02d01d0b7c45bcc6ebc9 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
# String constants shared by the tests below.
NL = '\n'          # line separator used when joining lines
EMPTYSTRING = ''   # the empty string
SPACE = ' '        # a single blank, used as a joiner
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory next to the test package.

    The directory is located relative to ``landmark`` (the test package's
    ``__file__``).  Any extra positional and keyword arguments are passed
    straight through to the built-in open(), whose result is returned.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
Guido van Rossum8b3febe2007-08-30 01:15:14 +000055# Base test class
class TestEmailBase(unittest.TestCase):
    """Shared helpers for the email test cases in this module."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            sfirst = str(first)
            ssecond = str(second)
            # Compare the repr() of each line so that differences in
            # whitespace and escapes are visible in the diff.
            rfirst = [repr(line) for line in sfirst.splitlines()]
            rsecond = [repr(line) for line in ssecond.splitlines()]
            diff = difflib.ndiff(rfirst, rsecond)
            raise self.failureException(NL + NL.join(diff))

    def _msgobj(self, filename):
        # Parse the named fixture into a message object.  openfile() already
        # resolves the name inside the test data directory; it is called
        # directly (as other tests in this module do) so that a same-named
        # file found elsewhere on sys.path can never be picked up instead.
        with openfile(filename) as fp:
            return email.message_from_file(fp)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the public API of email.message.Message."""

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # With a failobj, parts lacking a charset report that value instead.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        # Flattening must reproduce the input text exactly.
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field rather than replacing
            # the old one, and lookups return the first match.  Remove the
            # previous value so each spelling is really exercised (deleting
            # a header that is absent is a no-op).
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # the remainder must match the original text.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup is None.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Membership tests ignore the case of the header name.
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter places it after the remaining ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a slash is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The payload is expected back undecoded, as raw bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*="utf-8\'\'Fu%C3%9Fballer.ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
            msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000536
Ezio Melottib3aedd42010-11-20 19:04:17 +0000537
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000538# Test the email.encoders module
539class TestEncoders(unittest.TestCase):
540 def test_encode_empty_payload(self):
541 eq = self.assertEqual
542 msg = Message()
543 msg.set_charset('us-ascii')
544 eq(msg['content-transfer-encoding'], '7bit')
545
546 def test_default_cte(self):
547 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000548 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000549 msg = MIMEText('hello world')
550 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000551 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000552 msg = MIMEText('hello \xf8 world')
553 eq(msg['content-transfer-encoding'], '8bit')
554 # And now with a different charset
555 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
556 eq(msg['content-transfer-encoding'], 'quoted-printable')
557
R. David Murraye85200d2010-05-06 01:41:14 +0000558 def test_encode7or8bit(self):
559 # Make sure a charset whose input character set is 8bit but
560 # whose output character set is 7bit gets a transfer-encoding
561 # of 7bit.
562 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000563 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000564 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000565
Ezio Melottib3aedd42010-11-20 19:04:17 +0000566
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000567# Test long header wrapping
568class TestLongHeaders(TestEmailBase):
569 def test_split_long_continuation(self):
570 eq = self.ndiffAssertEqual
571 msg = email.message_from_string("""\
572Subject: bug demonstration
573\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
574\tmore text
575
576test
577""")
578 sfp = StringIO()
579 g = Generator(sfp)
580 g.flatten(msg)
581 eq(sfp.getvalue(), """\
582Subject: bug demonstration
583\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
584\tmore text
585
586test
587""")
588
589 def test_another_long_almost_unsplittable_header(self):
590 eq = self.ndiffAssertEqual
591 hstr = """\
592bug demonstration
593\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
594\tmore text"""
595 h = Header(hstr, continuation_ws='\t')
596 eq(h.encode(), """\
597bug demonstration
598\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
599\tmore text""")
600 h = Header(hstr.replace('\t', ' '))
601 eq(h.encode(), """\
602bug demonstration
603 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
604 more text""")
605
606 def test_long_nonstring(self):
607 eq = self.ndiffAssertEqual
608 g = Charset("iso-8859-1")
609 cz = Charset("iso-8859-2")
610 utf8 = Charset("utf-8")
611 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
612 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
613 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
614 b'bef\xf6rdert. ')
615 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
616 b'd\xf9vtipu.. ')
617 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
618 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
619 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
620 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
621 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
622 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
623 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
624 '\u3044\u307e\u3059\u3002')
625 h = Header(g_head, g, header_name='Subject')
626 h.append(cz_head, cz)
627 h.append(utf8_head, utf8)
628 msg = Message()
629 msg['Subject'] = h
630 sfp = StringIO()
631 g = Generator(sfp)
632 g.flatten(msg)
633 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000634Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
635 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
636 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
637 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
638 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
639 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
640 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
641 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
642 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
643 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
644 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000645
646""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000647 eq(h.encode(maxlinelen=76), """\
648=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
649 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
650 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
651 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
652 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
653 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
654 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
655 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
656 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
657 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
658 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000659
660 def test_long_header_encode(self):
661 eq = self.ndiffAssertEqual
662 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
663 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
664 header_name='X-Foobar-Spoink-Defrobnit')
665 eq(h.encode(), '''\
666wasnipoop; giraffes="very-long-necked-animals";
667 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
668
669 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
670 eq = self.ndiffAssertEqual
671 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
672 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
673 header_name='X-Foobar-Spoink-Defrobnit',
674 continuation_ws='\t')
675 eq(h.encode(), '''\
676wasnipoop; giraffes="very-long-necked-animals";
677 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
678
679 def test_long_header_encode_with_tab_continuation(self):
680 eq = self.ndiffAssertEqual
681 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
682 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
683 header_name='X-Foobar-Spoink-Defrobnit',
684 continuation_ws='\t')
685 eq(h.encode(), '''\
686wasnipoop; giraffes="very-long-necked-animals";
687\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
688
689 def test_header_splitter(self):
690 eq = self.ndiffAssertEqual
691 msg = MIMEText('')
692 # It'd be great if we could use add_header() here, but that doesn't
693 # guarantee an order of the parameters.
694 msg['X-Foobar-Spoink-Defrobnit'] = (
695 'wasnipoop; giraffes="very-long-necked-animals"; '
696 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
697 sfp = StringIO()
698 g = Generator(sfp)
699 g.flatten(msg)
700 eq(sfp.getvalue(), '''\
701Content-Type: text/plain; charset="us-ascii"
702MIME-Version: 1.0
703Content-Transfer-Encoding: 7bit
704X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
705 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
706
707''')
708
709 def test_no_semis_header_splitter(self):
710 eq = self.ndiffAssertEqual
711 msg = Message()
712 msg['From'] = 'test@dom.ain'
713 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
714 msg.set_payload('Test')
715 sfp = StringIO()
716 g = Generator(sfp)
717 g.flatten(msg)
718 eq(sfp.getvalue(), """\
719From: test@dom.ain
720References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
721 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
722
723Test""")
724
725 def test_no_split_long_header(self):
726 eq = self.ndiffAssertEqual
727 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000728 h = Header(hstr)
729 # These come on two lines because Headers are really field value
730 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000731 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000732References:
733 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
734 h = Header('x' * 80)
735 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000736
def test_splitting_multiple_long_lines(self):
    # Three identical over-long lines; the second and third start with a
    # tab.  The expected encoding breaks each one after every semicolon
    # and keeps the tab in front of the second and third line.
    unfolded = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
    folded = """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)"""
    header = Header(unfolded, continuation_ws='\t')
    self.ndiffAssertEqual(header.encode(), folded)
758
def test_splitting_first_line_only_is_long(self):
    # Only the first of the four input lines is longer than maxlinelen;
    # it is the only one expected to be folded, the rest pass through.
    received = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    folded = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    header = Header(received, maxlinelen=78, header_name='Received',
                    continuation_ws='\t')
    self.ndiffAssertEqual(header.encode(), folded)
774
def test_long_8bit_header(self):
    # An iso-8859-1 subject assembled from two chunks: checked once as a
    # bare Header, then as a message header with and without wrapping.
    check = self.ndiffAssertEqual
    subject = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
    subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
    message = Message()
    message['Subject'] = subject
    wrapped = message.as_string(maxheaderlen=76)
    check(wrapped, """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
    # maxheaderlen=0 turns wrapping off: a single encoded word is expected.
    unwrapped = message.as_string(maxheaderlen=0)
    check(unwrapped, """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")
794
def test_long_8bit_header_no_charset(self):
    # The same non-ASCII text is set twice: as a plain string, which is
    # expected to fail on serialization, and as a utf-8 Header, which is
    # expected to be encoded and folded.
    header_string = ''.join([
        'Britische Regierung gibt gr\xfcnes Licht ',
        'f\xfcr Offshore-Windkraftprojekte ',
        '<a-very-long-address@example.com>',
    ])
    plain = Message()
    plain['Reply-To'] = header_string
    self.assertRaises(UnicodeEncodeError, plain.as_string)
    encoded = Message()
    encoded['Reply-To'] = Header(header_string, 'utf-8',
                                 header_name='Reply-To')
    self.ndiffAssertEqual(encoded.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
811
def test_long_to_header(self):
    # Five addresses; the first two are joined by a bare comma (no space),
    # so the expected output keeps them together on the first line.
    recipients = ''.join([
        '"Someone Test #A" <someone@eecs.umich.edu>,',
        '<someone@eecs.umich.edu>,',
        '"Someone Test #B" <someone@umich.edu>, ',
        '"Someone Test #C" <someone@eecs.umich.edu>, ',
        '"Someone Test #D" <someone@eecs.umich.edu>',
    ])
    message = Message()
    message['To'] = recipients
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')
828
def test_long_line_after_append(self):
    # The first chunk alone nearly fills a 76-column line, so the
    # appended chunk is expected on a continuation line.
    header = Header('This is an example of string which has almost the '
                    'limit of header length.')
    header.append('Add another line.')
    self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
 Add another line.""")
837
def test_shorter_line_with_append(self):
    # Both chunks together stay short; they are expected on one line,
    # joined by a single space.
    header = Header('This is a shorter line.')
    header.append('Add another sentence. (Surprise?)')
    expected = 'This is a shorter line. Add another sentence. (Surprise?)'
    self.ndiffAssertEqual(header.encode(), expected)
845
def test_long_field_name(self):
    # A long field name is passed as header_name; the expected first
    # encoded line is correspondingly shorter than the ones after it.
    field_name = 'X-Very-Very-Very-Long-Header-Name'
    german = ''.join([
        'Die Mieter treten hier ein werden mit einem Foerderband ',
        'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ',
        'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ',
        'bef\xf6rdert. ',
    ])
    header = Header(german, 'iso-8859-1', header_name=field_name)
    # BAW: this seems broken because the first line is too long
    self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000860
def test_long_received_header(self):
    # The same value is stored once wrapped in a Header and once as a
    # plain string; both are expected to fold identically.
    value = ''.join([
        'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ',
        'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ',
        'Wed, 05 Mar 2003 18:10:18 -0700',
    ])
    message = Message()
    message['Received-1'] = Header(value, continuation_ws='\t')
    message['Received-2'] = value
    # This should be splitting on spaces not semicolons.
    flattened = message.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")
876
def test_string_headerinst_eq(self):
    # A Header instance and the equivalent plain string are expected to
    # serialize to the same folded text.
    value = ''.join([
        '<15975.17901.207240.414604@sgigritzmann1.mathematik.',
        "tu-muenchen.de> (David Bremner's message of ",
        '"Thu, 6 Mar 2003 13:58:21 +0100")',
    ])
    message = Message()
    message['Received-1'] = Header(value, header_name='Received-1',
                                   continuation_ws='\t')
    message['Received-2'] = value
    # XXX This should be splitting on spaces not commas.
    flattened = message.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, '''\
Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of "Thu,
 6 Mar 2003 13:58:21 +0100")
Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of "Thu,
 6 Mar 2003 13:58:21 +0100")

''')
893
def test_long_unbreakable_lines_with_continuation(self):
    # Two lines of base64-looking text without inner whitespace, the
    # second one already written as a continuation line.
    face = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
    message = Message()
    message['Face-1'] = face
    message['Face-2'] = Header(face, header_name='Face-2')
    # XXX This splitting is all wrong.  The first value line should be
    # snug against the field name.
    flattened = message.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")
913
def test_another_long_multiline_header(self):
    # The header arrives through the parser here, not through
    # Message.__setitem__, and is then re-serialized with wrapping.
    raw = ''.join([
        'Received: from siimage.com ',
        '([172.25.1.3]) by zima.siliconimage.com with ',
        'Microsoft SMTPSVC(5.0.2195.4905); ',
        'Wed, 16 Oct 2002 07:41:11 -0700',
    ])
    parsed = email.message_from_string(raw)
    self.ndiffAssertEqual(parsed.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
 Wed, 16 Oct 2002 07:41:11 -0700

''')
926
def test_long_lines_with_different_header(self):
    # The value itself looks like a complete "List-Unsubscribe:" header
    # but is stored under the field name "List", once as a string and
    # once as a Header; both copies are expected to fold the same way.
    value = ''.join([
        'List-Unsubscribe: ',
        '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,',
        ' <mailto:spamassassin-talk-request@lists.sourceforge.net',
        '?subject=unsubscribe>',
    ])
    message = Message()
    message['List'] = value
    message['List'] = Header(value, header_name='List')
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), """\
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")
943
944
Ezio Melottib3aedd42010-11-20 19:04:17 +0000945
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000946# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A message whose body opens with a line starting with "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Serialize self.msg through a Generator created with the given
        # mangle_from_ setting and return the resulting text.
        buf = StringIO()
        Generator(buf, mangle_from_=mangle).flatten(self.msg)
        return buf.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body's "From " line gains a leading ">".
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is written out untouched.
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
977
978
Ezio Melottib3aedd42010-11-20 19:04:17 +0000979
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000980# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Use the audiotest.au that lives in email/test/data.  Python also
        # ships one in Lib/test, but some binary distros leave the test
        # package out.  The trailing empty string given to os.path.join()
        # is significant, because findfile() will do a dirname().
        data_dir = os.path.join(os.path.dirname(landmark), 'data', '')
        audio_path = findfile('audiotest.au', data_dir)
        with open(audio_path, 'rb') as stream:
            self._audiodata = stream.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype was given, so it has to be derived from the data.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the file bytes.
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        audio = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(audio.get_content_type(), 'audio/fish')

    def test_add_header(self):
        audio = self._au
        disposition = 'content-disposition'
        audio.add_header('Content-Disposition', 'attachment',
                         filename='audiotest.au')
        self.assertEqual(audio['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(audio.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(audio.get_param('filename', header=disposition),
                         'audiotest.au')
        # A fresh list serves as a sentinel that can only be matched by
        # identity, never by accident.
        missing = []
        self.assertEqual(audio.get_param('attachment', header=disposition),
                         '')
        self.assertIs(
            audio.get_param('foo', failobj=missing, header=disposition),
            missing)
        # Try some missing stuff
        self.assertIs(audio.get_param('foobar', missing), missing)
        self.assertIs(
            audio.get_param('attachment', missing, header='foobar'),
            missing)
1024
1025
Ezio Melottib3aedd42010-11-20 19:04:17 +00001026
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001027# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the sample GIF from the test data directory and wrap it.
        with openfile('PyBanner048.gif', 'rb') as stream:
            self._imgdata = stream.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype was given, so it has to be derived from the data.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the file bytes.
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        image = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(image.get_content_type(), 'image/fish')

    def test_add_header(self):
        image = self._im
        disposition = 'content-disposition'
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        self.assertEqual(image['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(image.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(image.get_param('filename', header=disposition),
                         'dingusfish.gif')
        # A fresh list serves as a sentinel that can only be matched by
        # identity, never by accident.
        missing = []
        self.assertEqual(image.get_param('attachment', header=disposition),
                         '')
        self.assertIs(
            image.get_param('foo', failobj=missing, header=disposition),
            missing)
        # Try some missing stuff
        self.assertIs(image.get_param('foobar', missing), missing)
        self.assertIs(
            image.get_param('attachment', missing, header='foobar'),
            missing)
1065
1066
Ezio Melottib3aedd42010-11-20 19:04:17 +00001067
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001068# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Arbitrary binary data gets the generic application subtype and a
        # base64 transfer encoding.
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        # Named "raw" rather than "bytes" so the builtin is not shadowed.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(raw)
        # Compare modulo surrounding whitespace: the base64 text is what
        # matters, and base64.encodebytes()-style encoders terminate their
        # output with a newline.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        # Decoding must give back exactly the bytes that went in.
        self.assertEqual(msg.get_payload(decode=True), raw)
1082
1083
Ezio Melottib3aedd42010-11-20 19:04:17 +00001084
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001085# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # A plain ASCII text part created without an explicit charset.
        self._msg = MIMEText('hello there')

    def test_types(self):
        text = self._msg
        self.assertEqual(text.get_content_type(), 'text/plain')
        self.assertEqual(text.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel that can only be matched by
        # identity.
        missing = []
        self.assertIs(text.get_param('foobar', missing), missing)
        self.assertIs(text.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        # An explicit us-ascii charset shows up both on the Charset object
        # and in the Content-Type header.
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        # Without a charset argument the part still reports us-ascii.
        text = MIMEText('hello there')
        self.assertEqual(text.get_charset(), 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', text.as_string())

    def test_utf8_input(self):
        # Cyrillic text given as utf-8 decodes back to its utf-8 bytes.
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        text = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(text.get_charset().output_charset, 'utf-8')
        self.assertEqual(text['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(text.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, cyrillic)
1136
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001137
Ezio Melottib3aedd42010-11-20 19:04:17 +00001138
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001139# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a short text part and
        # a GIF attachment, and keep references to all three for the tests.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is the DST flag; only an explicit 0
        # selects the non-DST offset.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value means a negative offset in the Date header.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  The sign is already carried by
        # "sign" (avoids "+-0100" east of UTC), and minutes are real
        # minutes rather than a decimal fraction of an hour.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _bare_outer(self):
        # Return a fresh multipart/mixed container carrying only the three
        # headers shared by the serialization tests; no boundary, no parts.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        # The container exposes exactly the two attached parts, in order
        # and by identity.
        container = self._msg
        self.assertTrue(container.is_multipart())
        self.assertEqual(container.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(container.get_payload()), 2)
        self.assertRaises(IndexError, container.get_payload, 2)
        first = container.get_payload(0)
        second = container.get_payload(1)
        self.assertIs(first, self._txt)
        self.assertIs(second, self._im)
        self.assertEqual(container.get_payload(), [first, second])
        self.assertFalse(first.is_multipart())
        self.assertFalse(second.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-serializing must reproduce the text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._bare_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._bare_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = self._bare_outer()
        outer.set_boundary('BOUNDARY')
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble is expected to add one blank line
        # before the first boundary.
        outer = self._bare_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._bare_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._bare_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty-string epilogue is expected to add a newline after the
        # closing boundary.
        outer = self._bare_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = self._bare_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        # The second part is a multipart/alternative whose two subparts
        # are message/external-body, each wrapping one text/plain part.
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        alternative = msg.get_payload(1)
        eq(alternative.get_content_type(), 'multipart/alternative')
        eq(len(alternative.get_payload()), 2)
        for subpart in alternative.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            inner = subpart.get_payload(0)
            eq(inner.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below is written with four spaces per
        # level; confirm the exact tree against msg_38.txt.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg_39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below is written with four spaces per
        # level; confirm the exact tree against msg_39.txt.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not cause the
        # body to be split; the message round-trips unchanged.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # Leading whitespace inside the quoted boundary is part of it.
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_boundary(), ' XXXX')
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the text; the
        # part's payload must still be found intact.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001511
1512
Ezio Melottib3aedd42010-11-20 19:04:17 +00001513
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001514# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    def test_parse_missing_minor_type(self):
        # A content type that cannot be used as given reads as text/plain.
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        # Without a boundary the payload stays a string and two defects
        # are recorded, in this order.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The would-be parts are left as one undivided string payload.
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # The expected output has the blank line between headers and body
        # that the input lacks.
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        # A leading continuation line yields no headers; it is recorded
        # as a defect and the remaining lines become the body.
        eq = self.assertEqual
        text = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(text)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1625
1626
Ezio Melottib3aedd42010-11-20 19:04:17 +00001627
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001628# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # A folded header mixing plain text with encoded words is decoded
        # into chunks, rebuilt with make_header(), and re-encoded with a
        # 76-column limit.
        raw = ('Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz\n'
               ' foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(
            rebuilt.encode(maxlinelen=76),
            'Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar '
            '=?mac-iceland?q?r=8Aksm?=\n'
            ' =?mac-iceland?q?=9Arg=8Cs?=')

    def test_whitespace_eater_unicode(self):
        # An encoded word followed by plain text decodes into two chunks and
        # joins back into a single readable string.
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        # Alternating plain and base64-encoded words.
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not set off by whitespace are expected to
        # come back undecoded, as one chunk with no charset.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        # The same words, separated by spaces, are decoded individually.
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        # Base64 payloads with excess or missing '=' padding must still
        # decode to the bytes listed next to them.
        template = '=?iso-8859-1?B?%s?='
        cases = [   # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
            ]
        for encoded, expected in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.  '=zz' is not a valid hex escape; it is expected to be
        # kept literally in the decoded bytes.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
1692
Ezio Melottib3aedd42010-11-20 19:04:17 +00001693
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001694# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # A plain string is rejected with TypeError.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        # Wrapping a Message yields a message/rfc822 container whose payload
        # is a one-element list holding the very same object.
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must raise
        # MultipartConversionError.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        # The enclosed message is emitted verbatim after the outer headers.
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        # Parsing msg_11.txt gives a message/rfc822 container holding
        # exactly one Message.
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the spacing of the indented lines in this literal is
        # reproduced as received; confirm it against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with a preamble and an epilogue must
        # flatten to exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty;
        # the expected text below shows how the generator lays that out.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def _assert_digest_default_types(self, filename):
        # Shared body of the two default-type tests below: both top-level
        # subparts must default to and report message/rfc822, and the
        # message inside each of them must default to and report text/plain.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = (msg.get_payload(0), msg.get_payload(1))
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_default_type(self):
        self._assert_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._assert_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)

        def check_subparts_are_rfc822():
            # Checked both before and after the explicit headers are removed.
            eq(subpart1.get_content_type(), 'message/rfc822')
            eq(subpart1.get_default_type(), 'message/rfc822')
            eq(subpart2.get_content_type(), 'message/rfc822')
            eq(subpart2.get_default_type(), 'message/rfc822')

        check_subparts_are_rfc822()
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers gone the subparts must still report
        # message/rfc822, and they are written with an empty header block.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        check_subparts_are_rfc822()
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts passed through _subparts become the payload, in order.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built with no arguments is already multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001993
Ezio Melottib3aedd42010-11-20 19:04:17 +00001994
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001995# A general test of parser->model->generator idempotency. IOW, read a message
1996# in, parse it into a message object tree, then without touching the tree,
1997# regenerate the plain text. The original text and the transformed text
1998# should be identical. Note: that we ignore the Unix-From since that may
1999# contain a changed date.
class TestIdempotent(TestEmailBase):

    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, original text) for the named data file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the output to equal the text it was parsed from.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        # The type is reported as text/plain, yet no parameters exist.
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    # Each of the following tests parses one data file and requires that
    # flattening the result reproduces the file text exactly.

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # The only round trip here that also emits the Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Get a freshly parsed message object (the file is opened and closed
        # inside _msgobj, so there is no shared file position to reset).
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002157
2158
Ezio Melottib3aedd42010-11-20 19:04:17 +00002159
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002160# Test various other bits of the package's functionality
2161class TestMiscellaneous(TestEmailBase):
2162 def test_message_from_string(self):
2163 with openfile('msg_01.txt') as fp:
2164 text = fp.read()
2165 msg = email.message_from_string(text)
2166 s = StringIO()
2167 # Don't wrap/continue long headers since we're trying to test
2168 # idempotency.
2169 g = Generator(s, maxheaderlen=0)
2170 g.flatten(msg)
2171 self.assertEqual(text, s.getvalue())
2172
2173 def test_message_from_file(self):
2174 with openfile('msg_01.txt') as fp:
2175 text = fp.read()
2176 fp.seek(0)
2177 msg = email.message_from_file(fp)
2178 s = StringIO()
2179 # Don't wrap/continue long headers since we're trying to test
2180 # idempotency.
2181 g = Generator(s, maxheaderlen=0)
2182 g.flatten(msg)
2183 self.assertEqual(text, s.getvalue())
2184
2185 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002186 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002187 with openfile('msg_01.txt') as fp:
2188 text = fp.read()
2189
2190 # Create a subclass
2191 class MyMessage(Message):
2192 pass
2193
2194 msg = email.message_from_string(text, MyMessage)
2195 unless(isinstance(msg, MyMessage))
2196 # Try something more complicated
2197 with openfile('msg_02.txt') as fp:
2198 text = fp.read()
2199 msg = email.message_from_string(text, MyMessage)
2200 for subpart in msg.walk():
2201 unless(isinstance(subpart, MyMessage))
2202
2203 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002204 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002205 # Create a subclass
2206 class MyMessage(Message):
2207 pass
2208
2209 with openfile('msg_01.txt') as fp:
2210 msg = email.message_from_file(fp, MyMessage)
2211 unless(isinstance(msg, MyMessage))
2212 # Try something more complicated
2213 with openfile('msg_02.txt') as fp:
2214 msg = email.message_from_file(fp, MyMessage)
2215 for subpart in msg.walk():
2216 unless(isinstance(subpart, MyMessage))
2217
2218 def test__all__(self):
2219 module = __import__('email')
2220 # Can't use sorted() here due to Python 2.3 compatibility
2221 all = module.__all__[:]
2222 all.sort()
2223 self.assertEqual(all, [
2224 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002225 'header', 'iterators', 'message', 'message_from_binary_file',
2226 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002227 'message_from_string', 'mime', 'parser',
2228 'quoprimime', 'utils',
2229 ])
2230
2231 def test_formatdate(self):
2232 now = time.time()
2233 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2234 time.gmtime(now)[:6])
2235
2236 def test_formatdate_localtime(self):
2237 now = time.time()
2238 self.assertEqual(
2239 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2240 time.localtime(now)[:6])
2241
2242 def test_formatdate_usegmt(self):
2243 now = time.time()
2244 self.assertEqual(
2245 utils.formatdate(now, localtime=False),
2246 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2247 self.assertEqual(
2248 utils.formatdate(now, localtime=False, usegmt=True),
2249 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2250
2251 def test_parsedate_none(self):
2252 self.assertEqual(utils.parsedate(''), None)
2253
2254 def test_parsedate_compact(self):
2255 # The FWS after the comma is optional
2256 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2257 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2258
2259 def test_parsedate_no_dayofweek(self):
2260 eq = self.assertEqual
2261 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2262 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2263
2264 def test_parsedate_compact_no_dayofweek(self):
2265 eq = self.assertEqual
2266 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2267 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2268
2269 def test_parsedate_acceptable_to_time_functions(self):
2270 eq = self.assertEqual
2271 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2272 t = int(time.mktime(timetup))
2273 eq(time.localtime(t)[:6], timetup[:6])
2274 eq(int(time.strftime('%Y', timetup)), 2003)
2275 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2276 t = int(time.mktime(timetup[:9]))
2277 eq(time.localtime(t)[:6], timetup[:6])
2278 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2279
R. David Murray219d1c82010-08-25 00:45:55 +00002280 def test_parsedate_y2k(self):
2281 """Test for parsing a date with a two-digit year.
2282
2283 Parsing a date with a two-digit year should return the correct
2284 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2285 obsoletes RFC822) requires four-digit years.
2286
2287 """
2288 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2289 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2290 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2291 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2292
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002293 def test_parseaddr_empty(self):
2294 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2295 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2296
2297 def test_noquote_dump(self):
2298 self.assertEqual(
2299 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2300 'A Silly Person <person@dom.ain>')
2301
2302 def test_escape_dump(self):
2303 self.assertEqual(
2304 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2305 r'"A \(Very\) Silly Person" <person@dom.ain>')
2306 a = r'A \(Special\) Person'
2307 b = 'person@dom.ain'
2308 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2309
2310 def test_escape_backslashes(self):
2311 self.assertEqual(
2312 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2313 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2314 a = r'Arthur \Backslash\ Foobar'
2315 b = 'person@dom.ain'
2316 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2317
2318 def test_name_with_dot(self):
2319 x = 'John X. Doe <jxd@example.com>'
2320 y = '"John X. Doe" <jxd@example.com>'
2321 a, b = ('John X. Doe', 'jxd@example.com')
2322 self.assertEqual(utils.parseaddr(x), (a, b))
2323 self.assertEqual(utils.parseaddr(y), (a, b))
2324 # formataddr() quotes the name if there's a dot in it
2325 self.assertEqual(utils.formataddr((a, b)), y)
2326
R. David Murray5397e862010-10-02 15:58:26 +00002327 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2328 # issue 10005. Note that in the third test the second pair of
2329 # backslashes is not actually a quoted pair because it is not inside a
2330 # comment or quoted string: the address being parsed has a quoted
2331 # string containing a quoted backslash, followed by 'example' and two
2332 # backslashes, followed by another quoted string containing a space and
2333 # the word 'example'. parseaddr copies those two backslashes
2334 # literally. Per rfc5322 this is not technically correct since a \ may
2335 # not appear in an address outside of a quoted string. It is probably
2336 # a sensible Postel interpretation, though.
2337 eq = self.assertEqual
2338 eq(utils.parseaddr('""example" example"@example.com'),
2339 ('', '""example" example"@example.com'))
2340 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2341 ('', '"\\"example\\" example"@example.com'))
2342 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2343 ('', '"\\\\"example\\\\" example"@example.com'))
2344
    def test_parseaddr_preserves_spaces_in_local_part(self):
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a sequence
        # of atoms, not dotatoms).  On the other hand we strip whitespace from
        # before the @ and around dots, on the assumption that the whitespace
        # around the punctuation is a mistake in what would otherwise be
        # an RFC5322 local part.  Leading whitespace is, as usual, stripped
        # as well.
        #
        # NOTE(review): the exact number of spaces inside these literals is
        # significant to what the test exercises; confirm the spacing below
        # against the authoritative copy of this file.
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr(" merwok wok @xample.com"))
        self.assertEqual(('', 'merwok"wok" wok@xample.com'),
            utils.parseaddr('merwok"wok" wok@xample.com'))
        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
            utils.parseaddr('merwok. wok . wok@xample.com'))
2362
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002363 def test_multiline_from_comment(self):
2364 x = """\
2365Foo
2366\tBar <foo@example.com>"""
2367 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2368
2369 def test_quote_dump(self):
2370 self.assertEqual(
2371 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2372 r'"A Silly; Person" <person@dom.ain>')
2373
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002374 def test_charset_richcomparisons(self):
2375 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002376 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002377 cset1 = Charset()
2378 cset2 = Charset()
2379 eq(cset1, 'us-ascii')
2380 eq(cset1, 'US-ASCII')
2381 eq(cset1, 'Us-AsCiI')
2382 eq('us-ascii', cset1)
2383 eq('US-ASCII', cset1)
2384 eq('Us-AsCiI', cset1)
2385 ne(cset1, 'usascii')
2386 ne(cset1, 'USASCII')
2387 ne(cset1, 'UsAsCiI')
2388 ne('usascii', cset1)
2389 ne('USASCII', cset1)
2390 ne('UsAsCiI', cset1)
2391 eq(cset1, cset2)
2392 eq(cset2, cset1)
2393
2394 def test_getaddresses(self):
2395 eq = self.assertEqual
2396 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2397 'Bud Person <bperson@dom.ain>']),
2398 [('Al Person', 'aperson@dom.ain'),
2399 ('Bud Person', 'bperson@dom.ain')])
2400
2401 def test_getaddresses_nasty(self):
2402 eq = self.assertEqual
2403 eq(utils.getaddresses(['foo: ;']), [('', '')])
2404 eq(utils.getaddresses(
2405 ['[]*-- =~$']),
2406 [('', ''), ('', ''), ('', '*--')])
2407 eq(utils.getaddresses(
2408 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2409 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2410
2411 def test_getaddresses_embedded_comment(self):
2412 """Test proper handling of a nested comment"""
2413 eq = self.assertEqual
2414 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2415 eq(addrs[0][1], 'foo@bar.com')
2416
2417 def test_utils_quote_unquote(self):
2418 eq = self.assertEqual
2419 msg = Message()
2420 msg.add_header('content-disposition', 'attachment',
2421 filename='foo\\wacky"name')
2422 eq(msg.get_filename(), 'foo\\wacky"name')
2423
2424 def test_get_body_encoding_with_bogus_charset(self):
2425 charset = Charset('not a charset')
2426 self.assertEqual(charset.get_body_encoding(), 'base64')
2427
2428 def test_get_body_encoding_with_uppercase_charset(self):
2429 eq = self.assertEqual
2430 msg = Message()
2431 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2432 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2433 charsets = msg.get_charsets()
2434 eq(len(charsets), 1)
2435 eq(charsets[0], 'utf-8')
2436 charset = Charset(charsets[0])
2437 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002438 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002439 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2440 eq(msg.get_payload(decode=True), b'hello world')
2441 eq(msg['content-transfer-encoding'], 'base64')
2442 # Try another one
2443 msg = Message()
2444 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2445 charsets = msg.get_charsets()
2446 eq(len(charsets), 1)
2447 eq(charsets[0], 'us-ascii')
2448 charset = Charset(charsets[0])
2449 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2450 msg.set_payload('hello world', charset=charset)
2451 eq(msg.get_payload(), 'hello world')
2452 eq(msg['content-transfer-encoding'], '7bit')
2453
2454 def test_charsets_case_insensitive(self):
2455 lc = Charset('us-ascii')
2456 uc = Charset('US-ASCII')
2457 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2458
2459 def test_partial_falls_inside_message_delivery_status(self):
2460 eq = self.ndiffAssertEqual
2461 # The Parser interface provides chunks of data to FeedParser in 8192
2462 # byte gulps. SF bug #1076485 found one of those chunks inside
2463 # message/delivery-status header block, which triggered an
2464 # unreadline() of NeedMoreData.
2465 msg = self._msgobj('msg_43.txt')
2466 sfp = StringIO()
2467 iterators._structure(msg, sfp)
2468 eq(sfp.getvalue(), """\
2469multipart/report
2470 text/plain
2471 message/delivery-status
2472 text/plain
2473 text/plain
2474 text/plain
2475 text/plain
2476 text/plain
2477 text/plain
2478 text/plain
2479 text/plain
2480 text/plain
2481 text/plain
2482 text/plain
2483 text/plain
2484 text/plain
2485 text/plain
2486 text/plain
2487 text/plain
2488 text/plain
2489 text/plain
2490 text/plain
2491 text/plain
2492 text/plain
2493 text/plain
2494 text/plain
2495 text/plain
2496 text/plain
2497 text/plain
2498 text/rfc822-headers
2499""")
2500
R. David Murraya0b44b52010-12-02 21:47:19 +00002501 def test_make_msgid_domain(self):
2502 self.assertEqual(
2503 email.utils.make_msgid(domain='testdomain-string')[-19:],
2504 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002505
Ezio Melottib3aedd42010-11-20 19:04:17 +00002506
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002507# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators, plus a regression test for
    # the line buffer used by the feed parser.

    def test_body_line_iterator(self):
        # body_line_iterator() must yield every body line of the message.
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart; msg_19.txt holds the text the
        # joined lines are compared against.
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Select subparts by main type only.
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Select subparts by main type and subtype.
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of complete lines expected
        # to become readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity
                # rather than equality.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values on failure, which
            # assertTrue(a == b) does not.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2594
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002595
Ezio Melottib3aedd42010-11-20 19:04:17 +00002596
class TestParsers(TestEmailBase):
    # Tests for parsing messages from strings and files.

    # Show complete diffs when a comparison of long texts fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The content type says multipart, but with HeaderParser the
        # payload is expected to be a single string, not a list of parts.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLFs of the data file intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        # Flattening with CRLF line separators must reproduce the input.
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # The input is headers only, with no trailing newline; the last
        # header must still be readable.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Unusual punctuation in a field name is still expected to be
        # accepted as a header.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so no headers at
        # all are expected to be recognised.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable and leaves the result of keys()
        # untouched.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
            )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002759
Ezio Melottib3aedd42010-11-20 19:04:17 +00002760
R. David Murray96fd54e2010-10-08 15:55:28 +00002761class Test8BitBytesHandling(unittest.TestCase):
2762 # In Python3 all input is string, but that doesn't work if the actual input
2763 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2764 # decode byte streams using the surrogateescape error handler, and
2765 # reconvert to binary at appropriate places if we detect surrogates. This
2766 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2767 # but it does allow us to parse and preserve them, and to decode body
2768 # parts that use an 8bit CTE.
2769
2770 bodytest_msg = textwrap.dedent("""\
2771 From: foo@bar.com
2772 To: baz
2773 Mime-Version: 1.0
2774 Content-Type: text/plain; charset={charset}
2775 Content-Transfer-Encoding: {cte}
2776
2777 {bodyline}
2778 """)
2779
2780 def test_known_8bit_CTE(self):
2781 m = self.bodytest_msg.format(charset='utf-8',
2782 cte='8bit',
2783 bodyline='pöstal').encode('utf-8')
2784 msg = email.message_from_bytes(m)
2785 self.assertEqual(msg.get_payload(), "pöstal\n")
2786 self.assertEqual(msg.get_payload(decode=True),
2787 "pöstal\n".encode('utf-8'))
2788
2789 def test_unknown_8bit_CTE(self):
2790 m = self.bodytest_msg.format(charset='notavalidcharset',
2791 cte='8bit',
2792 bodyline='pöstal').encode('utf-8')
2793 msg = email.message_from_bytes(m)
2794 self.assertEqual(msg.get_payload(), "p��stal\n")
2795 self.assertEqual(msg.get_payload(decode=True),
2796 "pöstal\n".encode('utf-8'))
2797
2798 def test_8bit_in_quopri_body(self):
2799 # This is non-RFC compliant data...without 'decode' the library code
2800 # decodes the body using the charset from the headers, and because the
2801 # source byte really is utf-8 this works. This is likely to fail
2802 # against real dirty data (ie: produce mojibake), but the data is
2803 # invalid anyway so it is as good a guess as any. But this means that
2804 # this test just confirms the current behavior; that behavior is not
2805 # necessarily the best possible behavior. With 'decode' it is
2806 # returning the raw bytes, so that test should be of correct behavior,
2807 # or at least produce the same result that email4 did.
2808 m = self.bodytest_msg.format(charset='utf-8',
2809 cte='quoted-printable',
2810 bodyline='p=C3=B6stál').encode('utf-8')
2811 msg = email.message_from_bytes(m)
2812 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2813 self.assertEqual(msg.get_payload(decode=True),
2814 'pöstál\n'.encode('utf-8'))
2815
2816 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2817 # This is similar to the previous test, but proves that if the 8bit
2818 # byte is undecodeable in the specified charset, it gets replaced
2819 # by the unicode 'unknown' character. Again, this may or may not
2820 # be the ideal behavior. Note that if decode=False none of the
2821 # decoders will get involved, so this is the only test we need
2822 # for this behavior.
2823 m = self.bodytest_msg.format(charset='ascii',
2824 cte='quoted-printable',
2825 bodyline='p=C3=B6stál').encode('utf-8')
2826 msg = email.message_from_bytes(m)
2827 self.assertEqual(msg.get_payload(), 'p=C3=B6st��l\n')
2828 self.assertEqual(msg.get_payload(decode=True),
2829 'pöstál\n'.encode('utf-8'))
2830
2831 def test_8bit_in_base64_body(self):
2832 # Sticking an 8bit byte in a base64 block makes it undecodable by
2833 # normal means, so the block is returned undecoded, but as bytes.
2834 m = self.bodytest_msg.format(charset='utf-8',
2835 cte='base64',
2836 bodyline='cMO2c3RhbAá=').encode('utf-8')
2837 msg = email.message_from_bytes(m)
2838 self.assertEqual(msg.get_payload(decode=True),
2839 'cMO2c3RhbAá=\n'.encode('utf-8'))
2840
2841 def test_8bit_in_uuencode_body(self):
2842 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2843 # normal means, so the block is returned undecoded, but as bytes.
2844 m = self.bodytest_msg.format(charset='utf-8',
2845 cte='uuencode',
2846 bodyline='<,.V<W1A; á ').encode('utf-8')
2847 msg = email.message_from_bytes(m)
2848 self.assertEqual(msg.get_payload(decode=True),
2849 '<,.V<W1A; á \n'.encode('utf-8'))
2850
2851
2852 headertest_msg = textwrap.dedent("""\
2853 From: foo@bar.com
2854 To: báz
2855 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
2856 \tJean de Baddie
2857 From: göst
2858
2859 Yes, they are flying.
2860 """).encode('utf-8')
2861
2862 def test_get_8bit_header(self):
2863 msg = email.message_from_bytes(self.headertest_msg)
2864 self.assertEqual(msg.get('to'), 'b??z')
2865 self.assertEqual(msg['to'], 'b??z')
2866
2867 def test_print_8bit_headers(self):
2868 msg = email.message_from_bytes(self.headertest_msg)
2869 self.assertEqual(str(msg),
2870 self.headertest_msg.decode(
2871 'ascii', 'replace').replace('�', '?'))
2872
2873 def test_values_with_8bit_headers(self):
2874 msg = email.message_from_bytes(self.headertest_msg)
2875 self.assertListEqual(msg.values(),
2876 ['foo@bar.com',
2877 'b??z',
2878 'Maintenant je vous pr??sente mon '
2879 'coll??gue, le pouf c??l??bre\n'
2880 '\tJean de Baddie',
2881 "g??st"])
2882
2883 def test_items_with_8bit_headers(self):
2884 msg = email.message_from_bytes(self.headertest_msg)
2885 self.assertListEqual(msg.items(),
2886 [('From', 'foo@bar.com'),
2887 ('To', 'b??z'),
2888 ('Subject', 'Maintenant je vous pr??sente mon '
2889 'coll??gue, le pouf c??l??bre\n'
2890 '\tJean de Baddie'),
2891 ('From', 'g??st')])
2892
2893 def test_get_all_with_8bit_headers(self):
2894 msg = email.message_from_bytes(self.headertest_msg)
2895 self.assertListEqual(msg.get_all('from'),
2896 ['foo@bar.com',
2897 'g??st'])
2898
2899 non_latin_bin_msg = textwrap.dedent("""\
2900 From: foo@bar.com
2901 To: báz
2902 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
2903 \tJean de Baddie
2904 Mime-Version: 1.0
2905 Content-Type: text/plain; charset="utf-8"
2906 Content-Transfer-Encoding: 8bit
2907
2908 Да, они летят.
2909 """).encode('utf-8')
2910
2911 def test_bytes_generator(self):
2912 msg = email.message_from_bytes(self.non_latin_bin_msg)
2913 out = BytesIO()
2914 email.generator.BytesGenerator(out).flatten(msg)
2915 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
2916
2917 # XXX: ultimately the '?' should turn into CTE encoded bytes
2918 # using 'unknown-8bit' charset.
2919 non_latin_bin_msg_as7bit = textwrap.dedent("""\
2920 From: foo@bar.com
2921 To: b??z
2922 Subject: Maintenant je vous pr??sente mon coll??gue, le pouf c??l??bre
2923 \tJean de Baddie
2924 Mime-Version: 1.0
2925 Content-Type: text/plain; charset="utf-8"
2926 Content-Transfer-Encoding: base64
2927
2928 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
2929 """)
2930
2931 def test_generator_handles_8bit(self):
2932 msg = email.message_from_bytes(self.non_latin_bin_msg)
2933 out = StringIO()
2934 email.generator.Generator(out).flatten(msg)
2935 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit)
2936
2937 def test_bytes_generator_with_unix_from(self):
2938 # The unixfrom contains a current date, so we can't check it
2939 # literally. Just make sure the first word is 'From' and the
2940 # rest of the message matches the input.
2941 msg = email.message_from_bytes(self.non_latin_bin_msg)
2942 out = BytesIO()
2943 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
2944 lines = out.getvalue().split(b'\n')
2945 self.assertEqual(lines[0].split()[0], b'From')
2946 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
2947
2948 def test_message_from_binary_file(self):
2949 fn = 'test.msg'
2950 self.addCleanup(unlink, fn)
2951 with open(fn, 'wb') as testfile:
2952 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00002953 with open(fn, 'rb') as testfile:
2954 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00002955 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
2956
2957 latin_bin_msg = textwrap.dedent("""\
2958 From: foo@bar.com
2959 To: Dinsdale
2960 Subject: Nudge nudge, wink, wink
2961 Mime-Version: 1.0
2962 Content-Type: text/plain; charset="latin-1"
2963 Content-Transfer-Encoding: 8bit
2964
2965 oh là là, know what I mean, know what I mean?
2966 """).encode('latin-1')
2967
2968 latin_bin_msg_as7bit = textwrap.dedent("""\
2969 From: foo@bar.com
2970 To: Dinsdale
2971 Subject: Nudge nudge, wink, wink
2972 Mime-Version: 1.0
2973 Content-Type: text/plain; charset="iso-8859-1"
2974 Content-Transfer-Encoding: quoted-printable
2975
2976 oh l=E0 l=E0, know what I mean, know what I mean?
2977 """)
2978
2979 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
2980 m = email.message_from_bytes(self.latin_bin_msg)
2981 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
2982
2983 def test_decoded_generator_emits_unicode_body(self):
2984 m = email.message_from_bytes(self.latin_bin_msg)
2985 out = StringIO()
2986 email.generator.DecodedGenerator(out).flatten(m)
2987 #DecodedHeader output contains an extra blank line compared
2988 #to the input message. RDM: not sure if this is a bug or not,
2989 #but it is not specific to the 8bit->7bit conversion.
2990 self.assertEqual(out.getvalue(),
2991 self.latin_bin_msg.decode('latin-1')+'\n')
2992
2993 def test_bytes_feedparser(self):
2994 bfp = email.feedparser.BytesFeedParser()
2995 for i in range(0, len(self.latin_bin_msg), 10):
2996 bfp.feed(self.latin_bin_msg[i:i+10])
2997 m = bfp.close()
2998 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
2999
R. David Murray8451c4b2010-10-23 22:19:56 +00003000 def test_crlf_flatten(self):
3001 with openfile('msg_26.txt', 'rb') as fp:
3002 text = fp.read()
3003 msg = email.message_from_bytes(text)
3004 s = BytesIO()
3005 g = email.generator.BytesGenerator(s)
3006 g.flatten(msg, linesep='\r\n')
3007 self.assertEqual(s.getvalue(), text)
3008 maxDiff = None
3009
Ezio Melottib3aedd42010-11-20 19:04:17 +00003010
class BaseTestBytesGeneratorIdempotent:
    # Mixin that reads the data files as bytes and flattens messages with
    # BytesGenerator.  The class it is mixed into must supply 'linesep',
    # 'blinesep' and 'normalize_linesep_regex'.

    maxDiff = None

    def _msgobj(self, filename):
        # Return (message, data): the file's bytes with line endings
        # rewritten to self.blinesep, and the message parsed from them.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with unlimited header length and self.linesep, and
        # require the output to equal data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        expected_lines = str1.split(b'\n')
        actual_lines = str2.split(b'\n')
        self.assertListEqual(expected_lines, actual_lines)
3032
3033
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Combines the two base classes using bare-LF line endings.
    # Line separator as str and as bytes.
    linesep = '\n'
    blinesep = b'\n'
    # Matches every CRLF pair.
    normalize_linesep_regex = re.compile(br'\r\n')
3039
3040
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Combines the two base classes using CRLF line endings.
    # Line separator as str and as bytes.
    linesep = '\r\n'
    blinesep = b'\r\n'
    # Matches a LF that is not already preceded by a CR.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3046
Ezio Melottib3aedd42010-11-20 19:04:17 +00003047
class TestBase64(unittest.TestCase):
    # Tests for the base64 helpers in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        # header_length() must agree with the size of the real encoding.
        encoded = base64mime.body_encode(b'hello', eol='')
        eq(base64mime.header_length('hello'), len(encoded))
        for size in range(15):
            # Four output characters for every started group of three
            # input characters.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        for encoded, expected in (('', b''), ('aGVsbG8=', b'hello')):
            eq(base64mime.decode(encoded), expected)

    def test_encode(self):
        eq = self.assertEqual
        encode = base64mime.body_encode
        eq(encode(b''), b'')
        eq(encode(b'hello'), 'aGVsbG8=\n')
        # Input that itself ends with a newline
        eq(encode(b'hello\n'), 'aGVsbG8K\n')
        sample = b'xxxx ' * 20
        # Test the maxlinelen arg
        eq(encode(sample, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(encode(sample, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        # Default charset
        default_charset_cases = (
            ('hello', '=?iso-8859-1?b?aGVsbG8=?='),
            ('hello\r\nworld', '=?iso-8859-1?b?aGVsbG8NCndvcmxk?='),
            ('hello\nworld', '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
            )
        for text, expected in default_charset_cases:
            eq(he(text), expected)
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003098
3099
Ezio Melottib3aedd42010-11-20 19:04:17 +00003100
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003101class TestQuopri(unittest.TestCase):
def setUp(self):
    # Set of characters (as byte integers) that don't need to be encoded
    # in headers.
    lowercase = range(ord('a'), ord('z') + 1)
    uppercase = range(ord('A'), ord('Z') + 1)
    digits = range(ord('0'), ord('9') + 1)
    # Iterating a bytes object yields its byte values as integers.
    self.hlit = list(chain(lowercase, uppercase, digits, b'!*+-/'))
    # Set of characters (as byte integers) that do need to be encoded in
    # headers.
    self.hnon = [octet for octet in range(256) if octet not in self.hlit]
    assert len(self.hlit) + len(self.hnon) == 256
    # Set of characters (as byte integers) that don't need to be encoded
    # in bodies: space through '~' except '=', followed by tab.
    self.blit = [octet for octet in range(ord(' '), ord('~') + 1)
                 if octet != ord('=')]
    self.blit.append(ord('\t'))
    # Set of characters (as byte integers) that do need to be encoded in
    # bodies.
    self.bnon = [octet for octet in range(256) if octet not in self.blit]
    assert len(self.blit) + len(self.bnon) == 256
3123
Guido van Rossum9604e662007-08-30 03:46:43 +00003124 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003125 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003126 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003127 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003128 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003129 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003130 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003131
Guido van Rossum9604e662007-08-30 03:46:43 +00003132 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003133 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003134 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003135 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003136 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003137 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003138 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003139
3140 def test_header_quopri_len(self):
3141 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003142 eq(quoprimime.header_length(b'hello'), 5)
3143 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003144 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003145 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003146 # =?xxx?q?...?= means 10 extra characters
3147 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003148 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3149 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003150 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003151 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003152 # =?xxx?q?...?= means 10 extra characters
3153 10)
3154 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003155 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003156 'expected length 1 for %r' % chr(c))
3157 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003158 # Space is special; it's encoded to _
3159 if c == ord(' '):
3160 continue
3161 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003162 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003163 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003164
3165 def test_body_quopri_len(self):
3166 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003167 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003168 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003169 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003170 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003171
3172 def test_quote_unquote_idempotent(self):
3173 for x in range(256):
3174 c = chr(x)
3175 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3176
3177 def test_header_encode(self):
3178 eq = self.assertEqual
3179 he = quoprimime.header_encode
3180 eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
3181 eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
3182 eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
3183 # Test a non-ASCII character
3184 eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
3185
3186 def test_decode(self):
3187 eq = self.assertEqual
3188 eq(quoprimime.decode(''), '')
3189 eq(quoprimime.decode('hello'), 'hello')
3190 eq(quoprimime.decode('hello', 'X'), 'hello')
3191 eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')
3192
3193 def test_encode(self):
3194 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003195 eq(quoprimime.body_encode(''), '')
3196 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003197 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003198 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003199 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003200 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003201xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3202 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3203x xxxx xxxx xxxx xxxx=20""")
3204 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003205 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3206 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003207xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3208 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3209x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003210 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003211one line
3212
3213two line"""), """\
3214one line
3215
3216two line""")
3217
3218
Ezio Melottib3aedd42010-11-20 19:04:17 +00003219
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003220# Test the Charset class
3221class TestCharset(unittest.TestCase):
3222 def tearDown(self):
3223 from email import charset as CharsetModule
3224 try:
3225 del CharsetModule.CHARSETS['fake']
3226 except KeyError:
3227 pass
3228
Guido van Rossum9604e662007-08-30 03:46:43 +00003229 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003230 eq = self.assertEqual
3231 # Make sure us-ascii = no Unicode conversion
3232 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003233 eq(c.header_encode('Hello World!'), 'Hello World!')
3234 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003235 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003236 self.assertRaises(UnicodeError, c.header_encode, s)
3237 c = Charset('utf-8')
3238 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003239
3240 def test_body_encode(self):
3241 eq = self.assertEqual
3242 # Try a charset with QP body encoding
3243 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003244 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003245 # Try a charset with Base64 body encoding
3246 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003247 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003248 # Try a charset with None body encoding
3249 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003250 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003251 # Try the convert argument, where input codec != output codec
3252 c = Charset('euc-jp')
3253 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003254 # XXX FIXME
3255## try:
3256## eq('\x1b$B5FCO;~IW\x1b(B',
3257## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3258## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3259## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3260## except LookupError:
3261## # We probably don't have the Japanese codecs installed
3262## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003263 # Testing SF bug #625509, which we have to fake, since there are no
3264 # built-in encodings where the header encoding is QP but the body
3265 # encoding is not.
3266 from email import charset as CharsetModule
3267 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3268 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003269 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003270
3271 def test_unicode_charset_name(self):
3272 charset = Charset('us-ascii')
3273 self.assertEqual(str(charset), 'us-ascii')
3274 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3275
3276
Ezio Melottib3aedd42010-11-20 19:04:17 +00003277
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003278# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # The appended chunk carries its own leading space and chunks are
        # joined with a space, so the result has two spaces (compare
        # test_simple_surprise, which appends without the leading space).
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # No space was supplied, yet one shows up between the two chunks.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must respect maxlinelen; assertLessEqual reports
        # the offending length when it does not.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # One header built from three chunks, each in its own charset.
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back the three original chunks.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # No header_name: the first line is filled up to the default width.
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With header_name the first line is folded one word earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A large explicit maxlinelen leaves the value unfolded.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Reuse x (same value as before) so the round-trip check below is
        # visibly against what was appended, as in test_base64_splittable.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset may be given by name instead of as a Charset object.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # Mostly-ASCII text comes out as a 'q' encoded word ...
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        # ... while all-non-ASCII text comes out as a 'b' encoded word.
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Undecodable bytes are rejected by default ...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ... and accepted with errors='replace', matching bytes.decode().
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        # The encoded form must survive a decode/re-encode round trip.
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        # The two adjacent koi8-r encoded words decode to a single part.
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)
3585
3586
Ezio Melottib3aedd42010-11-20 19:04:17 +00003587
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003588# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Throughout this class, get_param() is expected to return a
    # (charset, language, value) 3-tuple for RFC 2231 extended parameters and
    # a plain string otherwise.

    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # unquote=False keeps the surrounding double quotes in the value.
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        # Without a language the middle tuple element is the empty string.
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # Setting the parameter on a parsed message and flattening it again.
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # Only 'foo' is removed; 'title' must still be in the output.
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Continuations without charset/language collapse to a plain string.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # NOTE(review): input and expectation are identical to
        # test_rfc2231_no_language_or_charset_in_filename above; kept as is
        # because the intended difference cannot be told from this file.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 is not marked as encoded (no trailing '*'), so its
        # %-escapes and leading '' are expected to be left alone ...
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        # ... while the marked segment 1 is decoded.
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded, so no %-escape is decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        # Note the lower-cased result.
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # An unknown charset label ('bogus') must not prevent decoding.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        # The byte that is not valid ascii becomes U+FFFD.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        # A lone apostrophe must not be taken for a charset'language' prefix.
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        # Only the first two apostrophes delimit charset and language; the
        # third one belongs to the value.
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Not an extended parameter, so the whole text is the value.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
3897
3898
Ezio Melottib3aedd42010-11-20 19:04:17 +00003899
R. David Murraya8f480f2010-01-16 18:30:03 +00003900# Tests to ensure that signed parts of an email are completely preserved, as
3901# required by RFC1847 section 2.1. Note that these are incomplete, because the
3902# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Captures the text between the first boundary line ('--' + boundary)
    # and the next line consisting of '--' plus that same boundary.
    # Compiled once with the module-level ``re`` import.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, message parsed from that text).
        with openfile(findfile(filename)) as fp:
            original = fp.read()
            msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        # Return the first MIME part of text.  If none is found, fail with a
        # readable message rather than an AttributeError on None.group.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no MIME part found in the %s message' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'regenerated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
3935
3936
Ezio Melottib3aedd42010-11-20 19:04:17 +00003937
def _testclasses():
    # Collect every attribute of this module whose name starts with 'Test',
    # in the (sorted) order dir() reports them.
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
3941
3942
def suite():
    # Build one TestSuite holding the tests of every class returned by
    # _testclasses().  The loader is used directly because
    # unittest.makeSuite() is deprecated and gone in newer Python releases.
    tests = unittest.TestSuite()
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    for testclass in _testclasses():
        tests.addTest(load(testclass))
    return tests
3947 return suite
3948
3949
def test_main():
    # Hand each collected test class to run_unittest(), one call per class.
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
3953
3954
Ezio Melottib3aedd42010-11-20 19:04:17 +00003955
# When executed as a script, let unittest run whatever suite() returns.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')