blob: 4855371d1b1753f17f8b57909b360bc02bcede05 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
# Shared string constants used when joining or comparing text in the tests.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory beside the test package.

    Any extra positional and keyword arguments are handed to ``open()``.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
Guido van Rossum8b3febe2007-08-30 01:15:14 +000055# Base test class
class TestEmailBase(unittest.TestCase):
    """Helpers shared by the email test cases."""

    def ndiffAssertEqual(self, first, second):
        """Fail with an ndiff of both values unless they compare equal."""
        if first != second:
            # Compare the repr of each line so whitespace differences are
            # visible in the failure output.
            left = list(map(repr, str(first).splitlines()))
            right = list(map(repr, str(second).splitlines()))
            delta = difflib.ndiff(left, right)
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Parse the named test data file and return the message object."""
        with openfile(findfile(filename)) as stream:
            message = email.message_from_file(stream)
        return message
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # NOTE: the tests are described with comments rather than docstrings so
    # that unittest keeps reporting them by method name.

    def test_get_all(self):
        # get_all() returns every value of a header, or the given default.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        # Setting, reading back and removing a Charset on a message.
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # A charset passed to set_payload() is recorded on the message.
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        # get_charsets() lists one entry per part, using the failobj
        # argument (None by default) where a part has no charset.
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field instead of replacing
            # the old one, and lookups return the first match.  Drop the
            # previous value so each spelling is really exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        # DecodedGenerator output for msg_07 must match the stored msg_17.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        # Flattening a parsed message reproduces the original text, with
        # and without the leading unix "From " line.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        # The parameter is still found despite the malformed header value.
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        # Header membership tests ignore case.
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        # A deleted parameter that is set again is appended at the end.
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        # set_type() rejects a value without a slash and keeps existing
        # parameters when the type is changed.
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type without a slash falls back to the default type.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type without a slash falls back to the default type.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        # replace_header() keeps header order, changes only the first
        # matching field and raises KeyError for an unknown header.
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        # Undecodable base64 data is handed back as the raw payload bytes.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000563
Ezio Melottib3aedd42010-11-20 19:04:17 +0000564
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000565# Test the email.encoders module
566class TestEncoders(unittest.TestCase):
567 def test_encode_empty_payload(self):
568 eq = self.assertEqual
569 msg = Message()
570 msg.set_charset('us-ascii')
571 eq(msg['content-transfer-encoding'], '7bit')
572
573 def test_default_cte(self):
574 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000575 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000576 msg = MIMEText('hello world')
577 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000578 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000579 msg = MIMEText('hello \xf8 world')
580 eq(msg['content-transfer-encoding'], '8bit')
581 # And now with a different charset
582 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
583 eq(msg['content-transfer-encoding'], 'quoted-printable')
584
R. David Murraye85200d2010-05-06 01:41:14 +0000585 def test_encode7or8bit(self):
586 # Make sure a charset whose input character set is 8bit but
587 # whose output character set is 7bit gets a transfer-encoding
588 # of 7bit.
589 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000590 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000591 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000592
Ezio Melottib3aedd42010-11-20 19:04:17 +0000593
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000594# Test long header wrapping
595class TestLongHeaders(TestEmailBase):
596 def test_split_long_continuation(self):
597 eq = self.ndiffAssertEqual
598 msg = email.message_from_string("""\
599Subject: bug demonstration
600\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
601\tmore text
602
603test
604""")
605 sfp = StringIO()
606 g = Generator(sfp)
607 g.flatten(msg)
608 eq(sfp.getvalue(), """\
609Subject: bug demonstration
610\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
611\tmore text
612
613test
614""")
615
616 def test_another_long_almost_unsplittable_header(self):
617 eq = self.ndiffAssertEqual
618 hstr = """\
619bug demonstration
620\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
621\tmore text"""
622 h = Header(hstr, continuation_ws='\t')
623 eq(h.encode(), """\
624bug demonstration
625\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
626\tmore text""")
627 h = Header(hstr.replace('\t', ' '))
628 eq(h.encode(), """\
629bug demonstration
630 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
631 more text""")
632
633 def test_long_nonstring(self):
634 eq = self.ndiffAssertEqual
635 g = Charset("iso-8859-1")
636 cz = Charset("iso-8859-2")
637 utf8 = Charset("utf-8")
638 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
639 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
640 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
641 b'bef\xf6rdert. ')
642 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
643 b'd\xf9vtipu.. ')
644 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
645 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
646 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
647 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
648 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
649 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
650 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
651 '\u3044\u307e\u3059\u3002')
652 h = Header(g_head, g, header_name='Subject')
653 h.append(cz_head, cz)
654 h.append(utf8_head, utf8)
655 msg = Message()
656 msg['Subject'] = h
657 sfp = StringIO()
658 g = Generator(sfp)
659 g.flatten(msg)
660 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000661Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
662 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
663 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
664 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
665 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
666 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
667 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
668 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
669 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
670 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
671 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000672
673""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000674 eq(h.encode(maxlinelen=76), """\
675=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
676 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
677 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
678 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
679 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
680 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
681 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
682 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
683 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
684 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
685 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000686
687 def test_long_header_encode(self):
688 eq = self.ndiffAssertEqual
689 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
690 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
691 header_name='X-Foobar-Spoink-Defrobnit')
692 eq(h.encode(), '''\
693wasnipoop; giraffes="very-long-necked-animals";
694 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
695
696 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
697 eq = self.ndiffAssertEqual
698 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
699 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
700 header_name='X-Foobar-Spoink-Defrobnit',
701 continuation_ws='\t')
702 eq(h.encode(), '''\
703wasnipoop; giraffes="very-long-necked-animals";
704 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
705
706 def test_long_header_encode_with_tab_continuation(self):
707 eq = self.ndiffAssertEqual
708 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
709 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
710 header_name='X-Foobar-Spoink-Defrobnit',
711 continuation_ws='\t')
712 eq(h.encode(), '''\
713wasnipoop; giraffes="very-long-necked-animals";
714\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
715
716 def test_header_splitter(self):
717 eq = self.ndiffAssertEqual
718 msg = MIMEText('')
719 # It'd be great if we could use add_header() here, but that doesn't
720 # guarantee an order of the parameters.
721 msg['X-Foobar-Spoink-Defrobnit'] = (
722 'wasnipoop; giraffes="very-long-necked-animals"; '
723 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
724 sfp = StringIO()
725 g = Generator(sfp)
726 g.flatten(msg)
727 eq(sfp.getvalue(), '''\
728Content-Type: text/plain; charset="us-ascii"
729MIME-Version: 1.0
730Content-Transfer-Encoding: 7bit
731X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
732 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
733
734''')
735
736 def test_no_semis_header_splitter(self):
737 eq = self.ndiffAssertEqual
738 msg = Message()
739 msg['From'] = 'test@dom.ain'
740 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
741 msg.set_payload('Test')
742 sfp = StringIO()
743 g = Generator(sfp)
744 g.flatten(msg)
745 eq(sfp.getvalue(), """\
746From: test@dom.ain
747References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
748 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
749
750Test""")
751
752 def test_no_split_long_header(self):
753 eq = self.ndiffAssertEqual
754 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000755 h = Header(hstr)
756 # These come on two lines because Headers are really field value
757 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000758 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000759References:
760 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
761 h = Header('x' * 80)
762 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000763
764 def test_splitting_multiple_long_lines(self):
765 eq = self.ndiffAssertEqual
766 hstr = """\
767from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
768\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
769\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
770"""
771 h = Header(hstr, continuation_ws='\t')
772 eq(h.encode(), """\
773from babylon.socal-raves.org (localhost [127.0.0.1]);
774 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
775 for <mailman-admin@babylon.socal-raves.org>;
776 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
777\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
778 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
779 for <mailman-admin@babylon.socal-raves.org>;
780 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
781\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
782 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
783 for <mailman-admin@babylon.socal-raves.org>;
784 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
785
786 def test_splitting_first_line_only_is_long(self):
787 eq = self.ndiffAssertEqual
788 hstr = """\
789from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
790\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
791\tid 17k4h5-00034i-00
792\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
793 h = Header(hstr, maxlinelen=78, header_name='Received',
794 continuation_ws='\t')
795 eq(h.encode(), """\
796from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
797 helo=cthulhu.gerg.ca)
798\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
799\tid 17k4h5-00034i-00
800\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
801
802 def test_long_8bit_header(self):
803 eq = self.ndiffAssertEqual
804 msg = Message()
805 h = Header('Britische Regierung gibt', 'iso-8859-1',
806 header_name='Subject')
807 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000808 eq(h.encode(maxlinelen=76), """\
809=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
810 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000811 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000812 eq(msg.as_string(maxheaderlen=76), """\
813Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
814 =?iso-8859-1?q?hore-Windkraftprojekte?=
815
816""")
817 eq(msg.as_string(maxheaderlen=0), """\
818Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000819
820""")
821
822 def test_long_8bit_header_no_charset(self):
823 eq = self.ndiffAssertEqual
824 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000825 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
826 'f\xfcr Offshore-Windkraftprojekte '
827 '<a-very-long-address@example.com>')
828 msg['Reply-To'] = header_string
829 self.assertRaises(UnicodeEncodeError, msg.as_string)
830 msg = Message()
831 msg['Reply-To'] = Header(header_string, 'utf-8',
832 header_name='Reply-To')
833 eq(msg.as_string(maxheaderlen=78), """\
834Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
835 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000836
837""")
838
839 def test_long_to_header(self):
840 eq = self.ndiffAssertEqual
841 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
842 '<someone@eecs.umich.edu>,'
843 '"Someone Test #B" <someone@umich.edu>, '
844 '"Someone Test #C" <someone@eecs.umich.edu>, '
845 '"Someone Test #D" <someone@eecs.umich.edu>')
846 msg = Message()
847 msg['To'] = to
848 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000849To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000850 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000851 "Someone Test #C" <someone@eecs.umich.edu>,
852 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000853
854''')
855
856 def test_long_line_after_append(self):
857 eq = self.ndiffAssertEqual
858 s = 'This is an example of string which has almost the limit of header length.'
859 h = Header(s)
860 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000861 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000862This is an example of string which has almost the limit of header length.
863 Add another line.""")
864
865 def test_shorter_line_with_append(self):
866 eq = self.ndiffAssertEqual
867 s = 'This is a shorter line.'
868 h = Header(s)
869 h.append('Add another sentence. (Surprise?)')
870 eq(h.encode(),
871 'This is a shorter line. Add another sentence. (Surprise?)')
872
873 def test_long_field_name(self):
874 eq = self.ndiffAssertEqual
875 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000876 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
877 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
878 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
879 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000880 h = Header(gs, 'iso-8859-1', header_name=fn)
881 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000882 eq(h.encode(maxlinelen=76), """\
883=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
884 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
885 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
886 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000887
888 def test_long_received_header(self):
889 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
890 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
891 'Wed, 05 Mar 2003 18:10:18 -0700')
892 msg = Message()
893 msg['Received-1'] = Header(h, continuation_ws='\t')
894 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000895 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000896 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000897Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
898 Wed, 05 Mar 2003 18:10:18 -0700
899Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
900 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000901
902""")
903
904 def test_string_headerinst_eq(self):
905 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
906 'tu-muenchen.de> (David Bremner\'s message of '
907 '"Thu, 6 Mar 2003 13:58:21 +0100")')
908 msg = Message()
909 msg['Received-1'] = Header(h, header_name='Received-1',
910 continuation_ws='\t')
911 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000912 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000913 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000914Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
915 6 Mar 2003 13:58:21 +0100\")
916Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
917 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000918
919""")
920
921 def test_long_unbreakable_lines_with_continuation(self):
922 eq = self.ndiffAssertEqual
923 msg = Message()
924 t = """\
925iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
926 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
927 msg['Face-1'] = t
928 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000929 # XXX This splitting is all wrong. It the first value line should be
930 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000931 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000932Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000933 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000934 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000935Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000936 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000937 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
938
939""")
940
941 def test_another_long_multiline_header(self):
942 eq = self.ndiffAssertEqual
943 m = ('Received: from siimage.com '
944 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000945 'Microsoft SMTPSVC(5.0.2195.4905); '
946 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000947 msg = email.message_from_string(m)
948 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000949Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
950 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000951
952''')
953
954 def test_long_lines_with_different_header(self):
955 eq = self.ndiffAssertEqual
956 h = ('List-Unsubscribe: '
957 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
958 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
959 '?subject=unsubscribe>')
960 msg = Message()
961 msg['List'] = h
962 msg['List'] = Header(h, header_name='List')
963 eq(msg.as_string(maxheaderlen=78), """\
964List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000965 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000966List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000967 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000968
969""")
970
R. David Murray6f0022d2011-01-07 21:57:25 +0000971 def test_long_rfc2047_header_with_embedded_fws(self):
 # Build a utf-8 Header from a dedented multi-line string (its second line
 # starts with a tab escape) and check that encode() returns the four
 # RFC 2047 encoded words listed in the expected literal.
 # NOTE(review): both literals pass through textwrap.dedent(), so the
 # relative leading whitespace of their lines is significant; confirm it
 # against the real file before reformatting this test.
972 h = Header(textwrap.dedent("""\
973 We're going to pretend this header is in a non-ascii character set
974 \tto see if line wrapping with encoded words and embedded
975 folding white space works"""),
976 charset='utf-8',
977 header_name='Test')
978 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
979 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
980 =?utf-8?q?cter_set?=
981 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
982 =?utf-8?q?_folding_white_space_works?=""")+'\n')
983
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000984
Ezio Melottib3aedd42010-11-20 19:04:17 +0000985
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000986# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Fixture: a message whose payload starts with the text "From ".
    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        # With mangle_from_=True the body line gains a leading ">".
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # With mangle_from_=False the body is written unchanged.
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(), expected)
1017
1018
Ezio Melottib3aedd42010-11-20 19:04:17 +00001019
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001020# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is the base64 text of the raw audio bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is used as a sentinel so identity checks cannot be
        # satisfied by any value the message could hold.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1064
1065
Ezio Melottib3aedd42010-11-20 19:04:17 +00001066
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001067# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF fixture and wrap it in a MIMEImage with a guessed
        # subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is the base64 text of the raw image bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is used as a sentinel so identity checks cannot be
        # satisfied by any value the message could hold.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1105
1106
Ezio Melottib3aedd42010-11-20 19:04:17 +00001107
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001108# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Binary data gets the default subtype and a base64 transfer
        # encoding.
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        # Named bytesdata so the builtin ``bytes`` is not shadowed.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # Surrounding whitespace in a base64 body is not significant, so it
        # is stripped before comparing.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)
1122
1123
Ezio Melottib3aedd42010-11-20 19:04:17 +00001124
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001125# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is used as a sentinel for "parameter not found".
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the searched text on failure.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Cyrillic text given with an explicit utf-8 charset round-trips
        # through get_payload(decode=True) as utf-8 bytes.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1176
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001177
Ezio Melottib3aedd42010-11-20 19:04:17 +00001178
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001179# Test complicated multipart/* messages
1180class TestMultipart(TestEmailBase):
1181 def setUp(self):
1182 with openfile('PyBanner048.gif', 'rb') as fp:
1183 data = fp.read()
1184 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1185 image = MIMEImage(data, name='dingusfish.gif')
1186 image.add_header('content-disposition', 'attachment',
1187 filename='dingusfish.gif')
1188 intro = MIMEText('''\
1189Hi there,
1190
1191This is the dingus fish.
1192''')
1193 container.attach(intro)
1194 container.attach(image)
1195 container['From'] = 'Barry <barry@digicool.com>'
1196 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1197 container['Subject'] = 'Here is your dingus fish'
1198
1199 now = 987809702.54848599
1200 timetuple = time.localtime(now)
1201 if timetuple[-1] == 0:
1202 tzsecs = time.timezone
1203 else:
1204 tzsecs = time.altzone
1205 if tzsecs > 0:
1206 sign = '-'
1207 else:
1208 sign = '+'
1209 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1210 container['Date'] = time.strftime(
1211 '%a, %d %b %Y %H:%M:%S',
1212 time.localtime(now)) + tzoffset
1213 self._msg = container
1214 self._im = image
1215 self._txt = intro
1216
1217 def test_hierarchy(self):
1218 # convenience
1219 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001220 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001221 raises = self.assertRaises
1222 # tests
1223 m = self._msg
1224 unless(m.is_multipart())
1225 eq(m.get_content_type(), 'multipart/mixed')
1226 eq(len(m.get_payload()), 2)
1227 raises(IndexError, m.get_payload, 2)
1228 m0 = m.get_payload(0)
1229 m1 = m.get_payload(1)
1230 unless(m0 is self._txt)
1231 unless(m1 is self._im)
1232 eq(m.get_payload(), [m0, m1])
1233 unless(not m0.is_multipart())
1234 unless(not m1.is_multipart())
1235
1236 def test_empty_multipart_idempotent(self):
1237 text = """\
1238Content-Type: multipart/mixed; boundary="BOUNDARY"
1239MIME-Version: 1.0
1240Subject: A subject
1241To: aperson@dom.ain
1242From: bperson@dom.ain
1243
1244
1245--BOUNDARY
1246
1247
1248--BOUNDARY--
1249"""
1250 msg = Parser().parsestr(text)
1251 self.ndiffAssertEqual(text, msg.as_string())
1252
1253 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1254 outer = MIMEBase('multipart', 'mixed')
1255 outer['Subject'] = 'A subject'
1256 outer['To'] = 'aperson@dom.ain'
1257 outer['From'] = 'bperson@dom.ain'
1258 outer.set_boundary('BOUNDARY')
1259 self.ndiffAssertEqual(outer.as_string(), '''\
1260Content-Type: multipart/mixed; boundary="BOUNDARY"
1261MIME-Version: 1.0
1262Subject: A subject
1263To: aperson@dom.ain
1264From: bperson@dom.ain
1265
1266--BOUNDARY
1267
1268--BOUNDARY--''')
1269
1270 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1271 outer = MIMEBase('multipart', 'mixed')
1272 outer['Subject'] = 'A subject'
1273 outer['To'] = 'aperson@dom.ain'
1274 outer['From'] = 'bperson@dom.ain'
1275 outer.preamble = ''
1276 outer.epilogue = ''
1277 outer.set_boundary('BOUNDARY')
1278 self.ndiffAssertEqual(outer.as_string(), '''\
1279Content-Type: multipart/mixed; boundary="BOUNDARY"
1280MIME-Version: 1.0
1281Subject: A subject
1282To: aperson@dom.ain
1283From: bperson@dom.ain
1284
1285
1286--BOUNDARY
1287
1288--BOUNDARY--
1289''')
1290
1291 def test_one_part_in_a_multipart(self):
1292 eq = self.ndiffAssertEqual
1293 outer = MIMEBase('multipart', 'mixed')
1294 outer['Subject'] = 'A subject'
1295 outer['To'] = 'aperson@dom.ain'
1296 outer['From'] = 'bperson@dom.ain'
1297 outer.set_boundary('BOUNDARY')
1298 msg = MIMEText('hello world')
1299 outer.attach(msg)
1300 eq(outer.as_string(), '''\
1301Content-Type: multipart/mixed; boundary="BOUNDARY"
1302MIME-Version: 1.0
1303Subject: A subject
1304To: aperson@dom.ain
1305From: bperson@dom.ain
1306
1307--BOUNDARY
1308Content-Type: text/plain; charset="us-ascii"
1309MIME-Version: 1.0
1310Content-Transfer-Encoding: 7bit
1311
1312hello world
1313--BOUNDARY--''')
1314
1315 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1316 eq = self.ndiffAssertEqual
1317 outer = MIMEBase('multipart', 'mixed')
1318 outer['Subject'] = 'A subject'
1319 outer['To'] = 'aperson@dom.ain'
1320 outer['From'] = 'bperson@dom.ain'
1321 outer.preamble = ''
1322 msg = MIMEText('hello world')
1323 outer.attach(msg)
1324 outer.set_boundary('BOUNDARY')
1325 eq(outer.as_string(), '''\
1326Content-Type: multipart/mixed; boundary="BOUNDARY"
1327MIME-Version: 1.0
1328Subject: A subject
1329To: aperson@dom.ain
1330From: bperson@dom.ain
1331
1332
1333--BOUNDARY
1334Content-Type: text/plain; charset="us-ascii"
1335MIME-Version: 1.0
1336Content-Transfer-Encoding: 7bit
1337
1338hello world
1339--BOUNDARY--''')
1340
1341
1342 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1343 eq = self.ndiffAssertEqual
1344 outer = MIMEBase('multipart', 'mixed')
1345 outer['Subject'] = 'A subject'
1346 outer['To'] = 'aperson@dom.ain'
1347 outer['From'] = 'bperson@dom.ain'
1348 outer.preamble = None
1349 msg = MIMEText('hello world')
1350 outer.attach(msg)
1351 outer.set_boundary('BOUNDARY')
1352 eq(outer.as_string(), '''\
1353Content-Type: multipart/mixed; boundary="BOUNDARY"
1354MIME-Version: 1.0
1355Subject: A subject
1356To: aperson@dom.ain
1357From: bperson@dom.ain
1358
1359--BOUNDARY
1360Content-Type: text/plain; charset="us-ascii"
1361MIME-Version: 1.0
1362Content-Transfer-Encoding: 7bit
1363
1364hello world
1365--BOUNDARY--''')
1366
1367
1368 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1369 eq = self.ndiffAssertEqual
1370 outer = MIMEBase('multipart', 'mixed')
1371 outer['Subject'] = 'A subject'
1372 outer['To'] = 'aperson@dom.ain'
1373 outer['From'] = 'bperson@dom.ain'
1374 outer.epilogue = None
1375 msg = MIMEText('hello world')
1376 outer.attach(msg)
1377 outer.set_boundary('BOUNDARY')
1378 eq(outer.as_string(), '''\
1379Content-Type: multipart/mixed; boundary="BOUNDARY"
1380MIME-Version: 1.0
1381Subject: A subject
1382To: aperson@dom.ain
1383From: bperson@dom.ain
1384
1385--BOUNDARY
1386Content-Type: text/plain; charset="us-ascii"
1387MIME-Version: 1.0
1388Content-Transfer-Encoding: 7bit
1389
1390hello world
1391--BOUNDARY--''')
1392
1393
1394 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1395 eq = self.ndiffAssertEqual
1396 outer = MIMEBase('multipart', 'mixed')
1397 outer['Subject'] = 'A subject'
1398 outer['To'] = 'aperson@dom.ain'
1399 outer['From'] = 'bperson@dom.ain'
1400 outer.epilogue = ''
1401 msg = MIMEText('hello world')
1402 outer.attach(msg)
1403 outer.set_boundary('BOUNDARY')
1404 eq(outer.as_string(), '''\
1405Content-Type: multipart/mixed; boundary="BOUNDARY"
1406MIME-Version: 1.0
1407Subject: A subject
1408To: aperson@dom.ain
1409From: bperson@dom.ain
1410
1411--BOUNDARY
1412Content-Type: text/plain; charset="us-ascii"
1413MIME-Version: 1.0
1414Content-Transfer-Encoding: 7bit
1415
1416hello world
1417--BOUNDARY--
1418''')
1419
1420
1421 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1422 eq = self.ndiffAssertEqual
1423 outer = MIMEBase('multipart', 'mixed')
1424 outer['Subject'] = 'A subject'
1425 outer['To'] = 'aperson@dom.ain'
1426 outer['From'] = 'bperson@dom.ain'
1427 outer.epilogue = '\n'
1428 msg = MIMEText('hello world')
1429 outer.attach(msg)
1430 outer.set_boundary('BOUNDARY')
1431 eq(outer.as_string(), '''\
1432Content-Type: multipart/mixed; boundary="BOUNDARY"
1433MIME-Version: 1.0
1434Subject: A subject
1435To: aperson@dom.ain
1436From: bperson@dom.ain
1437
1438--BOUNDARY
1439Content-Type: text/plain; charset="us-ascii"
1440MIME-Version: 1.0
1441Content-Transfer-Encoding: 7bit
1442
1443hello world
1444--BOUNDARY--
1445
1446''')
1447
1448 def test_message_external_body(self):
1449 eq = self.assertEqual
1450 msg = self._msgobj('msg_36.txt')
1451 eq(len(msg.get_payload()), 2)
1452 msg1 = msg.get_payload(1)
1453 eq(msg1.get_content_type(), 'multipart/alternative')
1454 eq(len(msg1.get_payload()), 2)
1455 for subpart in msg1.get_payload():
1456 eq(subpart.get_content_type(), 'message/external-body')
1457 eq(len(subpart.get_payload()), 1)
1458 subsubpart = subpart.get_payload(0)
1459 eq(subsubpart.get_content_type(), 'text/plain')
1460
1461 def test_double_boundary(self):
1462 # msg_37.txt is a multipart that contains two dash-boundary's in a
1463 # row. Our interpretation of RFC 2046 calls for ignoring the second
1464 # and subsequent boundaries.
1465 msg = self._msgobj('msg_37.txt')
1466 self.assertEqual(len(msg.get_payload()), 3)
1467
1468 def test_nested_inner_contains_outer_boundary(self):
 # Dump the MIME structure of msg_38.txt with iterators._structure() and
 # compare it with the expected tree below.
 # NOTE(review): the nesting depth in the expected literal is carried by
 # per-line indentation; confirm it against the real file before
 # reformatting this test.
1469 eq = self.ndiffAssertEqual
1470 # msg_38.txt has an inner part that contains outer boundaries. My
1471 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
1472 # these are illegal and should be interpreted as unterminated inner
1473 # parts.
1474 msg = self._msgobj('msg_38.txt')
1475 sfp = StringIO()
1476 iterators._structure(msg, sfp)
1477 eq(sfp.getvalue(), """\
1478multipart/mixed
1479 multipart/mixed
1480 multipart/alternative
1481 text/plain
1482 text/plain
1483 text/plain
1484 text/plain
1485""")
1486
1487 def test_nested_with_same_boundary(self):
 # Dump the MIME structure of msg_39.txt with iterators._structure() and
 # compare it with the expected tree below.
 # NOTE(review): the nesting depth in the expected literal is carried by
 # per-line indentation; confirm it against the real file before
 # reformatting this test.
1488 eq = self.ndiffAssertEqual
1489 # msg_39.txt is similarly evil in that it's got inner parts that use
1490 # the same boundary as outer parts. Again, I believe the way this is
1491 # parsed is closest to the spirit of RFC 2046.
1492 msg = self._msgobj('msg_39.txt')
1493 sfp = StringIO()
1494 iterators._structure(msg, sfp)
1495 eq(sfp.getvalue(), """\
1496multipart/mixed
1497 multipart/mixed
1498 multipart/alternative
1499 application/octet-stream
1500 application/octet-stream
1501 text/plain
1502""")
1503
1504 def test_boundary_in_non_multipart(self):
1505 msg = self._msgobj('msg_40.txt')
1506 self.assertEqual(msg.as_string(), '''\
1507MIME-Version: 1.0
1508Content-Type: text/html; boundary="--961284236552522269"
1509
1510----961284236552522269
1511Content-Type: text/html;
1512Content-Transfer-Encoding: 7Bit
1513
1514<html></html>
1515
1516----961284236552522269--
1517''')
1518
1519 def test_boundary_with_leading_space(self):
1520 eq = self.assertEqual
1521 msg = email.message_from_string('''\
1522MIME-Version: 1.0
1523Content-Type: multipart/mixed; boundary=" XXXX"
1524
1525-- XXXX
1526Content-Type: text/plain
1527
1528
1529-- XXXX
1530Content-Type: text/plain
1531
1532-- XXXX--
1533''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001534 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001535 eq(msg.get_boundary(), ' XXXX')
1536 eq(len(msg.get_payload()), 2)
1537
1538 def test_boundary_without_trailing_newline(self):
1539 m = Parser().parsestr("""\
1540Content-Type: multipart/mixed; boundary="===============0012394164=="
1541MIME-Version: 1.0
1542
1543--===============0012394164==
1544Content-Type: image/file1.jpg
1545MIME-Version: 1.0
1546Content-Transfer-Encoding: base64
1547
1548YXNkZg==
1549--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001550 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001551
1552
Ezio Melottib3aedd42010-11-20 19:04:17 +00001553
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001554# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Checks how badly formatted messages are parsed and regenerated."""

    def _check_defect_types(self, msg, *expected):
        # Assert that msg.defects holds exactly len(expected) entries and that
        # each one is an instance of the class at the same position.
        self.assertEqual(len(msg.defects), len(expected))
        for defect, defect_class in zip(msg.defects, expected):
            self.assertIsInstance(defect, defect_class)

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self._check_defect_types(inner, errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # Without a usable boundary the body is left as one string.
        self.assertIsInstance(msg.get_payload(), str)
        self._check_defect_types(msg,
                                 errors.NoBoundaryInMultipartDefect,
                                 errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self._check_defect_types(msg,
                                 errors.NoBoundaryInMultipartDefect,
                                 errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self._check_defect_types(bad, errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        self._check_defect_types(msg,
                                 errors.FirstHeaderLineIsContinuationDefect)
        # The defect records the offending first line.
        eq(msg.defects[0].line, ' Line 1\n')
1665
1666
Ezio Melottib3aedd42010-11-20 19:04:17 +00001667
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001668# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """RFC 2047 encoded-word decoding and re-encoding of header values."""

    def test_rfc2047_multiline(self):
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(decoded)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding with a 76 column limit splits the last encoded word.
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b'Pirard <pirard@dom.ain>', None)])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to surrounding text are not decoded; the whole
        # value comes back unchanged with no charset.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        decoded = decode_header(raw)
        self.assertEqual(decoded, [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
        ]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, plain in cases:
            decoded = decode_header(template % encoded)
            self.assertEqual(decoded, [(plain, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1732
Ezio Melottib3aedd42010-11-20 19:04:17 +00001733
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001734# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage, message/* payloads and multipart generation."""

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_digest_default_types(self, filename):
        # Parse *filename* and check that its first two subparts report
        # message/rfc822 and that the message nested in each reports
        # text/plain, for both the default type and the content type.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_type_error(self):
        # A plain string is rejected with TypeError.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored by reference, not copied.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # Attaching a second message must be refused.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the subparts must still report
        # message/rfc822 as both their content type and default type.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002033
Ezio Melottib3aedd42010-11-20 19:04:17 +00002034
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that the Unix-From line is ignored by default,
# since it may contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse sample messages and check they regenerate to the same text."""

    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed from.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and compare
        # the output with the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def _check_roundtrip(self, filename, unixfrom=False):
        # Parse the named data file and assert that it regenerates unchanged.
        msg, text = self._msgobj(filename)
        self._idempotent(msg, text, unixfrom=unixfrom)

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._check_roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._check_roundtrip('msg_02.txt')

    def test_long_header(self):
        self._check_roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._check_roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._check_roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._check_roundtrip('msg_05.txt')

    def test_dsn(self):
        self._check_roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._check_roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._check_roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._check_roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._check_roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._check_roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._check_roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._check_roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._check_roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._check_roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        # This sample is regenerated with its Unix-From line included.
        self._check_roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._check_roundtrip('msg_45.txt')

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002197
2198
Ezio Melottib3aedd42010-11-20 19:04:17 +00002199
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002200# Test various other bits of the package's functionality
2201class TestMiscellaneous(TestEmailBase):
2202 def test_message_from_string(self):
2203 with openfile('msg_01.txt') as fp:
2204 text = fp.read()
2205 msg = email.message_from_string(text)
2206 s = StringIO()
2207 # Don't wrap/continue long headers since we're trying to test
2208 # idempotency.
2209 g = Generator(s, maxheaderlen=0)
2210 g.flatten(msg)
2211 self.assertEqual(text, s.getvalue())
2212
2213 def test_message_from_file(self):
2214 with openfile('msg_01.txt') as fp:
2215 text = fp.read()
2216 fp.seek(0)
2217 msg = email.message_from_file(fp)
2218 s = StringIO()
2219 # Don't wrap/continue long headers since we're trying to test
2220 # idempotency.
2221 g = Generator(s, maxheaderlen=0)
2222 g.flatten(msg)
2223 self.assertEqual(text, s.getvalue())
2224
2225 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002226 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002227 with openfile('msg_01.txt') as fp:
2228 text = fp.read()
2229
2230 # Create a subclass
2231 class MyMessage(Message):
2232 pass
2233
2234 msg = email.message_from_string(text, MyMessage)
2235 unless(isinstance(msg, MyMessage))
2236 # Try something more complicated
2237 with openfile('msg_02.txt') as fp:
2238 text = fp.read()
2239 msg = email.message_from_string(text, MyMessage)
2240 for subpart in msg.walk():
2241 unless(isinstance(subpart, MyMessage))
2242
2243 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002244 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002245 # Create a subclass
2246 class MyMessage(Message):
2247 pass
2248
2249 with openfile('msg_01.txt') as fp:
2250 msg = email.message_from_file(fp, MyMessage)
2251 unless(isinstance(msg, MyMessage))
2252 # Try something more complicated
2253 with openfile('msg_02.txt') as fp:
2254 msg = email.message_from_file(fp, MyMessage)
2255 for subpart in msg.walk():
2256 unless(isinstance(subpart, MyMessage))
2257
2258 def test__all__(self):
2259 module = __import__('email')
2260 # Can't use sorted() here due to Python 2.3 compatibility
2261 all = module.__all__[:]
2262 all.sort()
2263 self.assertEqual(all, [
2264 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002265 'header', 'iterators', 'message', 'message_from_binary_file',
2266 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002267 'message_from_string', 'mime', 'parser',
2268 'quoprimime', 'utils',
2269 ])
2270
2271 def test_formatdate(self):
2272 now = time.time()
2273 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2274 time.gmtime(now)[:6])
2275
2276 def test_formatdate_localtime(self):
2277 now = time.time()
2278 self.assertEqual(
2279 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2280 time.localtime(now)[:6])
2281
2282 def test_formatdate_usegmt(self):
2283 now = time.time()
2284 self.assertEqual(
2285 utils.formatdate(now, localtime=False),
2286 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2287 self.assertEqual(
2288 utils.formatdate(now, localtime=False, usegmt=True),
2289 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2290
2291 def test_parsedate_none(self):
2292 self.assertEqual(utils.parsedate(''), None)
2293
2294 def test_parsedate_compact(self):
2295 # The FWS after the comma is optional
2296 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2297 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2298
2299 def test_parsedate_no_dayofweek(self):
2300 eq = self.assertEqual
2301 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2302 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2303
2304 def test_parsedate_compact_no_dayofweek(self):
2305 eq = self.assertEqual
2306 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2307 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2308
R. David Murray4a62e892010-12-23 20:35:46 +00002309 def test_parsedate_no_space_before_positive_offset(self):
2310 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2311 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2312
2313 def test_parsedate_no_space_before_negative_offset(self):
2314 # Issue 1155362: we already handled '+' for this case.
2315 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2316 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2317
2318
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002319 def test_parsedate_acceptable_to_time_functions(self):
2320 eq = self.assertEqual
2321 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2322 t = int(time.mktime(timetup))
2323 eq(time.localtime(t)[:6], timetup[:6])
2324 eq(int(time.strftime('%Y', timetup)), 2003)
2325 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2326 t = int(time.mktime(timetup[:9]))
2327 eq(time.localtime(t)[:6], timetup[:6])
2328 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2329
R. David Murray219d1c82010-08-25 00:45:55 +00002330 def test_parsedate_y2k(self):
2331 """Test for parsing a date with a two-digit year.
2332
2333 Parsing a date with a two-digit year should return the correct
2334 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2335 obsoletes RFC822) requires four-digit years.
2336
2337 """
2338 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2339 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2340 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2341 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2342
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002343 def test_parseaddr_empty(self):
2344 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2345 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2346
2347 def test_noquote_dump(self):
2348 self.assertEqual(
2349 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2350 'A Silly Person <person@dom.ain>')
2351
2352 def test_escape_dump(self):
2353 self.assertEqual(
2354 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2355 r'"A \(Very\) Silly Person" <person@dom.ain>')
2356 a = r'A \(Special\) Person'
2357 b = 'person@dom.ain'
2358 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2359
2360 def test_escape_backslashes(self):
2361 self.assertEqual(
2362 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2363 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2364 a = r'Arthur \Backslash\ Foobar'
2365 b = 'person@dom.ain'
2366 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2367
2368 def test_name_with_dot(self):
2369 x = 'John X. Doe <jxd@example.com>'
2370 y = '"John X. Doe" <jxd@example.com>'
2371 a, b = ('John X. Doe', 'jxd@example.com')
2372 self.assertEqual(utils.parseaddr(x), (a, b))
2373 self.assertEqual(utils.parseaddr(y), (a, b))
2374 # formataddr() quotes the name if there's a dot in it
2375 self.assertEqual(utils.formataddr((a, b)), y)
2376
R. David Murray5397e862010-10-02 15:58:26 +00002377 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2378 # issue 10005. Note that in the third test the second pair of
2379 # backslashes is not actually a quoted pair because it is not inside a
2380 # comment or quoted string: the address being parsed has a quoted
2381 # string containing a quoted backslash, followed by 'example' and two
2382 # backslashes, followed by another quoted string containing a space and
2383 # the word 'example'. parseaddr copies those two backslashes
2384 # literally. Per rfc5322 this is not technically correct since a \ may
2385 # not appear in an address outside of a quoted string. It is probably
2386 # a sensible Postel interpretation, though.
2387 eq = self.assertEqual
2388 eq(utils.parseaddr('""example" example"@example.com'),
2389 ('', '""example" example"@example.com'))
2390 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2391 ('', '"\\"example\\" example"@example.com'))
2392 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2393 ('', '"\\\\"example\\\\" example"@example.com'))
2394
R. David Murray63563cd2010-12-18 18:25:38 +00002395 def test_parseaddr_preserves_spaces_in_local_part(self):
2396 # issue 9286. A normal RFC5322 local part should not contain any
2397 # folding white space, but legacy local parts can (they are a sequence
2398 # of atoms, not dotatoms). On the other hand we strip whitespace from
2399 # before the @ and around dots, on the assumption that the whitespace
2400 # around the punctuation is a mistake in what would otherwise be
2401 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2402 self.assertEqual(('', "merwok wok@xample.com"),
2403 utils.parseaddr("merwok wok@xample.com"))
2404 self.assertEqual(('', "merwok wok@xample.com"),
2405 utils.parseaddr("merwok wok@xample.com"))
2406 self.assertEqual(('', "merwok wok@xample.com"),
2407 utils.parseaddr(" merwok wok @xample.com"))
2408 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2409 utils.parseaddr('merwok"wok" wok@xample.com'))
2410 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2411 utils.parseaddr('merwok. wok . wok@xample.com'))
2412
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002413 def test_multiline_from_comment(self):
2414 x = """\
2415Foo
2416\tBar <foo@example.com>"""
2417 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2418
2419 def test_quote_dump(self):
2420 self.assertEqual(
2421 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2422 r'"A Silly; Person" <person@dom.ain>')
2423
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002424 def test_charset_richcomparisons(self):
2425 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002426 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002427 cset1 = Charset()
2428 cset2 = Charset()
2429 eq(cset1, 'us-ascii')
2430 eq(cset1, 'US-ASCII')
2431 eq(cset1, 'Us-AsCiI')
2432 eq('us-ascii', cset1)
2433 eq('US-ASCII', cset1)
2434 eq('Us-AsCiI', cset1)
2435 ne(cset1, 'usascii')
2436 ne(cset1, 'USASCII')
2437 ne(cset1, 'UsAsCiI')
2438 ne('usascii', cset1)
2439 ne('USASCII', cset1)
2440 ne('UsAsCiI', cset1)
2441 eq(cset1, cset2)
2442 eq(cset2, cset1)
2443
2444 def test_getaddresses(self):
2445 eq = self.assertEqual
2446 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2447 'Bud Person <bperson@dom.ain>']),
2448 [('Al Person', 'aperson@dom.ain'),
2449 ('Bud Person', 'bperson@dom.ain')])
2450
def test_getaddresses_nasty(self):
    # Degenerate inputs: an empty group, a string of punctuation, and an
    # empty group followed by a well-formed address.
    cases = (
        (['foo: ;'], [('', '')]),
        (['[]*-- =~$'], [('', ''), ('', ''), ('', '*--')]),
        (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
         [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
    )
    for fields, expected in cases:
        self.assertEqual(utils.getaddresses(fields), expected)
2460
2461 def test_getaddresses_embedded_comment(self):
2462 """Test proper handling of a nested comment"""
2463 eq = self.assertEqual
2464 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2465 eq(addrs[0][1], 'foo@bar.com')
2466
2467 def test_utils_quote_unquote(self):
2468 eq = self.assertEqual
2469 msg = Message()
2470 msg.add_header('content-disposition', 'attachment',
2471 filename='foo\\wacky"name')
2472 eq(msg.get_filename(), 'foo\\wacky"name')
2473
2474 def test_get_body_encoding_with_bogus_charset(self):
2475 charset = Charset('not a charset')
2476 self.assertEqual(charset.get_body_encoding(), 'base64')
2477
2478 def test_get_body_encoding_with_uppercase_charset(self):
2479 eq = self.assertEqual
2480 msg = Message()
2481 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2482 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2483 charsets = msg.get_charsets()
2484 eq(len(charsets), 1)
2485 eq(charsets[0], 'utf-8')
2486 charset = Charset(charsets[0])
2487 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002488 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002489 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2490 eq(msg.get_payload(decode=True), b'hello world')
2491 eq(msg['content-transfer-encoding'], 'base64')
2492 # Try another one
2493 msg = Message()
2494 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2495 charsets = msg.get_charsets()
2496 eq(len(charsets), 1)
2497 eq(charsets[0], 'us-ascii')
2498 charset = Charset(charsets[0])
2499 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2500 msg.set_payload('hello world', charset=charset)
2501 eq(msg.get_payload(), 'hello world')
2502 eq(msg['content-transfer-encoding'], '7bit')
2503
2504 def test_charsets_case_insensitive(self):
2505 lc = Charset('us-ascii')
2506 uc = Charset('US-ASCII')
2507 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2508
2509 def test_partial_falls_inside_message_delivery_status(self):
2510 eq = self.ndiffAssertEqual
2511 # The Parser interface provides chunks of data to FeedParser in 8192
2512 # byte gulps. SF bug #1076485 found one of those chunks inside
2513 # message/delivery-status header block, which triggered an
2514 # unreadline() of NeedMoreData.
2515 msg = self._msgobj('msg_43.txt')
2516 sfp = StringIO()
2517 iterators._structure(msg, sfp)
2518 eq(sfp.getvalue(), """\
2519multipart/report
2520 text/plain
2521 message/delivery-status
2522 text/plain
2523 text/plain
2524 text/plain
2525 text/plain
2526 text/plain
2527 text/plain
2528 text/plain
2529 text/plain
2530 text/plain
2531 text/plain
2532 text/plain
2533 text/plain
2534 text/plain
2535 text/plain
2536 text/plain
2537 text/plain
2538 text/plain
2539 text/plain
2540 text/plain
2541 text/plain
2542 text/plain
2543 text/plain
2544 text/plain
2545 text/plain
2546 text/plain
2547 text/plain
2548 text/rfc822-headers
2549""")
2550
R. David Murraya0b44b52010-12-02 21:47:19 +00002551 def test_make_msgid_domain(self):
2552 self.assertEqual(
2553 email.utils.make_msgid(domain='testdomain-string')[-19:],
2554 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002555
Ezio Melottib3aedd42010-11-20 19:04:17 +00002556
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002557# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the email.iterators helpers, plus a regression test for the
    # feed parser's line buffering.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        # msg_19.txt holds the text the joined body lines must equal.
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Same as above, but restricting the match to text/plain.
        payloads = [
            subpart.get_payload()
            for subpart in iterators.typed_subpart_iterator(msg, 'text', 'plain')
        ]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings. A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (text pushed, number of complete lines that become
        # readable as a result of that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for chunk, expected_lines in imt:
            bsf.push(chunk)
            expected_total += expected_lines
            # Drain everything this push made available.  NeedMoreData is
            # a sentinel object, so it is tested by identity.
            lines_read = 0
            while True:
                line = bsf.readline()
                if line is NeedMoreData:
                    break
                collected.append(line)
                lines_read += 1
            # assertEqual (rather than assertTrue(a == b)) reports both
            # values when the check fails.
            self.assertEqual(expected_lines, lines_read)
        self.assertEqual(len(collected), expected_total)
        # No characters may be lost or invented by the buffering.
        self.assertEqual(''.join(chunk for chunk, _ in imt),
                         ''.join(collected))
2644
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002645
Ezio Melottib3aedd42010-11-20 19:04:17 +00002646
class TestParsers(TestEmailBase):
    # Tests for Parser / HeaderParser and the module-level
    # message_from_*() helpers.

    # Show full diffs when an assertion on long text fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is not parsed: it stays a single string payload.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the file's CRLF line endings untranslated.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        # Flattening with CRLF must reproduce the input exactly.
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # (the From: value is deliberately missing its closing '>', and the
        # text has no trailing newline).
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        # Neither value may keep a trailing CR or LF.
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Header names made of unusual printable characters are accepted.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        # Iterating a message yields its header names.
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so no headers are
        # recognized at all.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable, so this does not depend on
        # keys() handing back a mutable list.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
            )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002809
Ezio Melottib3aedd42010-11-20 19:04:17 +00002810
class Test8BitBytesHandling(unittest.TestCase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    # Show full diffs when an assertion on long text fails.
    maxDiff = None

    # Template for the body tests; charset, cte and bodyline are filled in
    # per test.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def _parse_body(self, charset, cte, bodyline):
        # Fill in the body template, encode it as utf-8 and parse the bytes.
        text = self.bodytest_msg.format(charset=charset,
                                        cte=cte,
                                        bodyline=bodyline)
        return email.message_from_bytes(text.encode('utf-8'))

    def test_known_8bit_CTE(self):
        msg = self._parse_body('utf-8', '8bit', 'pöstal')
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        # With a charset that cannot be looked up, the undecoded payload
        # shows replacement characters; decode=True still returns the
        # original bytes.
        msg = self._parse_body('notavalidcharset', '8bit', 'pöstal')
        self.assertEqual(msg.get_payload(), "p��stal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        msg = self._parse_body('utf-8', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        msg = self._parse_body('ascii', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(msg.get_payload(), 'p=C3=B6st��l\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        msg = self._parse_body('utf-8', 'base64', 'cMO2c3RhbAá=')
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        msg = self._parse_body('utf-8', 'uuencode', '<,.V<W1A; á ')
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # Message whose headers contain raw (utf-8 encoded) non-ASCII bytes,
    # including a second From: header.
    headertest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        From: göst

        Yes, they are flying.
        """).encode('utf-8')

    def test_get_8bit_header(self):
        # Each non-ASCII byte in the header value is reported as '?'.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(msg.get('to'), 'b??z')
        self.assertEqual(msg['to'], 'b??z')

    def test_print_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        # Expected text: the input decoded as ascii with every undecodable
        # byte shown as '?'.
        expected = self.headertest_msg.decode(
            'ascii', 'replace').replace('�', '?')
        self.assertEqual(str(msg), expected)

    def test_values_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        expected = [
            'foo@bar.com',
            'b??z',
            'Maintenant je vous pr??sente mon '
            'coll??gue, le pouf c??l??bre\n'
            '\tJean de Baddie',
            "g??st",
        ]
        self.assertListEqual(msg.values(), expected)

    def test_items_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        expected = [
            ('From', 'foo@bar.com'),
            ('To', 'b??z'),
            ('Subject', 'Maintenant je vous pr??sente mon '
                        'coll??gue, le pouf c??l??bre\n'
                        '\tJean de Baddie'),
            ('From', 'g??st'),
        ]
        self.assertListEqual(msg.items(), expected)

    def test_get_all_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual(msg.get_all('from'), ['foo@bar.com', 'g??st'])

    # utf-8 encoded message with non-ASCII headers and a non-Latin 8bit body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        # BytesGenerator must reproduce the binary input unchanged.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        buf = BytesIO()
        email.generator.BytesGenerator(buf).flatten(msg)
        self.assertEqual(buf.getvalue(), self.non_latin_bin_msg)

    # XXX: ultimately the '?' should turn into CTE encoded bytes
    # using 'unknown-8bit' charset.
    non_latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: b??z
        Subject: Maintenant je vous pr??sente mon coll??gue, le pouf c??l??bre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        # The string Generator emits the 7bit-safe form of the message.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        buf = StringIO()
        email.generator.Generator(buf).flatten(msg)
        self.assertEqual(buf.getvalue(), self.non_latin_bin_msg_as7bit)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        buf = BytesIO()
        email.generator.BytesGenerator(buf).flatten(msg, unixfrom=True)
        first_line, _, remainder = buf.getvalue().partition(b'\n')
        self.assertEqual(first_line.split()[0], b'From')
        self.assertEqual(remainder, self.non_latin_bin_msg)

    def test_message_from_binary_file(self):
        # Write the binary message to a scratch file, parse it back with
        # BytesParser, and compare its string form with the 7bit rendering.
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            parsed = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)

    # latin-1 encoded message with an 8bit body.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected string rendering of latin_bin_msg.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        buf = StringIO()
        email.generator.DecodedGenerator(buf).flatten(parsed)
        # DecodedGenerator output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(buf.getvalue(),
                         self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        # Feed the message in 10-byte slices so that chunk boundaries fall
        # at arbitrary places.
        bfp = email.feedparser.BytesFeedParser()
        for start in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[start:start+10])
        parsed = bfp.close()
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        # Reading in binary mode keeps the file's CRLF line endings;
        # flattening with CRLF must reproduce the bytes exactly.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        buf = BytesIO()
        gen = email.generator.BytesGenerator(buf)
        gen.flatten(msg, linesep='\r\n')
        self.assertEqual(buf.getvalue(), text)
3059
Ezio Melottib3aedd42010-11-20 19:04:17 +00003060
class BaseTestBytesGeneratorIdempotent:
    # Mixin with the bytes-based helpers for the idempotency tests.
    # Subclasses supply linesep (str), blinesep (bytes) and
    # normalize_linesep_regex (compiled bytes pattern).

    # Show full diffs when a comparison fails.
    maxDiff = None

    def _msgobj(self, filename):
        # Read the data file as bytes, rewrite its line endings to
        # self.blinesep, and return both the parsed message and the
        # normalized bytes it was parsed from.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with header wrapping disabled and check that the
        # output equals the bytes the message was parsed from.
        buf = BytesIO()
        gen = email.generator.BytesGenerator(buf, maxheaderlen=0)
        gen.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, buf.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        expected_lines = str1.split(b'\n')
        actual_lines = str2.split(b'\n')
        self.assertListEqual(expected_lines, actual_lines)
3082
3083
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Runs the idempotency tests on bytes input using LF line endings.

    # Pattern matching every CRLF in the data; matches are replaced with
    # blinesep when the data is loaded.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3089
3090
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Runs the idempotency tests on bytes input using CRLF line endings.

    # Pattern matching every LF that is not already preceded by a CR;
    # matches are replaced with blinesep when the data is loaded.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3096
Ezio Melottib3aedd42010-11-20 19:04:17 +00003097
class TestBase64(unittest.TestCase):
    # Tests for the helpers in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Every started group of three input characters takes four
            # output characters (0 -> 0, 1..3 -> 4, 4..6 -> 8, ...).
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        encode = base64mime.body_encode
        eq(encode(b''), b'')
        eq(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(encode(b'hello\n'), 'aGVsbG8K\n')
        payload = b'xxxx ' * 20
        # Test the maxlinelen arg
        eq(encode(payload, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(encode(payload, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003148
3149
Ezio Melottib3aedd42010-11-20 19:04:17 +00003150
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003151class TestQuopri(unittest.TestCase):
def setUp(self):
    # Build the four byte-value tables the quoted-printable tests use.
    #
    # Set of characters (as byte integers) that don't need to be encoded
    # in headers.  Iterating a bytes literal already yields integers.
    self.hlit = list(chain(
        range(ord('a'), ord('z') + 1),
        range(ord('A'), ord('Z') + 1),
        range(ord('0'), ord('9') + 1),
        b'!*+-/'))
    # Set of characters (as byte integers) that do need to be encoded in
    # headers.
    self.hnon = [c for c in range(256) if c not in self.hlit]
    # Sanity check on the fixture.  A unittest assertion is used instead
    # of a bare assert so that the check still runs under "python -O".
    self.assertEqual(len(self.hlit) + len(self.hnon), 256)
    # Set of characters (as byte integers) that don't need to be encoded
    # in bodies.
    self.blit = list(range(ord(' '), ord('~') + 1))
    self.blit.append(ord('\t'))
    self.blit.remove(ord('='))
    # Set of characters (as byte integers) that do need to be encoded in
    # bodies.
    self.bnon = [c for c in range(256) if c not in self.blit]
    self.assertEqual(len(self.blit) + len(self.bnon), 256)
3173
Guido van Rossum9604e662007-08-30 03:46:43 +00003174 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003175 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003176 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003177 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003178 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003179 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003180 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003181
Guido van Rossum9604e662007-08-30 03:46:43 +00003182 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003183 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003184 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003185 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003186 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003187 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003188 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003189
3190 def test_header_quopri_len(self):
3191 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003192 eq(quoprimime.header_length(b'hello'), 5)
3193 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003194 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003195 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003196 # =?xxx?q?...?= means 10 extra characters
3197 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003198 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3199 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003200 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003201 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003202 # =?xxx?q?...?= means 10 extra characters
3203 10)
3204 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003205 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003206 'expected length 1 for %r' % chr(c))
3207 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003208 # Space is special; it's encoded to _
3209 if c == ord(' '):
3210 continue
3211 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003212 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003213 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003214
3215 def test_body_quopri_len(self):
3216 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003217 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003218 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003219 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003220 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003221
3222 def test_quote_unquote_idempotent(self):
3223 for x in range(256):
3224 c = chr(x)
3225 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3226
3227 def test_header_encode(self):
3228 eq = self.assertEqual
3229 he = quoprimime.header_encode
3230 eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
3231 eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
3232 eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
3233 # Test a non-ASCII character
3234 eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
3235
3236 def test_decode(self):
3237 eq = self.assertEqual
3238 eq(quoprimime.decode(''), '')
3239 eq(quoprimime.decode('hello'), 'hello')
3240 eq(quoprimime.decode('hello', 'X'), 'hello')
3241 eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')
3242
3243 def test_encode(self):
3244 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003245 eq(quoprimime.body_encode(''), '')
3246 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003247 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003248 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003249 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003250 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003251xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3252 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3253x xxxx xxxx xxxx xxxx=20""")
3254 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003255 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3256 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003257xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3258 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3259x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003260 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003261one line
3262
3263two line"""), """\
3264one line
3265
3266two line""")
3267
3268
Ezio Melottib3aedd42010-11-20 19:04:17 +00003269
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003270# Test the Charset class
3271class TestCharset(unittest.TestCase):
3272 def tearDown(self):
3273 from email import charset as CharsetModule
3274 try:
3275 del CharsetModule.CHARSETS['fake']
3276 except KeyError:
3277 pass
3278
Guido van Rossum9604e662007-08-30 03:46:43 +00003279 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003280 eq = self.assertEqual
3281 # Make sure us-ascii = no Unicode conversion
3282 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003283 eq(c.header_encode('Hello World!'), 'Hello World!')
3284 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003285 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003286 self.assertRaises(UnicodeError, c.header_encode, s)
3287 c = Charset('utf-8')
3288 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003289
3290 def test_body_encode(self):
3291 eq = self.assertEqual
3292 # Try a charset with QP body encoding
3293 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003294 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003295 # Try a charset with Base64 body encoding
3296 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003297 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003298 # Try a charset with None body encoding
3299 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003300 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003301 # Try the convert argument, where input codec != output codec
3302 c = Charset('euc-jp')
3303 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003304 # XXX FIXME
3305## try:
3306## eq('\x1b$B5FCO;~IW\x1b(B',
3307## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3308## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3309## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3310## except LookupError:
3311## # We probably don't have the Japanese codecs installed
3312## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003313 # Testing SF bug #625509, which we have to fake, since there are no
3314 # built-in encodings where the header encoding is QP but the body
3315 # encoding is not.
3316 from email import charset as CharsetModule
3317 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3318 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003319 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003320
3321 def test_unicode_charset_name(self):
3322 charset = Charset('us-ascii')
3323 self.assertEqual(str(charset), 'us-ascii')
3324 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3325
3326
Ezio Melottib3aedd42010-11-20 19:04:17 +00003327
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003328# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for email.header.Header, decode_header() and make_header()."""

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # Appending a chunk without a leading space still yields a single
        # separating space in the encoded result.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must respect maxlinelen.  assertLessEqual reports
        # the offending length on failure, unlike assertTrue(len(...) <= 76).
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back the three original chunks with their
        # charsets.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Default line length, no header name.
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a header name the first line wraps one word earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A large explicit maxlinelen leaves the value unfolded.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Reuse x so the round-trip check below compares against exactly
        # what was appended.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # The charset may be given as a plain string rather than a Charset.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # The expected values show utf-8 headers using q encoding in one
        # case and b encoding in the other.
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Without an errors argument the undecodable byte is rejected...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ...while errors='replace' matches a utf-8 'replace' decode.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        # The two adjacent koi8-r encoded words are decoded as one chunk.
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # A shift_jis input charset is emitted as iso-2022-jp.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
3639
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003640
Ezio Melottib3aedd42010-11-20 19:04:17 +00003641
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003642# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 parameter handling on Message objects."""

    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
            charset='us-ascii', language='en')
        # Only 'foo' is removed; 'title' must survive in the output.
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Serializing the parsed message must reproduce the input.
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Serializing the parsed message must reproduce the input.
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # The value must come back as a plain string, not a 3-tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # NOTE(review): this body is identical to
        # test_rfc2231_no_language_or_charset_in_filename above -- confirm
        # whether a different input was intended.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 is not flagged with a trailing '*', so its %XX escapes
        # are expected to stay literal.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is flagged with a trailing '*', so every %XX escape is
        # expected to stay literal.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        # The trailing %E2 is expected to come back as U+FFFD.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # The value must come back as a plain string, not a 3-tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Without the extended-value marker the charset'lang' prefix is
        # expected to stay part of a plain string value.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
3977
3978
Ezio Melottib3aedd42010-11-20 19:04:17 +00003979
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1. Note that these are incomplete, because the
# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Regenerate a parsed message and compare its first MIME part.

    Each test parses the data file ``msg_45.txt``, turns the resulting
    message back into text, and checks that the first MIME part of the
    output is identical to the first MIME part of the input.
    """

    # Matches one MIME part: a line "--<boundary>", the part text (group 2,
    # non-greedy), and a following line that is exactly "--<boundary>" again
    # (back-reference \1).  Compiled once here rather than on every call.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        """Return ``(text, msg)`` for the data file *filename*.

        *text* is the file content as read; *msg* is the object produced by
        ``email.message_from_string(text)``.
        """
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        """Return the text of the first MIME part found in *text*.

        Fails the test, naming *label*, when no part is found, instead of
        raising AttributeError from calling ``.group`` on ``None``.
        """
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no MIME part found in the %s message' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert that *result* and *original* have the same first MIME part."""
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'regenerated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        """The first part survives ``Message.as_string()``."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        """The first part survives ``as_string(maxheaderlen=60)``."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        """The first part survives ``Generator.flatten`` into a StringIO."""
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4015
4016
Ezio Melottib3aedd42010-11-20 19:04:17 +00004017
def _testclasses():
    """Return this module's test-case classes, in ``dir()`` order.

    A module attribute is selected when its name starts with ``Test`` and
    it is a class derived from ``unittest.TestCase``.  The type check keeps
    other ``Test*`` names (constants, helper objects, non-TestCase classes)
    out of the list, since callers hand every entry to the unittest
    machinery as a test-case class.
    """
    mod = sys.modules[__name__]
    candidates = (getattr(mod, name) for name in dir(mod)
                  if name.startswith('Test'))
    return [obj for obj in candidates
            if isinstance(obj, type) and issubclass(obj, unittest.TestCase)]
4021
4022
def suite():
    """Return a TestSuite with the tests of every class from _testclasses().

    One sub-suite per class is added, in the order _testclasses() yields
    the classes.
    """
    # A fresh loader with default settings is what the unittest.makeSuite
    # helper used internally; makeSuite itself is deprecated and no longer
    # exists in Python 3.13, so call the loader directly.
    loader = unittest.TestLoader()
    # Named all_tests so the local does not shadow this function's own name.
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
4028
4029
def test_main():
    """Pass each class from _testclasses() to run_unittest, one per call."""
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
4033
4034
Ezio Melottib3aedd42010-11-20 19:04:17 +00004035
# When this file is executed as a script, hand control to unittest and name
# the module-level suite() function as the default test to load and run.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')