blob: e7fcee3b6d5f09e064f8480c78bd34317499ddbd [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
# String constants shared by the tests below (line joining and padding).
EMPTYSTRING = ''
SPACE = ' '
NL = '\n'
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory beside ``landmark``.

    Any extra positional or keyword arguments are handed to open() unchanged.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
Guido van Rossum8b3febe2007-08-30 01:15:14 +000055# Base test class
class TestEmailBase(unittest.TestCase):
    """Base class providing helpers shared by the email test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if not (first != second):
            return
        # Compare the repr() of each line so whitespace differences show up.
        quoted = [
            [repr(line) for line in str(value).splitlines()]
            for value in (first, second)
        ]
        delta = difflib.ndiff(*quoted)
        raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Parse the named test data file and return the message object."""
        with openfile(findfile(filename)) as stream:
            message = email.message_from_file(stream)
        return message
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Test methods deliberately carry comments rather than docstrings so that
    unittest's verbose output keeps showing the method names.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        # Keyword arguments spell out what the bare True/0/False meant.
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Message.__setitem__ appends rather than replaces, and lookups
            # return the first matching header.  Drop the previous value so
            # every spelling of the encoding is really exercised, not just
            # the first one.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header, so the default lookup finds nothing.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer.ppt",
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            "attachment; filename*=iso-8859-1''Fu%DFballer.ppt",
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000563
Ezio Melottib3aedd42010-11-20 19:04:17 +0000564
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000565# Test the email.encoders module
566class TestEncoders(unittest.TestCase):
567 def test_encode_empty_payload(self):
568 eq = self.assertEqual
569 msg = Message()
570 msg.set_charset('us-ascii')
571 eq(msg['content-transfer-encoding'], '7bit')
572
573 def test_default_cte(self):
574 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000575 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000576 msg = MIMEText('hello world')
577 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000578 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000579 msg = MIMEText('hello \xf8 world')
580 eq(msg['content-transfer-encoding'], '8bit')
581 # And now with a different charset
582 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
583 eq(msg['content-transfer-encoding'], 'quoted-printable')
584
R. David Murraye85200d2010-05-06 01:41:14 +0000585 def test_encode7or8bit(self):
586 # Make sure a charset whose input character set is 8bit but
587 # whose output character set is 7bit gets a transfer-encoding
588 # of 7bit.
589 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000590 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000591 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000592
Ezio Melottib3aedd42010-11-20 19:04:17 +0000593
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000594# Test long header wrapping
595class TestLongHeaders(TestEmailBase):
596 def test_split_long_continuation(self):
597 eq = self.ndiffAssertEqual
598 msg = email.message_from_string("""\
599Subject: bug demonstration
600\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
601\tmore text
602
603test
604""")
605 sfp = StringIO()
606 g = Generator(sfp)
607 g.flatten(msg)
608 eq(sfp.getvalue(), """\
609Subject: bug demonstration
610\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
611\tmore text
612
613test
614""")
615
616 def test_another_long_almost_unsplittable_header(self):
617 eq = self.ndiffAssertEqual
618 hstr = """\
619bug demonstration
620\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
621\tmore text"""
622 h = Header(hstr, continuation_ws='\t')
623 eq(h.encode(), """\
624bug demonstration
625\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
626\tmore text""")
627 h = Header(hstr.replace('\t', ' '))
628 eq(h.encode(), """\
629bug demonstration
630 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
631 more text""")
632
633 def test_long_nonstring(self):
634 eq = self.ndiffAssertEqual
635 g = Charset("iso-8859-1")
636 cz = Charset("iso-8859-2")
637 utf8 = Charset("utf-8")
638 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
639 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
640 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
641 b'bef\xf6rdert. ')
642 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
643 b'd\xf9vtipu.. ')
644 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
645 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
646 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
647 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
648 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
649 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
650 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
651 '\u3044\u307e\u3059\u3002')
652 h = Header(g_head, g, header_name='Subject')
653 h.append(cz_head, cz)
654 h.append(utf8_head, utf8)
655 msg = Message()
656 msg['Subject'] = h
657 sfp = StringIO()
658 g = Generator(sfp)
659 g.flatten(msg)
660 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000661Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
662 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
663 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
664 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
665 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
666 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
667 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
668 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
669 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
670 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
671 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000672
673""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000674 eq(h.encode(maxlinelen=76), """\
675=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
676 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
677 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
678 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
679 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
680 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
681 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
682 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
683 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
684 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
685 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000686
687 def test_long_header_encode(self):
688 eq = self.ndiffAssertEqual
689 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
690 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
691 header_name='X-Foobar-Spoink-Defrobnit')
692 eq(h.encode(), '''\
693wasnipoop; giraffes="very-long-necked-animals";
694 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
695
696 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
697 eq = self.ndiffAssertEqual
698 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
699 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
700 header_name='X-Foobar-Spoink-Defrobnit',
701 continuation_ws='\t')
702 eq(h.encode(), '''\
703wasnipoop; giraffes="very-long-necked-animals";
704 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
705
706 def test_long_header_encode_with_tab_continuation(self):
707 eq = self.ndiffAssertEqual
708 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
709 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
710 header_name='X-Foobar-Spoink-Defrobnit',
711 continuation_ws='\t')
712 eq(h.encode(), '''\
713wasnipoop; giraffes="very-long-necked-animals";
714\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
715
716 def test_header_splitter(self):
717 eq = self.ndiffAssertEqual
718 msg = MIMEText('')
719 # It'd be great if we could use add_header() here, but that doesn't
720 # guarantee an order of the parameters.
721 msg['X-Foobar-Spoink-Defrobnit'] = (
722 'wasnipoop; giraffes="very-long-necked-animals"; '
723 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
724 sfp = StringIO()
725 g = Generator(sfp)
726 g.flatten(msg)
727 eq(sfp.getvalue(), '''\
728Content-Type: text/plain; charset="us-ascii"
729MIME-Version: 1.0
730Content-Transfer-Encoding: 7bit
731X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
732 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
733
734''')
735
736 def test_no_semis_header_splitter(self):
737 eq = self.ndiffAssertEqual
738 msg = Message()
739 msg['From'] = 'test@dom.ain'
740 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
741 msg.set_payload('Test')
742 sfp = StringIO()
743 g = Generator(sfp)
744 g.flatten(msg)
745 eq(sfp.getvalue(), """\
746From: test@dom.ain
747References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
748 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
749
750Test""")
751
752 def test_no_split_long_header(self):
753 eq = self.ndiffAssertEqual
754 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000755 h = Header(hstr)
756 # These come on two lines because Headers are really field value
757 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000758 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000759References:
760 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
761 h = Header('x' * 80)
762 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000763
764 def test_splitting_multiple_long_lines(self):
765 eq = self.ndiffAssertEqual
766 hstr = """\
767from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
768\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
769\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
770"""
771 h = Header(hstr, continuation_ws='\t')
772 eq(h.encode(), """\
773from babylon.socal-raves.org (localhost [127.0.0.1]);
774 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
775 for <mailman-admin@babylon.socal-raves.org>;
776 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
777\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
778 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
779 for <mailman-admin@babylon.socal-raves.org>;
780 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
781\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
782 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
783 for <mailman-admin@babylon.socal-raves.org>;
784 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
785
786 def test_splitting_first_line_only_is_long(self):
787 eq = self.ndiffAssertEqual
788 hstr = """\
789from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
790\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
791\tid 17k4h5-00034i-00
792\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
793 h = Header(hstr, maxlinelen=78, header_name='Received',
794 continuation_ws='\t')
795 eq(h.encode(), """\
796from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
797 helo=cthulhu.gerg.ca)
798\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
799\tid 17k4h5-00034i-00
800\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
801
802 def test_long_8bit_header(self):
803 eq = self.ndiffAssertEqual
804 msg = Message()
805 h = Header('Britische Regierung gibt', 'iso-8859-1',
806 header_name='Subject')
807 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000808 eq(h.encode(maxlinelen=76), """\
809=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
810 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000811 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000812 eq(msg.as_string(maxheaderlen=76), """\
813Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
814 =?iso-8859-1?q?hore-Windkraftprojekte?=
815
816""")
817 eq(msg.as_string(maxheaderlen=0), """\
818Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000819
820""")
821
822 def test_long_8bit_header_no_charset(self):
823 eq = self.ndiffAssertEqual
824 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000825 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
826 'f\xfcr Offshore-Windkraftprojekte '
827 '<a-very-long-address@example.com>')
828 msg['Reply-To'] = header_string
829 self.assertRaises(UnicodeEncodeError, msg.as_string)
830 msg = Message()
831 msg['Reply-To'] = Header(header_string, 'utf-8',
832 header_name='Reply-To')
833 eq(msg.as_string(maxheaderlen=78), """\
834Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
835 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000836
837""")
838
839 def test_long_to_header(self):
840 eq = self.ndiffAssertEqual
841 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
842 '<someone@eecs.umich.edu>,'
843 '"Someone Test #B" <someone@umich.edu>, '
844 '"Someone Test #C" <someone@eecs.umich.edu>, '
845 '"Someone Test #D" <someone@eecs.umich.edu>')
846 msg = Message()
847 msg['To'] = to
848 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000849To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000850 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000851 "Someone Test #C" <someone@eecs.umich.edu>,
852 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000853
854''')
855
856 def test_long_line_after_append(self):
857 eq = self.ndiffAssertEqual
858 s = 'This is an example of string which has almost the limit of header length.'
859 h = Header(s)
860 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000861 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000862This is an example of string which has almost the limit of header length.
863 Add another line.""")
864
865 def test_shorter_line_with_append(self):
866 eq = self.ndiffAssertEqual
867 s = 'This is a shorter line.'
868 h = Header(s)
869 h.append('Add another sentence. (Surprise?)')
870 eq(h.encode(),
871 'This is a shorter line. Add another sentence. (Surprise?)')
872
873 def test_long_field_name(self):
874 eq = self.ndiffAssertEqual
875 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000876 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
877 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
878 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
879 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000880 h = Header(gs, 'iso-8859-1', header_name=fn)
881 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000882 eq(h.encode(maxlinelen=76), """\
883=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
884 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
885 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
886 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000887
888 def test_long_received_header(self):
889 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
890 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
891 'Wed, 05 Mar 2003 18:10:18 -0700')
892 msg = Message()
893 msg['Received-1'] = Header(h, continuation_ws='\t')
894 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000895 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000896 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000897Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
898 Wed, 05 Mar 2003 18:10:18 -0700
899Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
900 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000901
902""")
903
904 def test_string_headerinst_eq(self):
905 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
906 'tu-muenchen.de> (David Bremner\'s message of '
907 '"Thu, 6 Mar 2003 13:58:21 +0100")')
908 msg = Message()
909 msg['Received-1'] = Header(h, header_name='Received-1',
910 continuation_ws='\t')
911 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000912 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000913 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000914Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
915 6 Mar 2003 13:58:21 +0100\")
916Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
917 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000918
919""")
920
921 def test_long_unbreakable_lines_with_continuation(self):
922 eq = self.ndiffAssertEqual
923 msg = Message()
924 t = """\
925iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
926 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
927 msg['Face-1'] = t
928 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000929 # XXX This splitting is all wrong. It the first value line should be
930 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000931 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000932Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000933 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000934 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000935Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000936 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000937 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
938
939""")
940
941 def test_another_long_multiline_header(self):
942 eq = self.ndiffAssertEqual
943 m = ('Received: from siimage.com '
944 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000945 'Microsoft SMTPSVC(5.0.2195.4905); '
946 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000947 msg = email.message_from_string(m)
948 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000949Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
950 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000951
952''')
953
954 def test_long_lines_with_different_header(self):
955 eq = self.ndiffAssertEqual
956 h = ('List-Unsubscribe: '
957 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
958 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
959 '?subject=unsubscribe>')
960 msg = Message()
961 msg['List'] = h
962 msg['List'] = Header(h, header_name='List')
963 eq(msg.as_string(maxheaderlen=78), """\
964List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000965 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000966List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000967 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000968
969""")
970
971
Ezio Melottib3aedd42010-11-20 19:04:17 +0000972
# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A message whose body starts with "From ", the pattern that
        # mangle_from_ is meant to escape.
        self.msg = Message()
        self.msg['From'] = 'aaa@bbb.org'
        self.msg.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangled_from(self):
        # With mangle_from_=True the body line gains a leading '>'.
        s = StringIO()
        g = Generator(s, mangle_from_=True)
        g.flatten(self.msg)
        self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangle_from_=False the body is written unchanged.
        s = StringIO()
        g = Generator(s, mangle_from_=False)
        g.flatten(self.msg)
        self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
1004
1005
Ezio Melottib3aedd42010-11-20 19:04:17 +00001006
# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # The subtype is guessed from the audio data itself.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the raw bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides guessing.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        # add_header() with a keyword parameter, then read the header back
        # through item access, get_params() and get_param().
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # Unique sentinel so identity checks prove failobj was returned.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1051
1052
Ezio Melottib3aedd42010-11-20 19:04:17 +00001053
# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF fixture and wrap it without naming a subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # The subtype is guessed from the image data itself.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the raw bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides guessing.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        # add_header() with a keyword parameter, then read the header back
        # through item access, get_params() and get_param().
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # Unique sentinel so identity checks prove failobj was returned.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1092
1093
Ezio Melottib3aedd42010-11-20 19:04:17 +00001094
# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Defaults: application/octet-stream with a base64 transfer encoding.
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        # The stored payload is the base64 text of the input and decodes
        # back to the original bytes.
        eq = self.assertEqual
        data = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(data)
        # Surrounding whitespace in a base64 body carries no meaning, so a
        # trailing newline added by the encoder must not fail the test.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), data)
1109
1110
Ezio Melottib3aedd42010-11-20 19:04:17 +00001111
# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        # Default content type and charset, plus failobj handling for a
        # missing parameter and a missing header.
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # Unique sentinel so identity checks prove failobj was returned.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        # Without an explicit _charset the message still reports us-ascii.
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Cyrillic text with an explicit utf-8 charset round-trips through
        # get_payload(decode=True) as utf-8 bytes.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1163
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001164
Ezio Melottib3aedd42010-11-20 19:04:17 +00001165
# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a text part and a GIF
        # attachment, with From/To/Subject/Date headers.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        if timetuple.tm_isdst == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone / time.altzone count seconds *west* of UTC, so a
        # positive value is a negative zone offset.
        sign = '-' if tzsecs > 0 else '+'
        # Format as HHMM from the absolute offset; dividing the raw value
        # by 36 is only right for whole-hour zones and prints a stray minus
        # sign for zones east of UTC.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # The container exposes exactly the two attached parts, by identity
        # and in attachment order.
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing then regenerating this text must reproduce it exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble yields one extra blank line before the
        # first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty-string epilogue yields a newline after the closing
        # boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        # Each message/external-body subpart wraps one text/plain message.
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below is restored at four spaces per level;
        # confirm against the output of iterators._structure for msg_38.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below is restored at four spaces per level;
        # confirm against the output of iterators._structure for msg_39.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must leave the body
        # unparsed, so regeneration reproduces the text.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # Leading whitespace inside the quoted boundary is significant.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the last thing in the input, with no
        # newline after it; the part payload must still be recovered.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001538
1539
Ezio Melottib3aedd42010-11-20 19:04:17 +00001540
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    # Messages that break the MIME / RFC 2822 rules in various ways.  Each
    # test pins how the parser models the message and which defects, if any,
    # it records on the resulting object.

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        # All three accessors are expected to agree on text/plain.
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        # assertIsInstance reports the actual object when the check fails.
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # The payload is expected to be kept as one string, not a part list.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        # The whole body, boundary lines included, is expected back as a
        # single string payload.
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        # The regenerated text has a blank line between headers and body.
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        # No headers are parsed; the offending first line is only kept on
        # the defect object.
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1652
1653
Ezio Melottib3aedd42010-11-20 19:04:17 +00001654
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001655# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Conversions between RFC 2047 encoded-word headers and the list of
    # (bytes, charset) chunks produced by decode_header().

    def test_rfc2047_multiline(self):
        # Folded header: plain text mixed with two mac-iceland encoded words.
        raw = ('Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz\n'
               ' foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            ])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding with a 76 column limit splits the last encoded word
        # across two lines.
        self.ndiffAssertEqual(
            rebuilt.encode(maxlinelen=76),
            'Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar '
            '=?mac-iceland?q?r=8Aksm?=\n'
            ' =?mac-iceland?q?=9Arg=8Cs?=')

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        # The charset name comes back lower-cased.
        self.assertEqual(chunks, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b'Pirard <pirard@dom.ain>', None),
            ])
        text = str(make_header(chunks))
        self.assertEqual(text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= '
               'jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1'),
            ])
        text = str(make_header(chunks))
        self.assertEqual(text, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to the surrounding text are expected to be
        # left undecoded: the input comes back as one chunk.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
            ])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = (
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
            )
        for encoded, expected in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1719
Ezio Melottib3aedd42010-11-20 19:04:17 +00001720
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001721# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Tests for message/* containers: building them with MIMEMessage,
    # parsing them from the data files, and regenerating their text.

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # A plain string is not an acceptable payload for MIMEMessage.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored as-is, not copied.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # Attaching a second message to the container must be rejected.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        # Build by hand the message that msg_21.txt is expected to contain.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        # The preamble has no trailing newline; the expected output still
        # starts the first boundary on its own line.
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        # Both outer parts default to message/rfc822 ...
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        # ... while the messages they enclose default to text/plain.
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed the subparts must still report
        # message/rfc822, and the generated text simply omits those headers.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        # Parts passed through _subparts become the payload, in order.
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002020
Ezio Melottib3aedd42010-11-20 19:04:17 +00002021
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002022# A general test of parser->model->generator idempotency. IOW, read a message
2023# in, parse it into a message object tree, then without touching the tree,
2024# regenerate the plain text. The original text and the transformed text
2025# should be identical. Note: that we ignore the Unix-From since that may
2026# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Parse each sample message and regenerate it without touching the
    # object tree; the generated text must equal the text that was read.

    linesep = '\n'

    def _msgobj(self, filename):
        # Return a (parsed message, raw text) pair for the named data file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg and compare the result with text.  maxheaderlen=0 is
        # passed so the generator does not re-wrap long header lines.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002184
2185
Ezio Melottib3aedd42010-11-20 19:04:17 +00002186
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002187# Test various other bits of the package's functionality
2188class TestMiscellaneous(TestEmailBase):
2189 def test_message_from_string(self):
2190 with openfile('msg_01.txt') as fp:
2191 text = fp.read()
2192 msg = email.message_from_string(text)
2193 s = StringIO()
2194 # Don't wrap/continue long headers since we're trying to test
2195 # idempotency.
2196 g = Generator(s, maxheaderlen=0)
2197 g.flatten(msg)
2198 self.assertEqual(text, s.getvalue())
2199
2200 def test_message_from_file(self):
2201 with openfile('msg_01.txt') as fp:
2202 text = fp.read()
2203 fp.seek(0)
2204 msg = email.message_from_file(fp)
2205 s = StringIO()
2206 # Don't wrap/continue long headers since we're trying to test
2207 # idempotency.
2208 g = Generator(s, maxheaderlen=0)
2209 g.flatten(msg)
2210 self.assertEqual(text, s.getvalue())
2211
2212 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002213 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002214 with openfile('msg_01.txt') as fp:
2215 text = fp.read()
2216
2217 # Create a subclass
2218 class MyMessage(Message):
2219 pass
2220
2221 msg = email.message_from_string(text, MyMessage)
2222 unless(isinstance(msg, MyMessage))
2223 # Try something more complicated
2224 with openfile('msg_02.txt') as fp:
2225 text = fp.read()
2226 msg = email.message_from_string(text, MyMessage)
2227 for subpart in msg.walk():
2228 unless(isinstance(subpart, MyMessage))
2229
2230 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002231 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002232 # Create a subclass
2233 class MyMessage(Message):
2234 pass
2235
2236 with openfile('msg_01.txt') as fp:
2237 msg = email.message_from_file(fp, MyMessage)
2238 unless(isinstance(msg, MyMessage))
2239 # Try something more complicated
2240 with openfile('msg_02.txt') as fp:
2241 msg = email.message_from_file(fp, MyMessage)
2242 for subpart in msg.walk():
2243 unless(isinstance(subpart, MyMessage))
2244
2245 def test__all__(self):
2246 module = __import__('email')
2247 # Can't use sorted() here due to Python 2.3 compatibility
2248 all = module.__all__[:]
2249 all.sort()
2250 self.assertEqual(all, [
2251 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002252 'header', 'iterators', 'message', 'message_from_binary_file',
2253 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002254 'message_from_string', 'mime', 'parser',
2255 'quoprimime', 'utils',
2256 ])
2257
2258 def test_formatdate(self):
2259 now = time.time()
2260 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2261 time.gmtime(now)[:6])
2262
2263 def test_formatdate_localtime(self):
2264 now = time.time()
2265 self.assertEqual(
2266 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2267 time.localtime(now)[:6])
2268
2269 def test_formatdate_usegmt(self):
2270 now = time.time()
2271 self.assertEqual(
2272 utils.formatdate(now, localtime=False),
2273 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2274 self.assertEqual(
2275 utils.formatdate(now, localtime=False, usegmt=True),
2276 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2277
2278 def test_parsedate_none(self):
2279 self.assertEqual(utils.parsedate(''), None)
2280
2281 def test_parsedate_compact(self):
2282 # The FWS after the comma is optional
2283 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2284 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2285
2286 def test_parsedate_no_dayofweek(self):
2287 eq = self.assertEqual
2288 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2289 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2290
2291 def test_parsedate_compact_no_dayofweek(self):
2292 eq = self.assertEqual
2293 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2294 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2295
R. David Murray4a62e892010-12-23 20:35:46 +00002296 def test_parsedate_no_space_before_positive_offset(self):
2297 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2298 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2299
2300 def test_parsedate_no_space_before_negative_offset(self):
2301 # Issue 1155362: we already handled '+' for this case.
2302 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2303 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2304
2305
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002306 def test_parsedate_acceptable_to_time_functions(self):
2307 eq = self.assertEqual
2308 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2309 t = int(time.mktime(timetup))
2310 eq(time.localtime(t)[:6], timetup[:6])
2311 eq(int(time.strftime('%Y', timetup)), 2003)
2312 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2313 t = int(time.mktime(timetup[:9]))
2314 eq(time.localtime(t)[:6], timetup[:6])
2315 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2316
R. David Murray219d1c82010-08-25 00:45:55 +00002317 def test_parsedate_y2k(self):
2318 """Test for parsing a date with a two-digit year.
2319
2320 Parsing a date with a two-digit year should return the correct
2321 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2322 obsoletes RFC822) requires four-digit years.
2323
2324 """
2325 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2326 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2327 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2328 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2329
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002330 def test_parseaddr_empty(self):
2331 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2332 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2333
2334 def test_noquote_dump(self):
2335 self.assertEqual(
2336 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2337 'A Silly Person <person@dom.ain>')
2338
2339 def test_escape_dump(self):
2340 self.assertEqual(
2341 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2342 r'"A \(Very\) Silly Person" <person@dom.ain>')
2343 a = r'A \(Special\) Person'
2344 b = 'person@dom.ain'
2345 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2346
2347 def test_escape_backslashes(self):
2348 self.assertEqual(
2349 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2350 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2351 a = r'Arthur \Backslash\ Foobar'
2352 b = 'person@dom.ain'
2353 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2354
2355 def test_name_with_dot(self):
2356 x = 'John X. Doe <jxd@example.com>'
2357 y = '"John X. Doe" <jxd@example.com>'
2358 a, b = ('John X. Doe', 'jxd@example.com')
2359 self.assertEqual(utils.parseaddr(x), (a, b))
2360 self.assertEqual(utils.parseaddr(y), (a, b))
2361 # formataddr() quotes the name if there's a dot in it
2362 self.assertEqual(utils.formataddr((a, b)), y)
2363
R. David Murray5397e862010-10-02 15:58:26 +00002364 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2365 # issue 10005. Note that in the third test the second pair of
2366 # backslashes is not actually a quoted pair because it is not inside a
2367 # comment or quoted string: the address being parsed has a quoted
2368 # string containing a quoted backslash, followed by 'example' and two
2369 # backslashes, followed by another quoted string containing a space and
2370 # the word 'example'. parseaddr copies those two backslashes
2371 # literally. Per rfc5322 this is not technically correct since a \ may
2372 # not appear in an address outside of a quoted string. It is probably
2373 # a sensible Postel interpretation, though.
2374 eq = self.assertEqual
2375 eq(utils.parseaddr('""example" example"@example.com'),
2376 ('', '""example" example"@example.com'))
2377 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2378 ('', '"\\"example\\" example"@example.com'))
2379 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2380 ('', '"\\\\"example\\\\" example"@example.com'))
2381
R. David Murray63563cd2010-12-18 18:25:38 +00002382 def test_parseaddr_preserves_spaces_in_local_part(self):
2383 # issue 9286. A normal RFC5322 local part should not contain any
2384 # folding white space, but legacy local parts can (they are a sequence
2385 # of atoms, not dotatoms). On the other hand we strip whitespace from
2386 # before the @ and around dots, on the assumption that the whitespace
2387 # around the punctuation is a mistake in what would otherwise be
2388 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2389 self.assertEqual(('', "merwok wok@xample.com"),
2390 utils.parseaddr("merwok wok@xample.com"))
2391 self.assertEqual(('', "merwok wok@xample.com"),
2392 utils.parseaddr("merwok wok@xample.com"))
2393 self.assertEqual(('', "merwok wok@xample.com"),
2394 utils.parseaddr(" merwok wok @xample.com"))
2395 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2396 utils.parseaddr('merwok"wok" wok@xample.com'))
2397 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2398 utils.parseaddr('merwok. wok . wok@xample.com'))
2399
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002400 def test_multiline_from_comment(self):
2401 x = """\
2402Foo
2403\tBar <foo@example.com>"""
2404 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2405
2406 def test_quote_dump(self):
2407 self.assertEqual(
2408 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2409 r'"A Silly; Person" <person@dom.ain>')
2410
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002411 def test_charset_richcomparisons(self):
2412 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002413 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002414 cset1 = Charset()
2415 cset2 = Charset()
2416 eq(cset1, 'us-ascii')
2417 eq(cset1, 'US-ASCII')
2418 eq(cset1, 'Us-AsCiI')
2419 eq('us-ascii', cset1)
2420 eq('US-ASCII', cset1)
2421 eq('Us-AsCiI', cset1)
2422 ne(cset1, 'usascii')
2423 ne(cset1, 'USASCII')
2424 ne(cset1, 'UsAsCiI')
2425 ne('usascii', cset1)
2426 ne('USASCII', cset1)
2427 ne('UsAsCiI', cset1)
2428 eq(cset1, cset2)
2429 eq(cset2, cset1)
2430
2431 def test_getaddresses(self):
2432 eq = self.assertEqual
2433 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2434 'Bud Person <bperson@dom.ain>']),
2435 [('Al Person', 'aperson@dom.ain'),
2436 ('Bud Person', 'bperson@dom.ain')])
2437
2438 def test_getaddresses_nasty(self):
2439 eq = self.assertEqual
2440 eq(utils.getaddresses(['foo: ;']), [('', '')])
2441 eq(utils.getaddresses(
2442 ['[]*-- =~$']),
2443 [('', ''), ('', ''), ('', '*--')])
2444 eq(utils.getaddresses(
2445 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2446 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2447
2448 def test_getaddresses_embedded_comment(self):
2449 """Test proper handling of a nested comment"""
2450 eq = self.assertEqual
2451 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2452 eq(addrs[0][1], 'foo@bar.com')
2453
2454 def test_utils_quote_unquote(self):
2455 eq = self.assertEqual
2456 msg = Message()
2457 msg.add_header('content-disposition', 'attachment',
2458 filename='foo\\wacky"name')
2459 eq(msg.get_filename(), 'foo\\wacky"name')
2460
2461 def test_get_body_encoding_with_bogus_charset(self):
2462 charset = Charset('not a charset')
2463 self.assertEqual(charset.get_body_encoding(), 'base64')
2464
2465 def test_get_body_encoding_with_uppercase_charset(self):
2466 eq = self.assertEqual
2467 msg = Message()
2468 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2469 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2470 charsets = msg.get_charsets()
2471 eq(len(charsets), 1)
2472 eq(charsets[0], 'utf-8')
2473 charset = Charset(charsets[0])
2474 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002475 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002476 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2477 eq(msg.get_payload(decode=True), b'hello world')
2478 eq(msg['content-transfer-encoding'], 'base64')
2479 # Try another one
2480 msg = Message()
2481 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2482 charsets = msg.get_charsets()
2483 eq(len(charsets), 1)
2484 eq(charsets[0], 'us-ascii')
2485 charset = Charset(charsets[0])
2486 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2487 msg.set_payload('hello world', charset=charset)
2488 eq(msg.get_payload(), 'hello world')
2489 eq(msg['content-transfer-encoding'], '7bit')
2490
2491 def test_charsets_case_insensitive(self):
2492 lc = Charset('us-ascii')
2493 uc = Charset('US-ASCII')
2494 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2495
2496 def test_partial_falls_inside_message_delivery_status(self):
2497 eq = self.ndiffAssertEqual
2498 # The Parser interface provides chunks of data to FeedParser in 8192
2499 # byte gulps. SF bug #1076485 found one of those chunks inside
2500 # message/delivery-status header block, which triggered an
2501 # unreadline() of NeedMoreData.
2502 msg = self._msgobj('msg_43.txt')
2503 sfp = StringIO()
2504 iterators._structure(msg, sfp)
2505 eq(sfp.getvalue(), """\
2506multipart/report
2507 text/plain
2508 message/delivery-status
2509 text/plain
2510 text/plain
2511 text/plain
2512 text/plain
2513 text/plain
2514 text/plain
2515 text/plain
2516 text/plain
2517 text/plain
2518 text/plain
2519 text/plain
2520 text/plain
2521 text/plain
2522 text/plain
2523 text/plain
2524 text/plain
2525 text/plain
2526 text/plain
2527 text/plain
2528 text/plain
2529 text/plain
2530 text/plain
2531 text/plain
2532 text/plain
2533 text/plain
2534 text/plain
2535 text/rfc822-headers
2536""")
2537
R. David Murraya0b44b52010-12-02 21:47:19 +00002538 def test_make_msgid_domain(self):
2539 self.assertEqual(
2540 email.utils.make_msgid(domain='testdomain-string')[-19:],
2541 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002542
Ezio Melottib3aedd42010-11-20 19:04:17 +00002543
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002544# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators, plus one regression test
    # for the line buffering used by the feed parser.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        # msg_19.txt holds the text the body lines are expected to join to.
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Same as above, but matching on both main type and subtype.
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of lines that must become
        # readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is available after this push.
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values when the counts differ.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2631
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002632
Ezio Melottib3aedd42010-11-20 19:04:17 +00002633
class TestParsers(TestEmailBase):
    # Parser, HeaderParser and message_from_* behaviour on unusual input.

    # unittest: None removes the length limit on failure diffs.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is not parsed into parts: the payload is a single str.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLF line endings of the sample file.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        # Flattening with CRLF must reproduce the input text exactly.
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        # The joined text has no trailing newline and no body.
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        # Neither header value may keep the CR or LF that ended its line.
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        # Iterating the message yields its header names.
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so it is not a
        # header and no headers are recorded at all.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002796
Ezio Melottib3aedd42010-11-20 19:04:17 +00002797
R. David Murray96fd54e2010-10-08 15:55:28 +00002798class Test8BitBytesHandling(unittest.TestCase):
2799 # In Python3 all input is string, but that doesn't work if the actual input
2800 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2801 # decode byte streams using the surrogateescape error handler, and
2802 # reconvert to binary at appropriate places if we detect surrogates. This
2803 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2804 # but it does allow us to parse and preserve them, and to decode body
2805 # parts that use an 8bit CTE.
2806
2807 bodytest_msg = textwrap.dedent("""\
2808 From: foo@bar.com
2809 To: baz
2810 Mime-Version: 1.0
2811 Content-Type: text/plain; charset={charset}
2812 Content-Transfer-Encoding: {cte}
2813
2814 {bodyline}
2815 """)
2816
2817 def test_known_8bit_CTE(self):
2818 m = self.bodytest_msg.format(charset='utf-8',
2819 cte='8bit',
2820 bodyline='pöstal').encode('utf-8')
2821 msg = email.message_from_bytes(m)
2822 self.assertEqual(msg.get_payload(), "pöstal\n")
2823 self.assertEqual(msg.get_payload(decode=True),
2824 "pöstal\n".encode('utf-8'))
2825
2826 def test_unknown_8bit_CTE(self):
2827 m = self.bodytest_msg.format(charset='notavalidcharset',
2828 cte='8bit',
2829 bodyline='pöstal').encode('utf-8')
2830 msg = email.message_from_bytes(m)
2831 self.assertEqual(msg.get_payload(), "p��stal\n")
2832 self.assertEqual(msg.get_payload(decode=True),
2833 "pöstal\n".encode('utf-8'))
2834
2835 def test_8bit_in_quopri_body(self):
2836 # This is non-RFC compliant data...without 'decode' the library code
2837 # decodes the body using the charset from the headers, and because the
2838 # source byte really is utf-8 this works. This is likely to fail
2839 # against real dirty data (ie: produce mojibake), but the data is
2840 # invalid anyway so it is as good a guess as any. But this means that
2841 # this test just confirms the current behavior; that behavior is not
2842 # necessarily the best possible behavior. With 'decode' it is
2843 # returning the raw bytes, so that test should be of correct behavior,
2844 # or at least produce the same result that email4 did.
2845 m = self.bodytest_msg.format(charset='utf-8',
2846 cte='quoted-printable',
2847 bodyline='p=C3=B6stál').encode('utf-8')
2848 msg = email.message_from_bytes(m)
2849 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2850 self.assertEqual(msg.get_payload(decode=True),
2851 'pöstál\n'.encode('utf-8'))
2852
2853 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2854 # This is similar to the previous test, but proves that if the 8bit
2855 # byte is undecodeable in the specified charset, it gets replaced
2856 # by the unicode 'unknown' character. Again, this may or may not
2857 # be the ideal behavior. Note that if decode=False none of the
2858 # decoders will get involved, so this is the only test we need
2859 # for this behavior.
2860 m = self.bodytest_msg.format(charset='ascii',
2861 cte='quoted-printable',
2862 bodyline='p=C3=B6stál').encode('utf-8')
2863 msg = email.message_from_bytes(m)
2864 self.assertEqual(msg.get_payload(), 'p=C3=B6st��l\n')
2865 self.assertEqual(msg.get_payload(decode=True),
2866 'pöstál\n'.encode('utf-8'))
2867
2868 def test_8bit_in_base64_body(self):
2869 # Sticking an 8bit byte in a base64 block makes it undecodable by
2870 # normal means, so the block is returned undecoded, but as bytes.
2871 m = self.bodytest_msg.format(charset='utf-8',
2872 cte='base64',
2873 bodyline='cMO2c3RhbAá=').encode('utf-8')
2874 msg = email.message_from_bytes(m)
2875 self.assertEqual(msg.get_payload(decode=True),
2876 'cMO2c3RhbAá=\n'.encode('utf-8'))
2877
2878 def test_8bit_in_uuencode_body(self):
2879 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2880 # normal means, so the block is returned undecoded, but as bytes.
2881 m = self.bodytest_msg.format(charset='utf-8',
2882 cte='uuencode',
2883 bodyline='<,.V<W1A; á ').encode('utf-8')
2884 msg = email.message_from_bytes(m)
2885 self.assertEqual(msg.get_payload(decode=True),
2886 '<,.V<W1A; á \n'.encode('utf-8'))
2887
2888
2889 headertest_msg = textwrap.dedent("""\
2890 From: foo@bar.com
2891 To: báz
2892 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
2893 \tJean de Baddie
2894 From: göst
2895
2896 Yes, they are flying.
2897 """).encode('utf-8')
2898
2899 def test_get_8bit_header(self):
2900 msg = email.message_from_bytes(self.headertest_msg)
2901 self.assertEqual(msg.get('to'), 'b??z')
2902 self.assertEqual(msg['to'], 'b??z')
2903
2904 def test_print_8bit_headers(self):
2905 msg = email.message_from_bytes(self.headertest_msg)
2906 self.assertEqual(str(msg),
2907 self.headertest_msg.decode(
2908 'ascii', 'replace').replace('�', '?'))
2909
2910 def test_values_with_8bit_headers(self):
2911 msg = email.message_from_bytes(self.headertest_msg)
2912 self.assertListEqual(msg.values(),
2913 ['foo@bar.com',
2914 'b??z',
2915 'Maintenant je vous pr??sente mon '
2916 'coll??gue, le pouf c??l??bre\n'
2917 '\tJean de Baddie',
2918 "g??st"])
2919
2920 def test_items_with_8bit_headers(self):
2921 msg = email.message_from_bytes(self.headertest_msg)
2922 self.assertListEqual(msg.items(),
2923 [('From', 'foo@bar.com'),
2924 ('To', 'b??z'),
2925 ('Subject', 'Maintenant je vous pr??sente mon '
2926 'coll??gue, le pouf c??l??bre\n'
2927 '\tJean de Baddie'),
2928 ('From', 'g??st')])
2929
2930 def test_get_all_with_8bit_headers(self):
2931 msg = email.message_from_bytes(self.headertest_msg)
2932 self.assertListEqual(msg.get_all('from'),
2933 ['foo@bar.com',
2934 'g??st'])
2935
2936 non_latin_bin_msg = textwrap.dedent("""\
2937 From: foo@bar.com
2938 To: báz
2939 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
2940 \tJean de Baddie
2941 Mime-Version: 1.0
2942 Content-Type: text/plain; charset="utf-8"
2943 Content-Transfer-Encoding: 8bit
2944
2945 Да, они летят.
2946 """).encode('utf-8')
2947
2948 def test_bytes_generator(self):
2949 msg = email.message_from_bytes(self.non_latin_bin_msg)
2950 out = BytesIO()
2951 email.generator.BytesGenerator(out).flatten(msg)
2952 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
2953
2954 # XXX: ultimately the '?' should turn into CTE encoded bytes
2955 # using 'unknown-8bit' charset.
2956 non_latin_bin_msg_as7bit = textwrap.dedent("""\
2957 From: foo@bar.com
2958 To: b??z
2959 Subject: Maintenant je vous pr??sente mon coll??gue, le pouf c??l??bre
2960 \tJean de Baddie
2961 Mime-Version: 1.0
2962 Content-Type: text/plain; charset="utf-8"
2963 Content-Transfer-Encoding: base64
2964
2965 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
2966 """)
2967
2968 def test_generator_handles_8bit(self):
2969 msg = email.message_from_bytes(self.non_latin_bin_msg)
2970 out = StringIO()
2971 email.generator.Generator(out).flatten(msg)
2972 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit)
2973
2974 def test_bytes_generator_with_unix_from(self):
2975 # The unixfrom contains a current date, so we can't check it
2976 # literally. Just make sure the first word is 'From' and the
2977 # rest of the message matches the input.
2978 msg = email.message_from_bytes(self.non_latin_bin_msg)
2979 out = BytesIO()
2980 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
2981 lines = out.getvalue().split(b'\n')
2982 self.assertEqual(lines[0].split()[0], b'From')
2983 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
2984
2985 def test_message_from_binary_file(self):
2986 fn = 'test.msg'
2987 self.addCleanup(unlink, fn)
2988 with open(fn, 'wb') as testfile:
2989 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00002990 with open(fn, 'rb') as testfile:
2991 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00002992 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
2993
2994 latin_bin_msg = textwrap.dedent("""\
2995 From: foo@bar.com
2996 To: Dinsdale
2997 Subject: Nudge nudge, wink, wink
2998 Mime-Version: 1.0
2999 Content-Type: text/plain; charset="latin-1"
3000 Content-Transfer-Encoding: 8bit
3001
3002 oh là là, know what I mean, know what I mean?
3003 """).encode('latin-1')
3004
3005 latin_bin_msg_as7bit = textwrap.dedent("""\
3006 From: foo@bar.com
3007 To: Dinsdale
3008 Subject: Nudge nudge, wink, wink
3009 Mime-Version: 1.0
3010 Content-Type: text/plain; charset="iso-8859-1"
3011 Content-Transfer-Encoding: quoted-printable
3012
3013 oh l=E0 l=E0, know what I mean, know what I mean?
3014 """)
3015
3016 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3017 m = email.message_from_bytes(self.latin_bin_msg)
3018 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3019
3020 def test_decoded_generator_emits_unicode_body(self):
3021 m = email.message_from_bytes(self.latin_bin_msg)
3022 out = StringIO()
3023 email.generator.DecodedGenerator(out).flatten(m)
3024 #DecodedHeader output contains an extra blank line compared
3025 #to the input message. RDM: not sure if this is a bug or not,
3026 #but it is not specific to the 8bit->7bit conversion.
3027 self.assertEqual(out.getvalue(),
3028 self.latin_bin_msg.decode('latin-1')+'\n')
3029
3030 def test_bytes_feedparser(self):
3031 bfp = email.feedparser.BytesFeedParser()
3032 for i in range(0, len(self.latin_bin_msg), 10):
3033 bfp.feed(self.latin_bin_msg[i:i+10])
3034 m = bfp.close()
3035 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3036
R. David Murray8451c4b2010-10-23 22:19:56 +00003037 def test_crlf_flatten(self):
3038 with openfile('msg_26.txt', 'rb') as fp:
3039 text = fp.read()
3040 msg = email.message_from_bytes(text)
3041 s = BytesIO()
3042 g = email.generator.BytesGenerator(s)
3043 g.flatten(msg, linesep='\r\n')
3044 self.assertEqual(s.getvalue(), text)
3045 maxDiff = None
3046
Ezio Melottib3aedd42010-11-20 19:04:17 +00003047
class BaseTestBytesGeneratorIdempotent:
    # Mixin that round-trips messages through the bytes parser and
    # BytesGenerator.  It reads self.linesep, self.blinesep and
    # self.normalize_linesep_regex, which this class does not define.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the sample as bytes and rewrite its line endings to
        # self.blinesep before parsing; return (message, normalized bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # compare the output with data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        expected_lines = str1.split(b'\n')
        actual_lines = str2.split(b'\n')
        self.assertListEqual(expected_lines, actual_lines)
3069
3070
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Run the inherited tests with bare-LF line endings: the pattern
    # matches every CRLF so that it can be replaced by blinesep.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3076
3077
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Run the inherited tests with CRLF line endings: the pattern matches
    # only an LF that is not already preceded by a CR.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3083
Ezio Melottib3aedd42010-11-20 19:04:17 +00003084
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Four output characters are expected for every started group of
        # three input characters.
        for size in range(15):
            groups = (size + 2) // 3
            eq(base64mime.header_length('x' * size), 4 * groups)

    def test_decode(self):
        eq = self.assertEqual
        for encoded, decoded in (('', b''), ('aGVsbG8=', b'hello')):
            eq(base64mime.decode(encoded), decoded)

    def test_encode(self):
        eq = self.assertEqual
        encode = base64mime.body_encode
        eq(encode(b''), b'')
        eq(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(encode(b'hello\n'), 'aGVsbG8K\n')
        payload = b'xxxx ' * 20
        # Test the maxlinelen arg
        eq(encode(payload, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(encode(payload, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        encode = base64mime.header_encode
        eq(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(encode('hello', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8=?=')
        eq(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003135
3136
Ezio Melottib3aedd42010-11-20 19:04:17 +00003137
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003138class TestQuopri(unittest.TestCase):
def setUp(self):
    # Set of characters (as byte integers) that don't need to be encoded
    # in headers.  Iterating the bytes literal yields integers as well.
    lower = range(ord('a'), ord('z') + 1)
    upper = range(ord('A'), ord('Z') + 1)
    digits = range(ord('0'), ord('9') + 1)
    self.hlit = list(chain(lower, upper, digits, b'!*+-/'))
    # Set of characters (as byte integers) that do need to be encoded in
    # headers.
    self.hnon = [octet for octet in range(256) if octet not in self.hlit]
    assert len(self.hlit) + len(self.hnon) == 256
    # Set of characters (as byte integers) that don't need to be encoded
    # in bodies: space through '~' without '=', followed by tab.
    printable = range(ord(' '), ord('~') + 1)
    self.blit = [octet for octet in printable if octet != ord('=')]
    self.blit.append(ord('\t'))
    # Set of characters (as byte integers) that do need to be encoded in
    # bodies.
    self.bnon = [octet for octet in range(256) if octet not in self.blit]
    assert len(self.blit) + len(self.bnon) == 256
3160
Guido van Rossum9604e662007-08-30 03:46:43 +00003161 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003162 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003163 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003164 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003165 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003166 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003167 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003168
Guido van Rossum9604e662007-08-30 03:46:43 +00003169 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003170 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003171 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003172 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003173 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003174 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003175 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003176
3177 def test_header_quopri_len(self):
3178 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003179 eq(quoprimime.header_length(b'hello'), 5)
3180 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003181 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003182 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003183 # =?xxx?q?...?= means 10 extra characters
3184 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003185 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3186 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003187 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003188 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003189 # =?xxx?q?...?= means 10 extra characters
3190 10)
3191 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003192 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003193 'expected length 1 for %r' % chr(c))
3194 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003195 # Space is special; it's encoded to _
3196 if c == ord(' '):
3197 continue
3198 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003199 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003200 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003201
3202 def test_body_quopri_len(self):
3203 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003204 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003205 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003206 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003207 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003208
3209 def test_quote_unquote_idempotent(self):
3210 for x in range(256):
3211 c = chr(x)
3212 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3213
3214 def test_header_encode(self):
3215 eq = self.assertEqual
3216 he = quoprimime.header_encode
3217 eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
3218 eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
3219 eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
3220 # Test a non-ASCII character
3221 eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
3222
3223 def test_decode(self):
3224 eq = self.assertEqual
3225 eq(quoprimime.decode(''), '')
3226 eq(quoprimime.decode('hello'), 'hello')
3227 eq(quoprimime.decode('hello', 'X'), 'hello')
3228 eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')
3229
3230 def test_encode(self):
3231 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003232 eq(quoprimime.body_encode(''), '')
3233 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003234 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003235 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003236 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003237 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003238xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3239 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3240x xxxx xxxx xxxx xxxx=20""")
3241 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003242 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3243 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003244xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3245 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3246x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003247 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003248one line
3249
3250two line"""), """\
3251one line
3252
3253two line""")
3254
3255
Ezio Melottib3aedd42010-11-20 19:04:17 +00003256
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003257# Test the Charset class
3258class TestCharset(unittest.TestCase):
3259 def tearDown(self):
3260 from email import charset as CharsetModule
3261 try:
3262 del CharsetModule.CHARSETS['fake']
3263 except KeyError:
3264 pass
3265
Guido van Rossum9604e662007-08-30 03:46:43 +00003266 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003267 eq = self.assertEqual
3268 # Make sure us-ascii = no Unicode conversion
3269 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003270 eq(c.header_encode('Hello World!'), 'Hello World!')
3271 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003272 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003273 self.assertRaises(UnicodeError, c.header_encode, s)
3274 c = Charset('utf-8')
3275 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003276
3277 def test_body_encode(self):
3278 eq = self.assertEqual
3279 # Try a charset with QP body encoding
3280 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003281 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003282 # Try a charset with Base64 body encoding
3283 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003284 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003285 # Try a charset with None body encoding
3286 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003287 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003288 # Try the convert argument, where input codec != output codec
3289 c = Charset('euc-jp')
3290 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003291 # XXX FIXME
3292## try:
3293## eq('\x1b$B5FCO;~IW\x1b(B',
3294## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3295## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3296## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3297## except LookupError:
3298## # We probably don't have the Japanese codecs installed
3299## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003300 # Testing SF bug #625509, which we have to fake, since there are no
3301 # built-in encodings where the header encoding is QP but the body
3302 # encoding is not.
3303 from email import charset as CharsetModule
3304 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3305 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003306 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003307
3308 def test_unicode_charset_name(self):
3309 charset = Charset('us-ascii')
3310 self.assertEqual(str(charset), 'us-ascii')
3311 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3312
3313
Ezio Melottib3aedd42010-11-20 19:04:17 +00003314
# Test multilingual MIME headers.
#
# NOTE(review): this class was recovered from a copy of the file in which
# runs of whitespace had been collapsed; verify multi-space sequences inside
# string literals against the upstream file before relying on them.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # NOTE(review): two spaces expected here -- the chunk's own leading
        # space plus the separator that test_simple_surprise shows is
        # inserted between chunks.  Confirm against upstream.
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # No leading space in the appended chunk, yet the encoded form has
        # one: chunks are joined with a space.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must respect the requested maximum length.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must yield the three original chunks with their charsets.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a header name the first line is folded earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A generous maxlinelen means no folding at all.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset may be given as a plain string rather than a Charset.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # Mostly-ASCII text comes out Q-encoded, all-CJK text base64-encoded.
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # With errors='replace' the undecodable byte is substituted instead.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words decode into a single chunk.
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # A shift_jis header is emitted using iso-2022-jp.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
3626
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003627
Ezio Melottib3aedd42010-11-20 19:04:17 +00003628
# Test RFC 2231 header parameters (en/de)coding
#
# NOTE(review): this class was recovered from a copy of the file in which
# runs of whitespace had been collapsed; verify multi-space sequences inside
# string literals against the upstream file and the msg_*.txt fixtures.
class TestRFC2231(TestEmailBase):
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): "Fri,  4 May" in the Received line is written with
        # two spaces (space-padded day); confirm against msg_01.txt.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): "Fri,  4 May" in the Received line is written with
        # two spaces (space-padded day); confirm against msg_01.txt.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also serialize back to exactly what was parsed.
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also serialize back to exactly what was parsed.
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Unencoded continuations come back as a plain string, not a
        # (charset, language, value) tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # NOTE(review): the body of this test is identical to
    # test_rfc2231_no_language_or_charset_in_filename above; check whether
    # the input was meant to differ.
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 is not marked as encoded (no trailing '*'), so its
        # %-escapes are expected to survive verbatim.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded, so nothing is %-decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        # Note the lower-cased result.
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        # The undecodable %E2 byte becomes U+FFFD in the result.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        # A lone apostrophe is not a charset'language' prefix.
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        # Only the first two ticks delimit charset and language; the third
        # belongs to the value.
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Segments are not marked as encoded, so the ticks are plain text.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
3964
3965
Ezio Melottib3aedd42010-11-20 19:04:17 +00003966
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Matches the first MIME part of a message: group 1 is the boundary
    # string, group 2 the text between the first two boundary lines.
    _FIRST_PART_RE = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, Message parsed from that text).
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message
        in_match = self._FIRST_PART_RE.search(original)
        out_match = self._FIRST_PART_RE.search(result)
        # Fail with a readable message rather than an AttributeError on
        # None if either text has no recognizable first part.
        self.assertIsNotNone(in_match, 'no MIME part found in original')
        self.assertIsNotNone(out_match, 'no MIME part found in result')
        inpart = in_match.group(2)
        outpart = out_match.group(2)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4002
4003
Ezio Melottib3aedd42010-11-20 19:04:17 +00004004
def _testclasses():
    # Return every attribute of this module whose name starts with 'Test'.
    # Selection is by name only; the objects themselves are not inspected.
    mod = sys.modules[__name__]
    return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
4008
4009
def suite():
    # Build one TestSuite holding the tests of every class returned by
    # _testclasses().  loadTestsFromTestCase() replaces the deprecated
    # unittest.makeSuite() and collects the same 'test*' methods.
    tests = unittest.TestSuite()
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    for testclass in _testclasses():
        tests.addTest(load(testclass))
    return tests
4015
4016
def test_main():
    # Hand each class returned by _testclasses() to run_unittest(), one
    # class per call.
    for testclass in _testclasses():
        run_unittest(testclass)
4020
4021
Ezio Melottib3aedd42010-11-20 19:04:17 +00004022
# When run as a script, let unittest drive the suite() function above.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')