blob: a54c1a3447a89b0e674f0eda3f688019a4794f71 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory beside the email tests.

    Any extra positional or keyword arguments are handed to open()
    unchanged, and the resulting file object is returned.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    """Common helpers shared by the email package test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Diff the repr() of every line so that whitespace and escape
            # differences show up clearly in the failure message.
            rfirst = [repr(line) for line in str(first).splitlines()]
            rsecond = [repr(line) for line in str(second).splitlines()]
            diff = difflib.ndiff(rfirst, rsecond)
            raise self.failureException(NL + NL.join(diff))

    def _msgobj(self, filename):
        """Parse the named test data file and return the resulting message."""
        # openfile() already anchors the name in the package's data
        # directory.  Passing it through findfile() first was redundant and
        # could substitute an unrelated same-named file found on sys.path.
        with openfile(filename) as fp:
            return email.message_from_file(fp)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the header, parameter and payload API of Message."""

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied default unchanged.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # openfile() resolves the data directory itself, so the name is
        # passed directly rather than through findfile().
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends instead of replacing, so remove the
            # previous value first; otherwise only the first spelling would
            # ever be looked at when decoding.  Deleting a header that is
            # not present is a no-op.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # There is no Content-Type: header to read parameters from.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # The semicolons inside the quoted value must not split the parameter.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter places it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # Undecodable base64 comes back as the raw payload bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*="utf-8\'\'Fu%C3%9Fballer.ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
            msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000547
Ezio Melottib3aedd42010-11-20 19:04:17 +0000548
# Test the email.encoders module
550class TestEncoders(unittest.TestCase):
551 def test_encode_empty_payload(self):
552 eq = self.assertEqual
553 msg = Message()
554 msg.set_charset('us-ascii')
555 eq(msg['content-transfer-encoding'], '7bit')
556
557 def test_default_cte(self):
558 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000559 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000560 msg = MIMEText('hello world')
561 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000562 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000563 msg = MIMEText('hello \xf8 world')
564 eq(msg['content-transfer-encoding'], '8bit')
565 # And now with a different charset
566 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
567 eq(msg['content-transfer-encoding'], 'quoted-printable')
568
R. David Murraye85200d2010-05-06 01:41:14 +0000569 def test_encode7or8bit(self):
570 # Make sure a charset whose input character set is 8bit but
571 # whose output character set is 7bit gets a transfer-encoding
572 # of 7bit.
573 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000574 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000575 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000576
Ezio Melottib3aedd42010-11-20 19:04:17 +0000577
# Test long header wrapping
579class TestLongHeaders(TestEmailBase):
580 def test_split_long_continuation(self):
581 eq = self.ndiffAssertEqual
582 msg = email.message_from_string("""\
583Subject: bug demonstration
584\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
585\tmore text
586
587test
588""")
589 sfp = StringIO()
590 g = Generator(sfp)
591 g.flatten(msg)
592 eq(sfp.getvalue(), """\
593Subject: bug demonstration
594\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
595\tmore text
596
597test
598""")
599
600 def test_another_long_almost_unsplittable_header(self):
601 eq = self.ndiffAssertEqual
602 hstr = """\
603bug demonstration
604\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
605\tmore text"""
606 h = Header(hstr, continuation_ws='\t')
607 eq(h.encode(), """\
608bug demonstration
609\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
610\tmore text""")
611 h = Header(hstr.replace('\t', ' '))
612 eq(h.encode(), """\
613bug demonstration
614 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
615 more text""")
616
617 def test_long_nonstring(self):
618 eq = self.ndiffAssertEqual
619 g = Charset("iso-8859-1")
620 cz = Charset("iso-8859-2")
621 utf8 = Charset("utf-8")
622 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
623 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
624 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
625 b'bef\xf6rdert. ')
626 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
627 b'd\xf9vtipu.. ')
628 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
629 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
630 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
631 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
632 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
633 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
634 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
635 '\u3044\u307e\u3059\u3002')
636 h = Header(g_head, g, header_name='Subject')
637 h.append(cz_head, cz)
638 h.append(utf8_head, utf8)
639 msg = Message()
640 msg['Subject'] = h
641 sfp = StringIO()
642 g = Generator(sfp)
643 g.flatten(msg)
644 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000645Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
646 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
647 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
648 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
649 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
650 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
651 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
652 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
653 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
654 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
655 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000656
657""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000658 eq(h.encode(maxlinelen=76), """\
659=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
660 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
661 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
662 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
663 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
664 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
665 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
666 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
667 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
668 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
669 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000670
671 def test_long_header_encode(self):
672 eq = self.ndiffAssertEqual
673 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
674 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
675 header_name='X-Foobar-Spoink-Defrobnit')
676 eq(h.encode(), '''\
677wasnipoop; giraffes="very-long-necked-animals";
678 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
679
680 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
681 eq = self.ndiffAssertEqual
682 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
683 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
684 header_name='X-Foobar-Spoink-Defrobnit',
685 continuation_ws='\t')
686 eq(h.encode(), '''\
687wasnipoop; giraffes="very-long-necked-animals";
688 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
689
690 def test_long_header_encode_with_tab_continuation(self):
691 eq = self.ndiffAssertEqual
692 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
693 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
694 header_name='X-Foobar-Spoink-Defrobnit',
695 continuation_ws='\t')
696 eq(h.encode(), '''\
697wasnipoop; giraffes="very-long-necked-animals";
698\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
699
700 def test_header_splitter(self):
701 eq = self.ndiffAssertEqual
702 msg = MIMEText('')
703 # It'd be great if we could use add_header() here, but that doesn't
704 # guarantee an order of the parameters.
705 msg['X-Foobar-Spoink-Defrobnit'] = (
706 'wasnipoop; giraffes="very-long-necked-animals"; '
707 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
708 sfp = StringIO()
709 g = Generator(sfp)
710 g.flatten(msg)
711 eq(sfp.getvalue(), '''\
712Content-Type: text/plain; charset="us-ascii"
713MIME-Version: 1.0
714Content-Transfer-Encoding: 7bit
715X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
716 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
717
718''')
719
720 def test_no_semis_header_splitter(self):
721 eq = self.ndiffAssertEqual
722 msg = Message()
723 msg['From'] = 'test@dom.ain'
724 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
725 msg.set_payload('Test')
726 sfp = StringIO()
727 g = Generator(sfp)
728 g.flatten(msg)
729 eq(sfp.getvalue(), """\
730From: test@dom.ain
731References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
732 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
733
734Test""")
735
736 def test_no_split_long_header(self):
737 eq = self.ndiffAssertEqual
738 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000739 h = Header(hstr)
740 # These come on two lines because Headers are really field value
741 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000742 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000743References:
744 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
745 h = Header('x' * 80)
746 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000747
748 def test_splitting_multiple_long_lines(self):
749 eq = self.ndiffAssertEqual
750 hstr = """\
751from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
752\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
753\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
754"""
755 h = Header(hstr, continuation_ws='\t')
756 eq(h.encode(), """\
757from babylon.socal-raves.org (localhost [127.0.0.1]);
758 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
759 for <mailman-admin@babylon.socal-raves.org>;
760 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
761\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
762 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
763 for <mailman-admin@babylon.socal-raves.org>;
764 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
765\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
766 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
767 for <mailman-admin@babylon.socal-raves.org>;
768 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
769
770 def test_splitting_first_line_only_is_long(self):
771 eq = self.ndiffAssertEqual
772 hstr = """\
773from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
774\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
775\tid 17k4h5-00034i-00
776\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
777 h = Header(hstr, maxlinelen=78, header_name='Received',
778 continuation_ws='\t')
779 eq(h.encode(), """\
780from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
781 helo=cthulhu.gerg.ca)
782\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
783\tid 17k4h5-00034i-00
784\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
785
786 def test_long_8bit_header(self):
787 eq = self.ndiffAssertEqual
788 msg = Message()
789 h = Header('Britische Regierung gibt', 'iso-8859-1',
790 header_name='Subject')
791 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000792 eq(h.encode(maxlinelen=76), """\
793=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
794 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000795 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000796 eq(msg.as_string(maxheaderlen=76), """\
797Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
798 =?iso-8859-1?q?hore-Windkraftprojekte?=
799
800""")
801 eq(msg.as_string(maxheaderlen=0), """\
802Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000803
804""")
805
806 def test_long_8bit_header_no_charset(self):
807 eq = self.ndiffAssertEqual
808 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000809 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
810 'f\xfcr Offshore-Windkraftprojekte '
811 '<a-very-long-address@example.com>')
812 msg['Reply-To'] = header_string
813 self.assertRaises(UnicodeEncodeError, msg.as_string)
814 msg = Message()
815 msg['Reply-To'] = Header(header_string, 'utf-8',
816 header_name='Reply-To')
817 eq(msg.as_string(maxheaderlen=78), """\
818Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
819 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000820
821""")
822
823 def test_long_to_header(self):
824 eq = self.ndiffAssertEqual
825 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
826 '<someone@eecs.umich.edu>,'
827 '"Someone Test #B" <someone@umich.edu>, '
828 '"Someone Test #C" <someone@eecs.umich.edu>, '
829 '"Someone Test #D" <someone@eecs.umich.edu>')
830 msg = Message()
831 msg['To'] = to
832 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000833To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000834 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000835 "Someone Test #C" <someone@eecs.umich.edu>,
836 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000837
838''')
839
840 def test_long_line_after_append(self):
841 eq = self.ndiffAssertEqual
842 s = 'This is an example of string which has almost the limit of header length.'
843 h = Header(s)
844 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000845 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000846This is an example of string which has almost the limit of header length.
847 Add another line.""")
848
849 def test_shorter_line_with_append(self):
850 eq = self.ndiffAssertEqual
851 s = 'This is a shorter line.'
852 h = Header(s)
853 h.append('Add another sentence. (Surprise?)')
854 eq(h.encode(),
855 'This is a shorter line. Add another sentence. (Surprise?)')
856
857 def test_long_field_name(self):
858 eq = self.ndiffAssertEqual
859 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000860 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
861 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
862 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
863 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000864 h = Header(gs, 'iso-8859-1', header_name=fn)
865 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000866 eq(h.encode(maxlinelen=76), """\
867=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
868 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
869 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
870 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000871
872 def test_long_received_header(self):
873 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
874 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
875 'Wed, 05 Mar 2003 18:10:18 -0700')
876 msg = Message()
877 msg['Received-1'] = Header(h, continuation_ws='\t')
878 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000879 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000880 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000881Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
882 Wed, 05 Mar 2003 18:10:18 -0700
883Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
884 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000885
886""")
887
888 def test_string_headerinst_eq(self):
889 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
890 'tu-muenchen.de> (David Bremner\'s message of '
891 '"Thu, 6 Mar 2003 13:58:21 +0100")')
892 msg = Message()
893 msg['Received-1'] = Header(h, header_name='Received-1',
894 continuation_ws='\t')
895 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000896 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000897 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000898Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
899 6 Mar 2003 13:58:21 +0100\")
900Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
901 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000902
903""")
904
905 def test_long_unbreakable_lines_with_continuation(self):
906 eq = self.ndiffAssertEqual
907 msg = Message()
908 t = """\
909iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
910 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
911 msg['Face-1'] = t
912 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000913 # XXX This splitting is all wrong. It the first value line should be
914 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000915 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000916Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000917 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000918 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000919Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000920 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000921 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
922
923""")
924
925 def test_another_long_multiline_header(self):
926 eq = self.ndiffAssertEqual
927 m = ('Received: from siimage.com '
928 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000929 'Microsoft SMTPSVC(5.0.2195.4905); '
930 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000931 msg = email.message_from_string(m)
932 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000933Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
934 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000935
936''')
937
938 def test_long_lines_with_different_header(self):
939 eq = self.ndiffAssertEqual
940 h = ('List-Unsubscribe: '
941 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
942 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
943 '?subject=unsubscribe>')
944 msg = Message()
945 msg['List'] = h
946 msg['List'] = Header(h, header_name='List')
947 eq(msg.as_string(maxheaderlen=78), """\
948List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000949 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000950List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000951 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000952
953""")
954
955
Ezio Melottib3aedd42010-11-20 19:04:17 +0000956
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000957# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The fixture body starts with "From "; the tests check that the line
    # is written as ">From " only when mangle_from_ is true.

    def setUp(self):
        msg = Message()
        msg['From'] = 'aaa@bbb.org'
        msg.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = msg

    def _flattened(self, mangle_from_):
        # Serialize the fixture message with the requested mangling flag.
        out = StringIO()
        Generator(out, mangle_from_=mangle_from_).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flattened(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        self.assertEqual(self._flattened(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
988
989
Ezio Melottib3aedd42010-11-20 19:04:17 +0000990
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000991# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in
        # email/test/data.  In Python, there's an audiotest.au living in
        # Lib/test but that isn't included in some binary distros that
        # don't include the test package.  The trailing empty string on
        # the .join() is significant since findfile() will do a dirname().
        data_dir = os.path.join(os.path.dirname(landmark), 'data', '')
        audio_path = findfile('audiotest.au', data_dir)
        with open(audio_path, 'rb') as stream:
            self._audiodata = stream.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text; decoding it must give back the bytes
        # read from disk.
        encoded = bytes(self._au.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        audio = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(audio.get_content_type(), 'audio/fish')

    def test_add_header(self):
        audio = self._au
        audio.add_header('Content-Disposition', 'attachment',
                         filename='audiotest.au')
        self.assertEqual(audio['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(audio.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            audio.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A fresh list is a sentinel that cannot equal any real value.
        missing = []
        self.assertEqual(
            audio.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            audio.get_param('foo', failobj=missing,
                            header='content-disposition'),
            missing)
        # Try some missing stuff
        self.assertIs(audio.get_param('foobar', missing), missing)
        self.assertIs(
            audio.get_param('attachment', missing, header='foobar'),
            missing)
1035
1036
Ezio Melottib3aedd42010-11-20 19:04:17 +00001037
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001038# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as stream:
            self._imgdata = stream.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text; decoding it must give back the bytes
        # read from disk.
        encoded = bytes(self._im.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        image = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(image.get_content_type(), 'image/fish')

    def test_add_header(self):
        image = self._im
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        self.assertEqual(image['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(image.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            image.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A fresh list is a sentinel that cannot equal any real value.
        missing = []
        self.assertEqual(
            image.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            image.get_param('foo', failobj=missing,
                            header='content-disposition'),
            missing)
        # Try some missing stuff
        self.assertIs(image.get_param('foobar', missing), missing)
        self.assertIs(
            image.get_param('attachment', missing, header='foobar'),
            missing)
1076
1077
Ezio Melottib3aedd42010-11-20 19:04:17 +00001078
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001079# Test the basic MIMEApplication class
1080class TestMIMEApplication(unittest.TestCase):
1081 def test_headers(self):
1082 eq = self.assertEqual
Barry Warsaw8b2af272007-08-31 03:04:26 +00001083 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001084 eq(msg.get_content_type(), 'application/octet-stream')
1085 eq(msg['content-transfer-encoding'], 'base64')
1086
1087 def test_body(self):
1088 eq = self.assertEqual
Barry Warsaw8c571042007-08-30 19:17:18 +00001089 bytes = b'\xfa\xfb\xfc\xfd\xfe\xff'
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001090 msg = MIMEApplication(bytes)
R. David Murray7da8f062010-06-04 16:11:08 +00001091 eq(msg.get_payload(), '+vv8/f7/')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001092 eq(msg.get_payload(decode=True), bytes)
1093
1094
Ezio Melottib3aedd42010-11-20 19:04:17 +00001095
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001096# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        text = self._msg
        self.assertEqual(text.get_content_type(), 'text/plain')
        self.assertEqual(text.get_param('charset'), 'us-ascii')
        # A fresh list is a sentinel that cannot equal any real value.
        missing = []
        self.assertIs(text.get_param('foobar', missing), missing)
        self.assertIs(text.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        # With no charset given, the message's charset is compared directly
        # against the string 'us-ascii'.
        text = MIMEText('hello there')
        self.assertEqual(text.get_charset(), 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', text.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        text = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(text.get_charset().output_charset, 'utf-8')
        self.assertEqual(text['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(text.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, cyrillic)
1147
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001148
Ezio Melottib3aedd42010-11-20 19:04:17 +00001149
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001150# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Exercises building, serializing and parsing of multipart/* messages.
    # setUp() builds a multipart/mixed container holding a text part and a
    # GIF image; most serialization tests build their own small container.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is tm_isdst; zero selects the standard
        # offset, anything else the alternate (DST) offset.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone are seconds *west* of UTC, so a positive
        # value is a '-' zone.  Format hours and minutes from the absolute
        # offset: the previous '%04d' % (tzsecs / 36) produced strings such
        # as '+-100' east of UTC and wrong digits for offsets that are not
        # whole hours (19800 s gave 550 instead of 0530).
        sign = '-' if tzsecs > 0 else '+'
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # The container must expose exactly the two attached parts, in
        # order and by identity.
        m = self._msg
        self.assertTrue(m.is_multipart())
        self.assertEqual(m.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        self.assertEqual(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-serializing must reproduce the text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # Empty-string preamble and epilogue each contribute a newline to
        # the expected output below.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        # Each message/external-body subpart must hold exactly one
        # text/plain message.
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a text/html message: the body, boundary
        # lines included, must be reproduced as-is.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # Leading whitespace inside the quoted boundary is part of the
        # boundary and must be preserved.
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_boundary(), ' XXXX')
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the input, with no
        # newline after it.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001522
1523
Ezio Melottib3aedd42010-11-20 19:04:17 +00001524
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001525# Test some badly formatted messages
1526class TestNonConformant(TestEmailBase):
1527 def test_parse_missing_minor_type(self):
1528 eq = self.assertEqual
1529 msg = self._msgobj('msg_14.txt')
1530 eq(msg.get_content_type(), 'text/plain')
1531 eq(msg.get_content_maintype(), 'text')
1532 eq(msg.get_content_subtype(), 'plain')
1533
1534 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001535 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001536 msg = self._msgobj('msg_15.txt')
1537 # XXX We can probably eventually do better
1538 inner = msg.get_payload(0)
1539 unless(hasattr(inner, 'defects'))
1540 self.assertEqual(len(inner.defects), 1)
1541 unless(isinstance(inner.defects[0],
1542 errors.StartBoundaryNotFoundDefect))
1543
1544 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001545 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001546 msg = self._msgobj('msg_25.txt')
1547 unless(isinstance(msg.get_payload(), str))
1548 self.assertEqual(len(msg.defects), 2)
1549 unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
1550 unless(isinstance(msg.defects[1],
1551 errors.MultipartInvariantViolationDefect))
1552
1553 def test_invalid_content_type(self):
1554 eq = self.assertEqual
1555 neq = self.ndiffAssertEqual
1556 msg = Message()
1557 # RFC 2045, $5.2 says invalid yields text/plain
1558 msg['Content-Type'] = 'text'
1559 eq(msg.get_content_maintype(), 'text')
1560 eq(msg.get_content_subtype(), 'plain')
1561 eq(msg.get_content_type(), 'text/plain')
1562 # Clear the old value and try something /really/ invalid
1563 del msg['content-type']
1564 msg['Content-Type'] = 'foo'
1565 eq(msg.get_content_maintype(), 'text')
1566 eq(msg.get_content_subtype(), 'plain')
1567 eq(msg.get_content_type(), 'text/plain')
1568 # Still, make sure that the message is idempotently generated
1569 s = StringIO()
1570 g = Generator(s)
1571 g.flatten(msg)
1572 neq(s.getvalue(), 'Content-Type: foo\n\n')
1573
def test_no_start_boundary(self):
    # The payload of msg_31.txt is expected to come back as one string
    # that still contains the boundary lines and both part bodies.
    expected_payload = """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
"""
    parsed = self._msgobj('msg_31.txt')
    self.ndiffAssertEqual(parsed.get_payload(), expected_payload)
1590
def test_no_separating_blank_line(self):
    # Serialising the parsed msg_35.txt must yield the headers, a blank
    # separator line, and then the body line.
    expected_text = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
"""
    parsed = self._msgobj('msg_35.txt')
    self.ndiffAssertEqual(parsed.as_string(), expected_text)
1601
def test_lying_multipart(self):
    # msg_41.txt must parse with exactly two defects, in this order.
    msg = self._msgobj('msg_41.txt')
    self.assertTrue(hasattr(msg, 'defects'))
    self.assertEqual(len(msg.defects), 2)
    # assertIsInstance reports the actual defect object on failure.
    self.assertIsInstance(msg.defects[0],
                          errors.NoBoundaryInMultipartDefect)
    self.assertIsInstance(msg.defects[1],
                          errors.MultipartInvariantViolationDefect)
1610
def test_missing_start_boundary(self):
    outer = self._msgobj('msg_42.txt')
    # The message structure is:
    #
    # multipart/mixed
    #    text/plain
    #    message/rfc822
    #        multipart/mixed [*]
    #
    # [*] This message is missing its start boundary
    bad = outer.get_payload(1).get_payload(0)
    self.assertEqual(len(bad.defects), 1)
    # assertIsInstance reports the actual defect object on failure instead
    # of the uninformative "False is not true".
    self.assertIsInstance(bad.defects[0],
                          errors.StartBoundaryNotFoundDefect)
1625
def test_first_line_is_continuation_header(self):
    # When the very first line starts with whitespace, the parser is
    # expected to record a defect holding that line, produce no headers,
    # and treat the remaining lines as the payload.
    eq = self.assertEqual
    m = ' Line 1\nLine 2\nLine 3'
    msg = email.message_from_string(m)
    eq(msg.keys(), [])
    eq(msg.get_payload(), 'Line 2\nLine 3')
    eq(len(msg.defects), 1)
    # assertIsInstance reports the actual defect object on failure.
    self.assertIsInstance(msg.defects[0],
                          errors.FirstHeaderLineIsContinuationDefect)
    eq(msg.defects[0].line, ' Line 1\n')
1636
1637
Ezio Melottib3aedd42010-11-20 19:04:17 +00001638
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001639# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Exercises decode_header() and make_header() on RFC 2047 encoded-words.

    def test_rfc2047_multiline(self):
        # NOTE(review): the second line of this literal begins with
        # whitespace; it is reproduced from a whitespace-collapsed view, so
        # confirm the exact leading whitespace against the canonical file.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        encoded_word = (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            encoded_word,
            (b'baz foo bar', None),
            encoded_word,
            ])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding with a 76 column limit splits the last encoded-word.
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b'Pirard <pirard@dom.ain>', None),
            ])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1'),
            ])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded-words not delimited by whitespace are left undecoded: the
        # input comes back as a single chunk with no charset.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
            ])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
            ]
        for encoded, decoded in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1703
Ezio Melottib3aedd42010-11-20 19:04:17 +00001704
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001705# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Tests for MIMEMessage (message/rfc822 wrappers) and for generating and
    # parsing messages that contain nested messages.

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapper must hold the very same object, not a copy; assertIs
        # shows both objects when the identity check fails.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # A MIMEMessage already wraps one message; attaching a second one
        # must raise MultipartConversionError.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        is_a = self.assertIsInstance
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines of this literal are reproduced
        # from a whitespace-collapsed view; confirm their exact leading
        # whitespace against data/msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information. It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        is_a(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        is_a(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        is_a(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        is_a(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A multipart built by hand with a preamble and an epilogue must
        # flatten to exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline; the expected output shows
        # it on its own line before the first boundary.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def _check_digest_default_types(self, filename):
        # Shared body of the two default-type tests: the first two subparts
        # of the parsed file must be message/rfc822 containers, each
        # wrapping a text/plain message.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the reported types must stay
        # message/rfc822 and the headers must vanish from the output.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002004
Ezio Melottib3aedd42010-11-20 19:04:17 +00002005
# A general test of parser->model->generator idempotency.  In other words,
# read a message in, parse it into a message object tree, then without
# touching the tree, regenerate the plain text.  The original text and the
# transformed text should be identical.  Note that we ignore the Unix-From
# since that may contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a data file and checks that flattening the parsed
    # message reproduces the file's text.

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named data file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and require
        # the output to equal text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This is the only case here that also regenerates the Unix-From
        # line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        is_a = self.assertIsInstance
        # Parse the multipart/report sample message
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        is_a(msg3, Message)
        payload = msg3.get_payload()
        is_a(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        is_a(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        is_a = self.assertIsInstance
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        is_a(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        is_a(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        is_a(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002168
2169
Ezio Melottib3aedd42010-11-20 19:04:17 +00002170
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002171# Test various other bits of the package's functionality
2172class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parsing msg_01.txt from a string and flattening it again must give
    # back the original text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2183
def test_message_from_file(self):
    # Same round trip as the string variant, but parsing straight from the
    # open file object.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        # Rewind so the parser sees the file from its beginning.
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2195
2196 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002197 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002198 with openfile('msg_01.txt') as fp:
2199 text = fp.read()
2200
2201 # Create a subclass
2202 class MyMessage(Message):
2203 pass
2204
2205 msg = email.message_from_string(text, MyMessage)
2206 unless(isinstance(msg, MyMessage))
2207 # Try something more complicated
2208 with openfile('msg_02.txt') as fp:
2209 text = fp.read()
2210 msg = email.message_from_string(text, MyMessage)
2211 for subpart in msg.walk():
2212 unless(isinstance(subpart, MyMessage))
2213
2214 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002215 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002216 # Create a subclass
2217 class MyMessage(Message):
2218 pass
2219
2220 with openfile('msg_01.txt') as fp:
2221 msg = email.message_from_file(fp, MyMessage)
2222 unless(isinstance(msg, MyMessage))
2223 # Try something more complicated
2224 with openfile('msg_02.txt') as fp:
2225 msg = email.message_from_file(fp, MyMessage)
2226 for subpart in msg.walk():
2227 unless(isinstance(subpart, MyMessage))
2228
def test__all__(self):
    # email.__all__ must contain exactly these names.  sorted() builds a
    # new list, so the package's own __all__ is left untouched and the
    # builtin all() is no longer shadowed by a local variable.
    self.assertEqual(sorted(email.__all__), [
        'base64mime', 'charset', 'encoders', 'errors', 'generator',
        'header', 'iterators', 'message', 'message_from_binary_file',
        'message_from_bytes', 'message_from_file',
        'message_from_string', 'mime', 'parser',
        'quoprimime', 'utils',
        ])
2241
def test_formatdate(self):
    # formatdate() followed by parsedate() must give back the same
    # year..second fields as time.gmtime() for the same timestamp.
    stamp = time.time()
    round_tripped = utils.parsedate(utils.formatdate(stamp))
    self.assertEqual(round_tripped[:6], time.gmtime(stamp)[:6])
2246
def test_formatdate_localtime(self):
    # With localtime=True the round trip must match time.localtime() for
    # the same timestamp, to one-second resolution.
    stamp = time.time()
    formatted = utils.formatdate(stamp, localtime=True)
    round_tripped = utils.parsedate(formatted)
    self.assertEqual(round_tripped[:6], time.localtime(stamp)[:6])
2252
def test_formatdate_usegmt(self):
    # Without usegmt the zone is written as "-0000"; with usegmt=True it is
    # written as "GMT".
    stamp = time.time()
    plain = utils.formatdate(stamp, localtime=False)
    self.assertEqual(
        plain,
        time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(stamp)))
    with_gmt = utils.formatdate(stamp, localtime=False, usegmt=True)
    self.assertEqual(
        with_gmt,
        time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(stamp)))
2261
def test_parsedate_none(self):
    # An empty date string is unparseable and must yield None.  assertIsNone
    # checks identity with None rather than mere equality.
    self.assertIsNone(utils.parsedate(''))
2264
def test_parsedate_compact(self):
    # The FWS after the comma is optional
    compact = utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800')
    spaced = utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')
    self.assertEqual(compact, spaced)
2269
def test_parsedate_no_dayofweek(self):
    # A date without a leading day-of-week name must still parse; the last
    # tuple item is the UTC offset in seconds (-0800 -> -28800).
    parsed = utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')
    self.assertEqual(parsed, (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2274
def test_parsedate_compact_no_dayofweek(self):
    # Same as the no-day-of-week case, with a single-digit day of month.
    parsed = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
    self.assertEqual(parsed, (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2279
def test_parsedate_acceptable_to_time_functions(self):
    # The tuples returned by parsedate() and parsedate_tz() must be usable
    # with time.mktime() and time.strftime().
    date_text = '5 Feb 2003 13:47:26 -0800'
    # parsedate_tz() appends a tenth item, so only its first nine items
    # are handed to the time functions.
    for fields in (utils.parsedate(date_text),
                   utils.parsedate_tz(date_text)[:9]):
        seconds = int(time.mktime(fields))
        self.assertEqual(time.localtime(seconds)[:6], fields[:6])
        self.assertEqual(int(time.strftime('%Y', fields)), 2003)
2290
def test_parsedate_y2k(self):
    """Test for parsing a date with a two-digit year.

    Parsing a date with a two-digit year should return the correct
    four-digit year. RFC822 allows two-digit years, but RFC2822 (which
    obsoletes RFC822) requires four-digit years.

    """
    # (two-digit form, equivalent four-digit form)
    pairs = (
        ('25 Feb 03 13:47:26 -0800', '25 Feb 2003 13:47:26 -0800'),
        ('25 Feb 71 13:47:26 -0800', '25 Feb 1971 13:47:26 -0800'),
        )
    for short_year, long_year in pairs:
        self.assertEqual(utils.parsedate_tz(short_year),
                         utils.parsedate_tz(long_year))
2303
def test_parseaddr_empty(self):
    # An empty angle-address parses to an empty (name, address) pair, and
    # formatting that pair gives the empty string.
    parsed = utils.parseaddr('<>')
    self.assertEqual(parsed, ('', ''))
    self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2307
def test_noquote_dump(self):
    # A display name made only of letters and spaces is emitted unquoted.
    formatted = utils.formataddr(('A Silly Person', 'person@dom.ain'))
    self.assertEqual(formatted, 'A Silly Person <person@dom.ain>')
2312
def test_escape_dump(self):
    # Parentheses in the display name are backslash-escaped and the name is
    # quoted.
    formatted = utils.formataddr(('A (Very) Silly Person', 'person@dom.ain'))
    self.assertEqual(formatted,
                     r'"A \(Very\) Silly Person" <person@dom.ain>')
    # A name that already contains backslashes survives a format/parse
    # round trip unchanged.
    pair = (r'A \(Special\) Person', 'person@dom.ain')
    self.assertEqual(utils.parseaddr(utils.formataddr(pair)), pair)
2320
def test_escape_backslashes(self):
    # The name is written as a raw string: '\B' and '\ ' are not valid
    # escape sequences, so a non-raw literal only works by accident and
    # triggers an invalid-escape warning on newer Pythons.  The string
    # value is unchanged.
    name = r'Arthur \Backslash\ Foobar'
    addr = 'person@dom.ain'
    # formataddr() doubles each backslash and quotes the name.
    self.assertEqual(
        utils.formataddr((name, addr)),
        r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
    # A format/parse round trip must give back the original pair.
    self.assertEqual(utils.parseaddr(utils.formataddr((name, addr))),
                     (name, addr))
2328
def test_name_with_dot(self):
    # Both the unquoted and the quoted spelling parse to the same pair.
    unquoted = 'John X. Doe <jxd@example.com>'
    quoted = '"John X. Doe" <jxd@example.com>'
    pair = ('John X. Doe', 'jxd@example.com')
    for spelling in (unquoted, quoted):
        self.assertEqual(utils.parseaddr(spelling), pair)
    # formataddr() quotes the name if there's a dot in it
    self.assertEqual(utils.formataddr(pair), quoted)
2337
def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
    # issue 10005.  Note that in the third test the second pair of
    # backslashes is not actually a quoted pair because it is not inside a
    # comment or quoted string: the address being parsed has a quoted
    # string containing a quoted backslash, followed by 'example' and two
    # backslashes, followed by another quoted string containing a space and
    # the word 'example'.  parseaddr copies those two backslashes
    # literally.  Per rfc5322 this is not technically correct since a \ may
    # not appear in an address outside of a quoted string.  It is probably
    # a sensible Postel interpretation, though.
    addresses = (
        '""example" example"@example.com',
        '"\\"example\\" example"@example.com',
        '"\\\\"example\\\\" example"@example.com',
        )
    # Each address must come back verbatim, with an empty display name.
    for address in addresses:
        self.assertEqual(utils.parseaddr(address), ('', address))
2355
def test_parseaddr_preserves_spaces_in_local_part(self):
    # issue 9286.  A normal RFC5322 local part should not contain any
    # folding white space, but legacy local parts can (they are a sequence
    # of atoms, not dotatoms).  On the other hand we strip whitespace from
    # before the @ and around dots, on the assumption that the whitespace
    # around the punctuation is a mistake in what would otherwise be
    # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
    # well.
    #
    # NOTE(review): these literals are reproduced from a view in which runs
    # of spaces appear collapsed (the first two cases look identical);
    # confirm the exact spacing against the canonical source.
    cases = (
        # (expected address, input)
        ("merwok wok@xample.com", "merwok wok@xample.com"),
        ("merwok wok@xample.com", "merwok wok@xample.com"),
        ("merwok wok@xample.com", " merwok wok @xample.com"),
        ('merwok"wok" wok@xample.com', 'merwok"wok" wok@xample.com'),
        ('merwok.wok.wok@xample.com', 'merwok. wok . wok@xample.com'),
        )
    for expected_addr, raw in cases:
        self.assertEqual(('', expected_addr), utils.parseaddr(raw))
2373
def test_multiline_from_comment(self):
    # A display name folded across two lines (newline followed by a tab)
    # must be unfolded to a single space-separated name.
    folded = 'Foo\n\tBar <foo@example.com>'
    self.assertEqual(utils.parseaddr(folded),
                     ('Foo Bar', 'foo@example.com'))
2379
def test_quote_dump(self):
    # A display name containing a semicolon is emitted inside double quotes.
    formatted = utils.formataddr(('A Silly; Person', 'person@dom.ain'))
    self.assertEqual(formatted, r'"A Silly; Person" <person@dom.ain>')
2384
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002385 def test_charset_richcomparisons(self):
2386 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002387 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002388 cset1 = Charset()
2389 cset2 = Charset()
2390 eq(cset1, 'us-ascii')
2391 eq(cset1, 'US-ASCII')
2392 eq(cset1, 'Us-AsCiI')
2393 eq('us-ascii', cset1)
2394 eq('US-ASCII', cset1)
2395 eq('Us-AsCiI', cset1)
2396 ne(cset1, 'usascii')
2397 ne(cset1, 'USASCII')
2398 ne(cset1, 'UsAsCiI')
2399 ne('usascii', cset1)
2400 ne('USASCII', cset1)
2401 ne('UsAsCiI', cset1)
2402 eq(cset1, cset2)
2403 eq(cset2, cset1)
2404
2405 def test_getaddresses(self):
2406 eq = self.assertEqual
2407 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2408 'Bud Person <bperson@dom.ain>']),
2409 [('Al Person', 'aperson@dom.ain'),
2410 ('Bud Person', 'bperson@dom.ain')])
2411
2412 def test_getaddresses_nasty(self):
2413 eq = self.assertEqual
2414 eq(utils.getaddresses(['foo: ;']), [('', '')])
2415 eq(utils.getaddresses(
2416 ['[]*-- =~$']),
2417 [('', ''), ('', ''), ('', '*--')])
2418 eq(utils.getaddresses(
2419 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2420 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2421
2422 def test_getaddresses_embedded_comment(self):
2423 """Test proper handling of a nested comment"""
2424 eq = self.assertEqual
2425 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2426 eq(addrs[0][1], 'foo@bar.com')
2427
2428 def test_utils_quote_unquote(self):
2429 eq = self.assertEqual
2430 msg = Message()
2431 msg.add_header('content-disposition', 'attachment',
2432 filename='foo\\wacky"name')
2433 eq(msg.get_filename(), 'foo\\wacky"name')
2434
2435 def test_get_body_encoding_with_bogus_charset(self):
2436 charset = Charset('not a charset')
2437 self.assertEqual(charset.get_body_encoding(), 'base64')
2438
2439 def test_get_body_encoding_with_uppercase_charset(self):
2440 eq = self.assertEqual
2441 msg = Message()
2442 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2443 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2444 charsets = msg.get_charsets()
2445 eq(len(charsets), 1)
2446 eq(charsets[0], 'utf-8')
2447 charset = Charset(charsets[0])
2448 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002449 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002450 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2451 eq(msg.get_payload(decode=True), b'hello world')
2452 eq(msg['content-transfer-encoding'], 'base64')
2453 # Try another one
2454 msg = Message()
2455 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2456 charsets = msg.get_charsets()
2457 eq(len(charsets), 1)
2458 eq(charsets[0], 'us-ascii')
2459 charset = Charset(charsets[0])
2460 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2461 msg.set_payload('hello world', charset=charset)
2462 eq(msg.get_payload(), 'hello world')
2463 eq(msg['content-transfer-encoding'], '7bit')
2464
2465 def test_charsets_case_insensitive(self):
2466 lc = Charset('us-ascii')
2467 uc = Charset('US-ASCII')
2468 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2469
2470 def test_partial_falls_inside_message_delivery_status(self):
2471 eq = self.ndiffAssertEqual
2472 # The Parser interface provides chunks of data to FeedParser in 8192
2473 # byte gulps. SF bug #1076485 found one of those chunks inside
2474 # message/delivery-status header block, which triggered an
2475 # unreadline() of NeedMoreData.
2476 msg = self._msgobj('msg_43.txt')
2477 sfp = StringIO()
2478 iterators._structure(msg, sfp)
2479 eq(sfp.getvalue(), """\
2480multipart/report
2481 text/plain
2482 message/delivery-status
2483 text/plain
2484 text/plain
2485 text/plain
2486 text/plain
2487 text/plain
2488 text/plain
2489 text/plain
2490 text/plain
2491 text/plain
2492 text/plain
2493 text/plain
2494 text/plain
2495 text/plain
2496 text/plain
2497 text/plain
2498 text/plain
2499 text/plain
2500 text/plain
2501 text/plain
2502 text/plain
2503 text/plain
2504 text/plain
2505 text/plain
2506 text/plain
2507 text/plain
2508 text/plain
2509 text/rfc822-headers
2510""")
2511
R. David Murraya0b44b52010-12-02 21:47:19 +00002512 def test_make_msgid_domain(self):
2513 self.assertEqual(
2514 email.utils.make_msgid(domain='testdomain-string')[-19:],
2515 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002516
Ezio Melottib3aedd42010-11-20 19:04:17 +00002517
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002518# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for email.iterators plus the feedparser line buffer.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Only a main type is given, so every text/* sub-part is visited.
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads),
           'a simple kind of mirror\n'
           'to reflect upon our own\n'
           'a simple kind of mirror\n'
           'to reflect upon our own\n')

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads),
           '\n'
           'Hi,\n'
           '\n'
           'Do you like this message?\n'
           '\n'
           '-Me\n')

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (chunk pushed, number of complete lines that become
        # readable as a result of that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object: compare by identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b).
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2605
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002606
Ezio Melottib3aedd42010-11-20 19:04:17 +00002607
class TestParsers(TestEmailBase):
    # Parser / HeaderParser behaviour on well-formed and awkward input.

    # Show complete diffs when a large comparison (e.g. in
    # test_crlf_flatten) fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # HeaderParser leaves the body unparsed, as a single string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLF line endings of the data file intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Unusual but printable characters are accepted in header names.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so it is not a
        # header line and no headers are collected at all.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works whatever kind of iterable keys() returns.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002770
Ezio Melottib3aedd42010-11-20 19:04:17 +00002771
class Test8BitBytesHandling(unittest.TestCase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    # Template for the body tests; each test fills in the charset, the
    # content-transfer-encoding and a single body line, then encodes the
    # result to bytes before parsing.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def test_known_8bit_CTE(self):
        # Known charset + 8bit CTE: the payload decodes to text, and
        # decode=True gives back the original bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        # Unknown charset: the text form carries U+FFFD replacement
        # characters, while decode=True still returns the original bytes.
        m = self.bodytest_msg.format(charset='notavalidcharset',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "p��stal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        m = self.bodytest_msg.format(charset='ascii',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6st��l\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='base64',
                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='uuencode',
                                     bodyline='<,.V<W1A; á ').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))


    # Message with raw (unencoded) non-ASCII characters in several headers,
    # including a second From: header and a folded Subject:.
    headertest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        From: göst

        Yes, they are flying.
        """).encode('utf-8')

    def test_get_8bit_header(self):
        # Each non-ASCII byte in the header is reported as a '?', through
        # both get() and item access.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(msg.get('to'), 'b??z')
        self.assertEqual(msg['to'], 'b??z')

    def test_print_8bit_headers(self):
        # str(msg) is compared with the input decoded as ASCII, every
        # undecodable byte turned into '?'.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg),
                         self.headertest_msg.decode(
                            'ascii', 'replace').replace('�', '?'))

    def test_values_with_8bit_headers(self):
        # values() applies the same '?' substitution and keeps the fold.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual(msg.values(),
                              ['foo@bar.com',
                               'b??z',
                               'Maintenant je vous pr??sente mon '
                                   'coll??gue, le pouf c??l??bre\n'
                                   '\tJean de Baddie',
                               "g??st"])

    def test_items_with_8bit_headers(self):
        # items() pairs the header names with the sanitized values.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual(msg.items(),
                              [('From', 'foo@bar.com'),
                               ('To', 'b??z'),
                               ('Subject', 'Maintenant je vous pr??sente mon '
                                              'coll??gue, le pouf c??l??bre\n'
                                              '\tJean de Baddie'),
                               ('From', 'g??st')])

    def test_get_all_with_8bit_headers(self):
        # Both From: headers are returned, the second one sanitized.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual(msg.get_all('from'),
                              ['foo@bar.com',
                               'g??st'])

    # UTF-8 message with non-ASCII headers and an 8bit Cyrillic body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        # BytesGenerator must reproduce the binary input exactly.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)

    # XXX: ultimately the '?' should turn into CTE encoded bytes
    # using 'unknown-8bit' charset.
    non_latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: b??z
        Subject: Maintenant je vous pr??sente mon coll??gue, le pouf c??l??bre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        # The str Generator emits the 7bit-safe rendition defined above.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = StringIO()
        email.generator.Generator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        lines = out.getvalue().split(b'\n')
        self.assertEqual(lines[0].split()[0], b'From')
        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)

    def test_message_from_binary_file(self):
        # Round-trip through a real file opened in binary mode; the file
        # is removed again by the registered cleanup.
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    # Latin-1 message with an 8bit body.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected str() rendition of latin_bin_msg: charset spelled
    # iso-8859-1 and the body quoted-printable encoded.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(m)
        #DecodedHeader output contains an extra blank line compared
        #to the input message.  RDM: not sure if this is a bug or not,
        #but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(out.getvalue(),
            self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        # Feed the message in 10-byte slices to exercise incremental
        # parsing of binary input.
        bfp = email.feedparser.BytesFeedParser()
        for i in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[i:i+10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        # Binary round trip of a CRLF data file, flattened with CRLF.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)
    # Do not truncate diffs in failure messages.
    maxDiff = None
3020
Ezio Melottib3aedd42010-11-20 19:04:17 +00003021
class BaseTestBytesGeneratorIdempotent:
    # Mixin with the bytes flavour of the idempotency helpers.  The
    # concrete class supplies linesep, blinesep and
    # normalize_linesep_regex.

    maxDiff = None

    def _msgobj(self, filename):
        # Load the data file as bytes, rewrite its line endings to the
        # variant under test, and return both the parsed message and the
        # normalized bytes.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header re-wrapping and require the output
        # to equal data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3043
3044
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                      TestIdempotent):
    # Bytes idempotency tests with LF line endings.
    linesep = '\n'
    blinesep = b'\n'
    # Turns every CRLF in the data file into the bare LF under test.
    normalize_linesep_regex = re.compile(br'\r\n')
3050
3051
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                        TestIdempotent):
    # Bytes idempotency tests with CRLF line endings.
    linesep = '\r\n'
    blinesep = b'\r\n'
    # Matches only an LF that is not already preceded by CR, so existing
    # CRLF pairs are left alone when LFs are rewritten to CRLF.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3057
Ezio Melottib3aedd42010-11-20 19:04:17 +00003058
class TestBase64(unittest.TestCase):
    # Tests for the email.base64mime helper functions.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Base64 emits one 4-character group for every 3-byte input
            # group, including a final partial one.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # 100 input bytes wrapped at 40 output characters: three full
        # lines followed by a padded remainder.
        full = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        rest = 'eHh4eCB4eHh4IA=='
        wrapped = (full, full, full, rest)
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003109
3110
Ezio Melottib3aedd42010-11-20 19:04:17 +00003111
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003112class TestQuopri(unittest.TestCase):
def setUp(self):
    # Set of characters (as byte integers) that don't need to be encoded
    # in headers: ASCII letters, digits and a few punctuation marks.
    header_safe = list(range(ord('a'), ord('z') + 1))
    header_safe += range(ord('A'), ord('Z') + 1)
    header_safe += range(ord('0'), ord('9') + 1)
    header_safe += b'!*+-/'
    self.hlit = header_safe
    # Set of characters (as byte integers) that do need to be encoded in
    # headers: every other byte value.
    self.hnon = []
    for octet in range(256):
        if octet not in self.hlit:
            self.hnon.append(octet)
    assert len(self.hlit) + len(self.hnon) == 256
    # Set of characters (as byte integers) that don't need to be encoded
    # in bodies: space through '~' plus tab, minus '='.
    body_safe = list(range(ord(' '), ord('~') + 1))
    body_safe.append(ord('\t'))
    body_safe.remove(ord('='))
    self.blit = body_safe
    # Set of characters (as byte integers) that do need to be encoded in
    # bodies.
    self.bnon = []
    for octet in range(256):
        if octet not in self.blit:
            self.bnon.append(octet)
    assert len(self.blit) + len(self.bnon) == 256
3134
Guido van Rossum9604e662007-08-30 03:46:43 +00003135 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003136 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003137 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003138 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003139 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003140 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003141 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003142
Guido van Rossum9604e662007-08-30 03:46:43 +00003143 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003144 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003145 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003146 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003147 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003148 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003149 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003150
3151 def test_header_quopri_len(self):
3152 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003153 eq(quoprimime.header_length(b'hello'), 5)
3154 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003155 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003156 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003157 # =?xxx?q?...?= means 10 extra characters
3158 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003159 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3160 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003161 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003162 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003163 # =?xxx?q?...?= means 10 extra characters
3164 10)
3165 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003166 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003167 'expected length 1 for %r' % chr(c))
3168 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003169 # Space is special; it's encoded to _
3170 if c == ord(' '):
3171 continue
3172 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003173 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003174 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003175
3176 def test_body_quopri_len(self):
3177 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003178 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003179 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003180 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003181 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003182
3183 def test_quote_unquote_idempotent(self):
3184 for x in range(256):
3185 c = chr(x)
3186 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3187
3188 def test_header_encode(self):
3189 eq = self.assertEqual
3190 he = quoprimime.header_encode
3191 eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
3192 eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
3193 eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
3194 # Test a non-ASCII character
3195 eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
3196
3197 def test_decode(self):
3198 eq = self.assertEqual
3199 eq(quoprimime.decode(''), '')
3200 eq(quoprimime.decode('hello'), 'hello')
3201 eq(quoprimime.decode('hello', 'X'), 'hello')
3202 eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')
3203
3204 def test_encode(self):
3205 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003206 eq(quoprimime.body_encode(''), '')
3207 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003208 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003209 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003210 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003211 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003212xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3213 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3214x xxxx xxxx xxxx xxxx=20""")
3215 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003216 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3217 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003218xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3219 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3220x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003221 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003222one line
3223
3224two line"""), """\
3225one line
3226
3227two line""")
3228
3229
Ezio Melottib3aedd42010-11-20 19:04:17 +00003230
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003231# Test the Charset class
3232class TestCharset(unittest.TestCase):
3233 def tearDown(self):
3234 from email import charset as CharsetModule
3235 try:
3236 del CharsetModule.CHARSETS['fake']
3237 except KeyError:
3238 pass
3239
Guido van Rossum9604e662007-08-30 03:46:43 +00003240 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003241 eq = self.assertEqual
3242 # Make sure us-ascii = no Unicode conversion
3243 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003244 eq(c.header_encode('Hello World!'), 'Hello World!')
3245 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003246 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003247 self.assertRaises(UnicodeError, c.header_encode, s)
3248 c = Charset('utf-8')
3249 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003250
3251 def test_body_encode(self):
3252 eq = self.assertEqual
3253 # Try a charset with QP body encoding
3254 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003255 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003256 # Try a charset with Base64 body encoding
3257 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003258 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003259 # Try a charset with None body encoding
3260 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003261 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003262 # Try the convert argument, where input codec != output codec
3263 c = Charset('euc-jp')
3264 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003265 # XXX FIXME
3266## try:
3267## eq('\x1b$B5FCO;~IW\x1b(B',
3268## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3269## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3270## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3271## except LookupError:
3272## # We probably don't have the Japanese codecs installed
3273## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003274 # Testing SF bug #625509, which we have to fake, since there are no
3275 # built-in encodings where the header encoding is QP but the body
3276 # encoding is not.
3277 from email import charset as CharsetModule
3278 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3279 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003280 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003281
3282 def test_unicode_charset_name(self):
3283 charset = Charset('us-ascii')
3284 self.assertEqual(str(charset), 'us-ascii')
3285 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3286
3287
Ezio Melottib3aedd42010-11-20 19:04:17 +00003288
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003289# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for Header, decode_header() and make_header()."""

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # The appended chunk brings its own leading space on top of the
        # separator that test_simple_surprise shows is inserted between
        # chunks, so two spaces are expected here.
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # No leading space on the appended chunk: a single separator appears.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header(
            "I am the very model of a modern Major-General; I've information "
            "vegetable, animal, and mineral; I know the kings of England, and "
            "I quote the fights historical from Marathon to Waterloo, in order "
            "categorical; I'm very well acquainted, too, with matters "
            "mathematical; I understand equations, both the simple and "
            "quadratical; about binomial theorem I'm teeming with a lot o' "
            "news, with many cheerful facts about the square of the hypotenuse.",
            maxlinelen=76)
        # Every folded line must fit in maxlinelen; assertLessEqual reports
        # the offending length when it does not.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back one (bytes, charset) pair per chunk.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # Passing header_name moves the fold point one word earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A maxlinelen larger than the text leaves it unfolded.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Reuse x so the round-trip check below compares against exactly
        # the text that was appended.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset may be given by name rather than as a Charset object.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # Mostly-ASCII text comes out Q-encoded ...
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        # ... while this all-CJK text comes out base64-encoded.
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Undecodable bytes are an error by default ...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ... but errors='replace' decodes them like bytes.decode() would.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words decode to a single chunk.
        eq(parts, [
            (b'Subject:', None),
            (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
            (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)
3596
3597
Ezio Melottib3aedd42010-11-20 19:04:17 +00003598
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003599# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 parameter values read and written via Message."""

    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): the expected text has to match the msg_01.txt
        # fixture exactly, including any runs of spaces in its header
        # values -- confirm the literal below against the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): the expected text has to match the msg_01.txt
        # fixture exactly, including any runs of spaces in its header
        # values -- confirm the literal below against the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Unencoded continuations come back as a plain string, not as a
        # (charset, language, value) tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # NOTE(review): this input and expectation are identical to
        # test_rfc2231_no_language_or_charset_in_filename; confirm whether
        # a differently encoded input was intended.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 lacks the trailing '*', so its %XX text stays literal.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment carries the trailing '*', so nothing is %-decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        # The stray %E2 octet shows up as U+FFFD in the filename.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Without the extended '*' marker the ticks are ordinary text.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
3908
3909
Ezio Melottib3aedd42010-11-20 19:04:17 +00003910
R. David Murraya8f480f2010-01-16 18:30:03 +00003911# Tests to ensure that signed parts of an email are completely preserved, as
3912# required by RFC1847 section 2.1. Note that these are incomplete, because the
3913# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of msg_45.txt survives regeneration."""

    def _msg_and_obj(self, filename):
        """Return (raw text, parsed message) for the named data file."""
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        """Assert that the first MIME part of both texts is identical."""
        # Extract the first mime part of each message: everything between
        # a "--boundary" line and the next line starting with the same
        # "--boundary" text.
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # Fail cleanly rather than with AttributeError on None.group().
        self.assertIsNotNone(inmatch, 'no MIME part found in original text')
        self.assertIsNotNone(outmatch, 'no MIME part found in generated text')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
3946
3947
Ezio Melottib3aedd42010-11-20 19:04:17 +00003948
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'."""
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
3952
3953
def suite():
    """Return one TestSuite holding the tests of every class from _testclasses()."""
    all_tests = unittest.TestSuite()
    # loadTestsFromTestCase is the supported replacement for the
    # deprecated unittest.makeSuite() helper.
    load_tests_from = unittest.defaultTestLoader.loadTestsFromTestCase
    for testclass in _testclasses():
        all_tests.addTest(load_tests_from(testclass))
    return all_tests
3959
3960
def test_main():
    """Pass each class found by _testclasses() to run_unittest(), one at a time."""
    for case_class in _testclasses():
        run_unittest(case_class)
3964
3965
Ezio Melottib3aedd42010-11-20 19:04:17 +00003966
# When run as a script, let unittest build and run the suite returned by
# the module-level suite() function.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')