blob: 53c4042479322a1d04feb077ab549e47f7cb90c5 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory next to the test package.

    Any extra positional and keyword arguments are handed straight to the
    built-in ``open()``.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    """Shared helpers for the email package test cases."""

    def ndiffAssertEqual(self, first, second):
        """Assert equality, reporting any mismatch as a line-by-line ndiff."""
        if not (first != second):
            return
        # Compare the repr of every line so that whitespace and escape
        # differences are visible in the failure message.
        left = [repr(line) for line in str(first).splitlines()]
        right = [repr(line) for line in str(second).splitlines()]
        report = NL.join(difflib.ndiff(left, right))
        raise self.failureException(NL + report)

    def _msgobj(self, filename):
        """Parse the named test data file and return the resulting message."""
        path = findfile(filename)
        with openfile(path) as stream:
            message = email.message_from_file(stream)
        return message
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    The tests cover header access, MIME parameter handling, charset and
    boundary manipulation, content-type introspection and payload decoding.
    Tests that call ``self._msgobj()`` work on a message parsed from one of
    the test data files.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied default unchanged.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # With a failobj, parts lacking a charset report it instead of None.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Message.__setitem__ appends instead of replacing, and header
            # lookup returns the first match; drop the previous value so
            # that every spelling is really the one being decoded.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the envelope line comes first; the remainder
        # must match the file text.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type: header is present, so the default lookup
        # finds nothing.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        # Same header text as above, spelled with the other quoting style.
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type: value without a slash falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicate headers only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*="utf-8\'\'Fu%C3%9Fballer.ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
            msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000547
Ezio Melottib3aedd42010-11-20 19:04:17 +0000548
# Test the email.encoders module
550class TestEncoders(unittest.TestCase):
551 def test_encode_empty_payload(self):
552 eq = self.assertEqual
553 msg = Message()
554 msg.set_charset('us-ascii')
555 eq(msg['content-transfer-encoding'], '7bit')
556
557 def test_default_cte(self):
558 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000559 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000560 msg = MIMEText('hello world')
561 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000562 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000563 msg = MIMEText('hello \xf8 world')
564 eq(msg['content-transfer-encoding'], '8bit')
565 # And now with a different charset
566 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
567 eq(msg['content-transfer-encoding'], 'quoted-printable')
568
R. David Murraye85200d2010-05-06 01:41:14 +0000569 def test_encode7or8bit(self):
570 # Make sure a charset whose input character set is 8bit but
571 # whose output character set is 7bit gets a transfer-encoding
572 # of 7bit.
573 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000574 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000575 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000576
Ezio Melottib3aedd42010-11-20 19:04:17 +0000577
# Test long header wrapping
579class TestLongHeaders(TestEmailBase):
580 def test_split_long_continuation(self):
581 eq = self.ndiffAssertEqual
582 msg = email.message_from_string("""\
583Subject: bug demonstration
584\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
585\tmore text
586
587test
588""")
589 sfp = StringIO()
590 g = Generator(sfp)
591 g.flatten(msg)
592 eq(sfp.getvalue(), """\
593Subject: bug demonstration
594\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
595\tmore text
596
597test
598""")
599
600 def test_another_long_almost_unsplittable_header(self):
601 eq = self.ndiffAssertEqual
602 hstr = """\
603bug demonstration
604\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
605\tmore text"""
606 h = Header(hstr, continuation_ws='\t')
607 eq(h.encode(), """\
608bug demonstration
609\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
610\tmore text""")
611 h = Header(hstr.replace('\t', ' '))
612 eq(h.encode(), """\
613bug demonstration
614 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
615 more text""")
616
617 def test_long_nonstring(self):
618 eq = self.ndiffAssertEqual
619 g = Charset("iso-8859-1")
620 cz = Charset("iso-8859-2")
621 utf8 = Charset("utf-8")
622 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
623 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
624 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
625 b'bef\xf6rdert. ')
626 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
627 b'd\xf9vtipu.. ')
628 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
629 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
630 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
631 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
632 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
633 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
634 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
635 '\u3044\u307e\u3059\u3002')
636 h = Header(g_head, g, header_name='Subject')
637 h.append(cz_head, cz)
638 h.append(utf8_head, utf8)
639 msg = Message()
640 msg['Subject'] = h
641 sfp = StringIO()
642 g = Generator(sfp)
643 g.flatten(msg)
644 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000645Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
646 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
647 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
648 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
649 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
650 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
651 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
652 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
653 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
654 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
655 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000656
657""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000658 eq(h.encode(maxlinelen=76), """\
659=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
660 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
661 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
662 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
663 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
664 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
665 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
666 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
667 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
668 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
669 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000670
671 def test_long_header_encode(self):
672 eq = self.ndiffAssertEqual
673 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
674 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
675 header_name='X-Foobar-Spoink-Defrobnit')
676 eq(h.encode(), '''\
677wasnipoop; giraffes="very-long-necked-animals";
678 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
679
680 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
681 eq = self.ndiffAssertEqual
682 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
683 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
684 header_name='X-Foobar-Spoink-Defrobnit',
685 continuation_ws='\t')
686 eq(h.encode(), '''\
687wasnipoop; giraffes="very-long-necked-animals";
688 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
689
690 def test_long_header_encode_with_tab_continuation(self):
691 eq = self.ndiffAssertEqual
692 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
693 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
694 header_name='X-Foobar-Spoink-Defrobnit',
695 continuation_ws='\t')
696 eq(h.encode(), '''\
697wasnipoop; giraffes="very-long-necked-animals";
698\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
699
700 def test_header_splitter(self):
701 eq = self.ndiffAssertEqual
702 msg = MIMEText('')
703 # It'd be great if we could use add_header() here, but that doesn't
704 # guarantee an order of the parameters.
705 msg['X-Foobar-Spoink-Defrobnit'] = (
706 'wasnipoop; giraffes="very-long-necked-animals"; '
707 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
708 sfp = StringIO()
709 g = Generator(sfp)
710 g.flatten(msg)
711 eq(sfp.getvalue(), '''\
712Content-Type: text/plain; charset="us-ascii"
713MIME-Version: 1.0
714Content-Transfer-Encoding: 7bit
715X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
716 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
717
718''')
719
720 def test_no_semis_header_splitter(self):
721 eq = self.ndiffAssertEqual
722 msg = Message()
723 msg['From'] = 'test@dom.ain'
724 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
725 msg.set_payload('Test')
726 sfp = StringIO()
727 g = Generator(sfp)
728 g.flatten(msg)
729 eq(sfp.getvalue(), """\
730From: test@dom.ain
731References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
732 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
733
734Test""")
735
736 def test_no_split_long_header(self):
737 eq = self.ndiffAssertEqual
738 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000739 h = Header(hstr)
740 # These come on two lines because Headers are really field value
741 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000742 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000743References:
744 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
745 h = Header('x' * 80)
746 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000747
def test_splitting_multiple_long_lines(self):
    # Three long input lines (the last two introduced by a tab); each one
    # is expected to be folded after its semicolons.
    received = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
    header = Header(received, continuation_ws='\t')
    expected = """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)"""
    self.ndiffAssertEqual(header.encode(), expected)
769
def test_splitting_first_line_only_is_long(self):
    raw = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    header = Header(raw, maxlinelen=78, header_name='Received',
                    continuation_ws='\t')
    # Only the first input line is refolded; the tab-led lines that follow
    # are expected back unchanged.
    expected = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    self.ndiffAssertEqual(header.encode(), expected)
785
def test_long_8bit_header(self):
    check = self.ndiffAssertEqual
    subject = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
    subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    # Encoding the Header object directly.
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
    # The same header serialized as part of a message, first folded ...
    message = Message()
    message['Subject'] = subject
    check(message.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
    # ... and then with maxheaderlen=0, where it stays on one line.
    check(message.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")
805
def test_long_8bit_header_no_charset(self):
    value = ('Britische Regierung gibt gr\xfcnes Licht '
             'f\xfcr Offshore-Windkraftprojekte '
             '<a-very-long-address@example.com>')
    # Assigned as a plain str, the non-ASCII value makes as_string() raise.
    bare = Message()
    bare['Reply-To'] = value
    with self.assertRaises(UnicodeEncodeError):
        bare.as_string()
    # Wrapped in a utf-8 Header, the same text is written as encoded words.
    wrapped = Message()
    wrapped['Reply-To'] = Header(value, 'utf-8', header_name='Reply-To')
    self.ndiffAssertEqual(wrapped.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
822
def test_long_to_header(self):
    # Five recipients; the first two are separated by a bare comma, the
    # others by a comma followed by a space.
    recipients = ('"Someone Test #A" <someone@eecs.umich.edu>,'
                  '<someone@eecs.umich.edu>,'
                  '"Someone Test #B" <someone@umich.edu>, '
                  '"Someone Test #C" <someone@eecs.umich.edu>, '
                  '"Someone Test #D" <someone@eecs.umich.edu>')
    message = Message()
    message['To'] = recipients
    expected = '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

'''
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
839
def test_long_line_after_append(self):
    # Appending to a value that is already close to the line limit is
    # expected to put the appended text on a continuation line.
    first = 'This is an example of string which has almost the limit of header length.'
    header = Header(first)
    header.append('Add another line.')
    expected = """\
This is an example of string which has almost the limit of header length.
 Add another line."""
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
848
def test_shorter_line_with_append(self):
    # Both pieces are short, so the encoded value is expected on one line.
    header = Header('This is a shorter line.')
    header.append('Add another sentence. (Surprise?)')
    encoded = header.encode()
    self.ndiffAssertEqual(
        encoded,
        'This is a shorter line. Add another sentence. (Surprise?)')
856
def test_long_field_name(self):
    field_name = 'X-Very-Very-Very-Long-Header-Name'
    german = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
    header = Header(german, 'iso-8859-1', header_name=field_name)
    # BAW: this seems broken because the first line is too long
    expected = """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?="""
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000871
def test_long_received_header(self):
    received = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
                'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
                'Wed, 05 Mar 2003 18:10:18 -0700')
    message = Message()
    # The same value is stored once as a Header instance and once as a
    # plain string.
    message['Received-1'] = Header(received, continuation_ws='\t')
    message['Received-2'] = received
    # This should be splitting on spaces not semicolons.
    expected = """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

"""
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
887
def test_string_headerinst_eq(self):
    value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
    message = Message()
    # A Header instance and the equivalent plain string are expected to
    # serialize the same way.
    message['Received-1'] = Header(value, header_name='Received-1',
                                   continuation_ws='\t')
    message['Received-2'] = value
    # XXX This should be splitting on spaces not commas.
    expected = """\
Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
 6 Mar 2003 13:58:21 +0100\")
Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
 6 Mar 2003 13:58:21 +0100\")

"""
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
904
def test_long_unbreakable_lines_with_continuation(self):
    # Two unbreakable lines, the second already written as a continuation.
    face = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
    message = Message()
    message['Face-1'] = face
    message['Face-2'] = Header(face, header_name='Face-2')
    # XXX This splitting is all wrong.  The first value line should be
    # snug against the field name.
    expected = """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

"""
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
924
def test_another_long_multiline_header(self):
    # Parse a message consisting of one long Received header, then write
    # it back out with a 78-column header limit.
    source = ('Received: from siimage.com '
              '([172.25.1.3]) by zima.siliconimage.com with '
              'Microsoft SMTPSVC(5.0.2195.4905); '
              'Wed, 16 Oct 2002 07:41:11 -0700')
    parsed = email.message_from_string(source)
    expected = '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
 Wed, 16 Oct 2002 07:41:11 -0700

'''
    self.ndiffAssertEqual(parsed.as_string(maxheaderlen=78), expected)
937
def test_long_lines_with_different_header(self):
    # The value itself begins with another field name ("List-Unsubscribe:")
    # and is stored twice under 'List': as a str and as a Header.
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    message = Message()
    message['List'] = value
    message['List'] = Header(value, header_name='List')
    expected = """\
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

"""
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
954
955
Ezio Melottib3aedd42010-11-20 19:04:17 +0000956
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000957# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # Fixture: a message whose body's first line begins with "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        # With mangle_from_ on, the "From " body line gains a ">" prefix.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # With mangle_from_ off, the body is written unchanged.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)
988
989
Ezio Melottib3aedd42010-11-20 19:04:17 +0000990
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000991# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype was given, so it is derived from the audio data.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list serves as a sentinel; assertIs reports both operands
        # if the identity check fails.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1035
1036
Ezio Melottib3aedd42010-11-20 19:04:17 +00001037
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001038# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF test image and wrap it without naming a subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype was given, so it is derived from the image data.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a sentinel; assertIs reports both operands
        # if the identity check fails.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1076
1077
Ezio Melottib3aedd42010-11-20 19:04:17 +00001078
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001079# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Defaults: application/octet-stream, base64 transfer encoding.
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        # Named 'raw' rather than 'bytes' so the builtin is not shadowed.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(raw)
        # Compare modulo surrounding whitespace: a base64 encoder may end
        # its output with a line terminator, which is not part of the
        # encoded data.  The round trip below checks the content itself.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), raw)
1093
1094
Ezio Melottib3aedd42010-11-20 19:04:17 +00001095
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001096# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # Fixture: a plain ASCII text part created with default arguments.
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel; assertIs reports both operands
        # if the identity check fails.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the serialized message if the text is absent.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Cyrillic text with an explicit utf-8 charset.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1147
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001148
Ezio Melottib3aedd42010-11-20 19:04:17 +00001149
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001150# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Fixture: a multipart/mixed container holding a short text part
        # and a GIF attachment, plus From/To/Subject/Date headers.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is the DST flag: pick the standard or
        # the DST offset accordingly.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value means a negative UTC offset.
        sign = '-' if tzsecs > 0 else '+'
        # Format as HHMM from the magnitude.  The previous 'tzsecs / 36'
        # with '%04d' produced e.g. '+-550' east of UTC and a wrong value
        # for any offset with a non-zero minute part.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # The container exposes exactly the two attached parts, in order.
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-serializing must reproduce the text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        # No parts attached; preamble and epilogue left at their defaults.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # No parts attached; preamble and epilogue are both empty strings.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        # A single text part, default preamble and epilogue.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble yields an extra blank line before the
        # first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        # A None preamble adds nothing before the first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        # A None epilogue: output ends right after the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty-string epilogue: a newline follows the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # A '\n' epilogue: a blank line follows the closing boundary line.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        # Each message/external-body subpart wraps one text/plain message.
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): every nested level below carries a single leading
        # space; confirm this matches the indentation _structure() emits.
        eq(sfp.getvalue(), """\
multipart/mixed
 multipart/mixed
 multipart/alternative
 text/plain
 text/plain
 text/plain
 text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): every nested level below carries a single leading
        # space; confirm this matches the indentation _structure() emits.
        eq(sfp.getvalue(), """\
multipart/mixed
 multipart/mixed
 multipart/alternative
 application/octet-stream
 application/octet-stream
 text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type leaves the body
        # untouched when the message is written back out.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # Leading whitespace inside the quoted boundary is significant.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the last thing in the text, with no
        # newline after it; the part's payload must still be intact.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001522
1523
Ezio Melottib3aedd42010-11-20 19:04:17 +00001524
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001525# Test some badly formatted messages
1526class TestNonConformant(TestEmailBase):
1527 def test_parse_missing_minor_type(self):
1528 eq = self.assertEqual
1529 msg = self._msgobj('msg_14.txt')
1530 eq(msg.get_content_type(), 'text/plain')
1531 eq(msg.get_content_maintype(), 'text')
1532 eq(msg.get_content_subtype(), 'plain')
1533
1534 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001535 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001536 msg = self._msgobj('msg_15.txt')
1537 # XXX We can probably eventually do better
1538 inner = msg.get_payload(0)
1539 unless(hasattr(inner, 'defects'))
1540 self.assertEqual(len(inner.defects), 1)
1541 unless(isinstance(inner.defects[0],
1542 errors.StartBoundaryNotFoundDefect))
1543
1544 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001545 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001546 msg = self._msgobj('msg_25.txt')
1547 unless(isinstance(msg.get_payload(), str))
1548 self.assertEqual(len(msg.defects), 2)
1549 unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
1550 unless(isinstance(msg.defects[1],
1551 errors.MultipartInvariantViolationDefect))
1552
1553 def test_invalid_content_type(self):
1554 eq = self.assertEqual
1555 neq = self.ndiffAssertEqual
1556 msg = Message()
1557 # RFC 2045, $5.2 says invalid yields text/plain
1558 msg['Content-Type'] = 'text'
1559 eq(msg.get_content_maintype(), 'text')
1560 eq(msg.get_content_subtype(), 'plain')
1561 eq(msg.get_content_type(), 'text/plain')
1562 # Clear the old value and try something /really/ invalid
1563 del msg['content-type']
1564 msg['Content-Type'] = 'foo'
1565 eq(msg.get_content_maintype(), 'text')
1566 eq(msg.get_content_subtype(), 'plain')
1567 eq(msg.get_content_type(), 'text/plain')
1568 # Still, make sure that the message is idempotently generated
1569 s = StringIO()
1570 g = Generator(s)
1571 g.flatten(msg)
1572 neq(s.getvalue(), 'Content-Type: foo\n\n')
1573
def test_no_start_boundary(self):
    # The payload of msg_31.txt must equal this text, boundary lines
    # included.
    expected = """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
"""
    payload = self._msgobj('msg_31.txt').get_payload()
    self.ndiffAssertEqual(payload, expected)
1590
def test_no_separating_blank_line(self):
    # Regenerating msg_35.txt must yield the headers, a blank line, and
    # then the body text shown here.
    expected = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
"""
    regenerated = self._msgobj('msg_35.txt').as_string()
    self.ndiffAssertEqual(regenerated, expected)
1601
def test_lying_multipart(self):
    # msg_41.txt must record exactly two defects, in the order checked
    # below.
    msg = self._msgobj('msg_41.txt')
    self.assertTrue(hasattr(msg, 'defects'))
    self.assertEqual(len(msg.defects), 2)
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg.defects[0], errors.NoBoundaryInMultipartDefect)
    self.assertIsInstance(msg.defects[1],
                          errors.MultipartInvariantViolationDefect)
1610
def test_missing_start_boundary(self):
    outer = self._msgobj('msg_42.txt')
    # The message structure is:
    #
    # multipart/mixed
    #    text/plain
    #    message/rfc822
    #        multipart/mixed [*]
    #
    # [*] This message is missing its start boundary
    bad = outer.get_payload(1).get_payload(0)
    self.assertEqual(len(bad.defects), 1)
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(bad.defects[0],
                          errors.StartBoundaryNotFoundDefect)
1625
def test_first_line_is_continuation_header(self):
    # A message whose first line starts with whitespace has no headers;
    # the offending line is recorded on a defect and the remaining lines
    # become the payload.
    eq = self.assertEqual
    m = ' Line 1\nLine 2\nLine 3'
    msg = email.message_from_string(m)
    eq(msg.keys(), [])
    eq(msg.get_payload(), 'Line 2\nLine 3')
    eq(len(msg.defects), 1)
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg.defects[0],
                          errors.FirstHeaderLineIsContinuationDefect)
    eq(msg.defects[0].line, ' Line 1\n')
1636
1637
Ezio Melottib3aedd42010-11-20 19:04:17 +00001638
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001639# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Every test hands a raw header string to decode_header() and checks the
    # list of (chunk, charset) pairs it returns; some also rebuild a Header
    # from those pairs with make_header().

    def test_rfc2047_multiline(self):
        # NOTE(review): the second line of this literal starts with the
        # whitespace shown in the reviewed listing -- confirm the exact
        # amount against the repository copy.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        encoded_word = (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            encoded_word,
            (b'baz foo bar', None),
            encoded_word,
        ])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding with a 76 column limit splits the last encoded word.
        folded = header.encode(maxlinelen=76)
        self.ndiffAssertEqual(folded, """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b'Pirard <pirard@dom.ain>', None),
        ])
        text = str(make_header(chunks))
        self.assertEqual(text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1'),
        ])
        text = str(make_header(chunks))
        self.assertEqual(text, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to the surrounding text are not decoded: the
        # whole string comes back as a single chunk with no charset.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
        ]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for payload, decoded in cases:
            chunks = decode_header(template % payload)
            self.assertEqual(chunks, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1703
Ezio Melottib3aedd42010-11-20 19:04:17 +00001704
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001705# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Covers MIMEMessage construction and generation, parsing of message/*
    # parts from the data files, and the default content type reported by
    # nested parts.  Type checks use assertIsInstance/assertIs/assertIsNone
    # so a failure reports the offending object instead of "False is not
    # true".

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message must be the very same object, not a copy.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # A MIMEMessage already wraps one message; attaching a second one
        # must be refused.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines in this
        # literal is copied as it appears in the reviewed listing -- confirm
        # the exact amount against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Build a two-part multipart by hand, with a preamble and an
        # epilogue, and check that generating it reproduces msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble set here has no trailing newline; the expected output
        # still has the first boundary on a line of its own.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        # The first positional argument of as_string() is unixfrom; spell
        # it out instead of passing a bare 0.
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With their own headers removed the subparts must still report
        # message/rfc822, and are generated without any part headers.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002004
Ezio Melottib3aedd42010-11-20 19:04:17 +00002005
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a data file and, in most cases, checks that
    # flattening the parsed message reproduces the file's text exactly.

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed
        # from, so callers can compare the regenerated output against it.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and require
        # the output to equal text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This is the one case that regenerates the Unix-From line too.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Parse the message; _msgobj() reads the whole file up front, so
        # there is no file position to restore afterwards.
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = {}
        for pk, pv in msg.get_params():
            params[pk] = pv
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        # assertIsInstance reports the actual object on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002168
2169
Ezio Melottib3aedd42010-11-20 19:04:17 +00002170
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002171# Test various other bits of the package's functionality
2172class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parsing msg_01.txt from a string and flattening the result must
    # reproduce the original text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2183
def test_message_from_file(self):
    # Read msg_01.txt once for reference, rewind, then parse the same
    # file object; flattening the result must reproduce the text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2195
def test_message_from_string_with_class(self):
    # message_from_string() must build the message, and every subpart of
    # a multipart message, from the class it is given.
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2213
def test_message_from_file_with_class(self):
    # message_from_file() must build the message, and every subpart of a
    # multipart message, from the class it is given.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2228
def test__all__(self):
    # The package must export exactly these names.  sorted() builds the
    # ordered copy directly, so the check is independent of declaration
    # order, leaves email.__all__ untouched, and no longer needs a local
    # named 'all' that shadows the builtin.
    module = __import__('email')
    self.assertEqual(sorted(module.__all__), [
        'base64mime', 'charset', 'encoders', 'errors', 'generator',
        'header', 'iterators', 'message', 'message_from_binary_file',
        'message_from_bytes', 'message_from_file',
        'message_from_string', 'mime', 'parser',
        'quoprimime', 'utils',
        ])
2241
def test_formatdate(self):
    # A date formatted with the defaults and parsed back must match the
    # UTC broken-down time, down to the second.
    stamp = time.time()
    parsed = utils.parsedate(utils.formatdate(stamp))
    self.assertEqual(parsed[:6], time.gmtime(stamp)[:6])
2246
def test_formatdate_localtime(self):
    # With localtime=True the round-tripped date must match the local
    # broken-down time, down to the second.
    stamp = time.time()
    parsed = utils.parsedate(utils.formatdate(stamp, localtime=True))
    self.assertEqual(parsed[:6], time.localtime(stamp)[:6])
2252
def test_formatdate_usegmt(self):
    # Without usegmt the zone is rendered as '-0000'; with usegmt=True it
    # is rendered as 'GMT'.
    stamp = time.time()
    utc = time.gmtime(stamp)
    plain = utils.formatdate(stamp, localtime=False)
    self.assertEqual(
        plain, time.strftime('%a, %d %b %Y %H:%M:%S -0000', utc))
    with_gmt = utils.formatdate(stamp, localtime=False, usegmt=True)
    self.assertEqual(
        with_gmt, time.strftime('%a, %d %b %Y %H:%M:%S GMT', utc))
2261
def test_parsedate_none(self):
    # An empty string is not a date.  assertIsNone checks identity with
    # None rather than mere equality.
    self.assertIsNone(utils.parsedate(''))
2264
def test_parsedate_compact(self):
    # The FWS after the comma is optional
    compact = utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800')
    spaced = utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')
    self.assertEqual(compact, spaced)
2269
def test_parsedate_no_dayofweek(self):
    # A date with no leading day name still parses; the final element is
    # the -0800 offset in seconds.
    parsed = utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')
    expected = (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)
    self.assertEqual(parsed, expected)
2274
def test_parsedate_compact_no_dayofweek(self):
    # Same as above but with a single-digit day of the month.
    parsed = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
    expected = (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)
    self.assertEqual(parsed, expected)
2279
def test_parsedate_no_space_before_positive_offset(self):
    # The '+0800' offset directly follows the seconds, with no space.
    parsed = utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800')
    expected = (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)
    self.assertEqual(parsed, expected)
2283
def test_parsedate_no_space_before_negative_offset(self):
    # Issue 1155362: we already handled '+' for this case.
    parsed = utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800')
    expected = (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)
    self.assertEqual(parsed, expected)
2288
2289
def test_parsedate_acceptable_to_time_functions(self):
    # Both parsedate() and the first nine fields of parsedate_tz() must be
    # usable as time tuples by time.mktime() and time.strftime().
    datestr = '5 Feb 2003 13:47:26 -0800'
    candidates = (
        utils.parsedate(datestr),
        # Drop the trailing offset so only the nine time fields remain.
        utils.parsedate_tz(datestr)[:9],
    )
    for timetup in candidates:
        seconds = int(time.mktime(timetup))
        self.assertEqual(time.localtime(seconds)[:6], timetup[:6])
        self.assertEqual(int(time.strftime('%Y', timetup)), 2003)
2300
def test_parsedate_y2k(self):
    """Test for parsing a date with a two-digit year.

    Parsing a date with a two-digit year should return the correct
    four-digit year. RFC822 allows two-digit years, but RFC2822 (which
    obsoletes RFC822) requires four-digit years.

    """
    # (two-digit form, equivalent four-digit form)
    pairs = (
        ('25 Feb 03 13:47:26 -0800', '25 Feb 2003 13:47:26 -0800'),
        ('25 Feb 71 13:47:26 -0800', '25 Feb 1971 13:47:26 -0800'),
    )
    for short_form, long_form in pairs:
        self.assertEqual(utils.parsedate_tz(short_form),
                         utils.parsedate_tz(long_form))
2313
def test_parseaddr_empty(self):
    # '<>' parses to an empty name and address, which formats back to an
    # empty string.
    parsed = utils.parseaddr('<>')
    self.assertEqual(parsed, ('', ''))
    self.assertEqual(utils.formataddr(parsed), '')
2317
def test_noquote_dump(self):
    # A display name with nothing special in it is emitted unquoted.
    formatted = utils.formataddr(('A Silly Person', 'person@dom.ain'))
    self.assertEqual(formatted, 'A Silly Person <person@dom.ain>')
2322
def test_escape_dump(self):
    # Parentheses in the display name are backslash-escaped and the name
    # is quoted.
    formatted = utils.formataddr(('A (Very) Silly Person', 'person@dom.ain'))
    self.assertEqual(formatted,
                     r'"A \(Very\) Silly Person" <person@dom.ain>')
    # A name that already contains backslashes survives a round trip.
    pair = (r'A \(Special\) Person', 'person@dom.ain')
    self.assertEqual(utils.parseaddr(utils.formataddr(pair)), pair)
2330
def test_escape_backslashes(self):
    # formataddr() must double each backslash in the display name and
    # parseaddr() must undo that.  The name is written as a raw string:
    # '\B' and '\ ' are not valid escape sequences, so a plain literal
    # yields these backslashes only by accident and triggers an
    # invalid-escape warning on newer Pythons.
    a = r'Arthur \Backslash\ Foobar'
    b = 'person@dom.ain'
    self.assertEqual(
        utils.formataddr((a, b)),
        r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
    self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2338
def test_name_with_dot(self):
    unquoted = 'John X. Doe <jxd@example.com>'
    quoted = '"John X. Doe" <jxd@example.com>'
    pair = ('John X. Doe', 'jxd@example.com')
    # Both spellings parse to the same name and address.
    for source in (unquoted, quoted):
        self.assertEqual(utils.parseaddr(source), pair)
    # formataddr() quotes the name if there's a dot in it
    self.assertEqual(utils.formataddr(pair), quoted)
2347
def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
    # issue 10005.  Note that in the third test the second pair of
    # backslashes is not actually a quoted pair because it is not inside a
    # comment or quoted string: the address being parsed has a quoted
    # string containing a quoted backslash, followed by 'example' and two
    # backslashes, followed by another quoted string containing a space and
    # the word 'example'.  parseaddr copies those two backslashes
    # literally.  Per rfc5322 this is not technically correct since a \ may
    # not appear in an address outside of a quoted string.  It is probably
    # a sensible Postel interpretation, though.
    addresses = (
        '""example" example"@example.com',
        '"\\"example\\" example"@example.com',
        '"\\\\"example\\\\" example"@example.com',
    )
    # In every case the address must come back unchanged, with no name.
    for address in addresses:
        self.assertEqual(utils.parseaddr(address), ('', address))
2365
def test_parseaddr_preserves_spaces_in_local_part(self):
    # issue 9286.  A normal RFC5322 local part should not contain any
    # folding white space, but legacy local parts can (they are a sequence
    # of atoms, not dotatoms).  On the other hand we strip whitespace from
    # before the @ and around dots, on the assumption that the whitespace
    # around the punctuation is a mistake in what would otherwise be
    # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
    # well.
    #
    # NOTE(review): the whitespace inside these literals is copied as it
    # appears in the reviewed listing, where the first two cases are
    # identical -- confirm the exact spacing against the repository copy.
    cases = (
        # (expected address, input)
        ("merwok wok@xample.com", "merwok wok@xample.com"),
        ("merwok wok@xample.com", "merwok wok@xample.com"),
        ("merwok wok@xample.com", " merwok wok @xample.com"),
        ('merwok"wok" wok@xample.com', 'merwok"wok" wok@xample.com'),
        ('merwok.wok.wok@xample.com', 'merwok. wok . wok@xample.com'),
    )
    for expected, source in cases:
        self.assertEqual(('', expected), utils.parseaddr(source))
2383
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002384 def test_multiline_from_comment(self):
2385 x = """\
2386Foo
2387\tBar <foo@example.com>"""
2388 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2389
2390 def test_quote_dump(self):
2391 self.assertEqual(
2392 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2393 r'"A Silly; Person" <person@dom.ain>')
2394
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002395 def test_charset_richcomparisons(self):
2396 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002397 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002398 cset1 = Charset()
2399 cset2 = Charset()
2400 eq(cset1, 'us-ascii')
2401 eq(cset1, 'US-ASCII')
2402 eq(cset1, 'Us-AsCiI')
2403 eq('us-ascii', cset1)
2404 eq('US-ASCII', cset1)
2405 eq('Us-AsCiI', cset1)
2406 ne(cset1, 'usascii')
2407 ne(cset1, 'USASCII')
2408 ne(cset1, 'UsAsCiI')
2409 ne('usascii', cset1)
2410 ne('USASCII', cset1)
2411 ne('UsAsCiI', cset1)
2412 eq(cset1, cset2)
2413 eq(cset2, cset1)
2414
2415 def test_getaddresses(self):
2416 eq = self.assertEqual
2417 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2418 'Bud Person <bperson@dom.ain>']),
2419 [('Al Person', 'aperson@dom.ain'),
2420 ('Bud Person', 'bperson@dom.ain')])
2421
2422 def test_getaddresses_nasty(self):
2423 eq = self.assertEqual
2424 eq(utils.getaddresses(['foo: ;']), [('', '')])
2425 eq(utils.getaddresses(
2426 ['[]*-- =~$']),
2427 [('', ''), ('', ''), ('', '*--')])
2428 eq(utils.getaddresses(
2429 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2430 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2431
2432 def test_getaddresses_embedded_comment(self):
2433 """Test proper handling of a nested comment"""
2434 eq = self.assertEqual
2435 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2436 eq(addrs[0][1], 'foo@bar.com')
2437
2438 def test_utils_quote_unquote(self):
2439 eq = self.assertEqual
2440 msg = Message()
2441 msg.add_header('content-disposition', 'attachment',
2442 filename='foo\\wacky"name')
2443 eq(msg.get_filename(), 'foo\\wacky"name')
2444
2445 def test_get_body_encoding_with_bogus_charset(self):
2446 charset = Charset('not a charset')
2447 self.assertEqual(charset.get_body_encoding(), 'base64')
2448
2449 def test_get_body_encoding_with_uppercase_charset(self):
2450 eq = self.assertEqual
2451 msg = Message()
2452 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2453 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2454 charsets = msg.get_charsets()
2455 eq(len(charsets), 1)
2456 eq(charsets[0], 'utf-8')
2457 charset = Charset(charsets[0])
2458 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002459 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002460 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2461 eq(msg.get_payload(decode=True), b'hello world')
2462 eq(msg['content-transfer-encoding'], 'base64')
2463 # Try another one
2464 msg = Message()
2465 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2466 charsets = msg.get_charsets()
2467 eq(len(charsets), 1)
2468 eq(charsets[0], 'us-ascii')
2469 charset = Charset(charsets[0])
2470 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2471 msg.set_payload('hello world', charset=charset)
2472 eq(msg.get_payload(), 'hello world')
2473 eq(msg['content-transfer-encoding'], '7bit')
2474
2475 def test_charsets_case_insensitive(self):
2476 lc = Charset('us-ascii')
2477 uc = Charset('US-ASCII')
2478 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2479
2480 def test_partial_falls_inside_message_delivery_status(self):
2481 eq = self.ndiffAssertEqual
2482 # The Parser interface provides chunks of data to FeedParser in 8192
2483 # byte gulps. SF bug #1076485 found one of those chunks inside
2484 # message/delivery-status header block, which triggered an
2485 # unreadline() of NeedMoreData.
2486 msg = self._msgobj('msg_43.txt')
2487 sfp = StringIO()
2488 iterators._structure(msg, sfp)
2489 eq(sfp.getvalue(), """\
2490multipart/report
2491 text/plain
2492 message/delivery-status
2493 text/plain
2494 text/plain
2495 text/plain
2496 text/plain
2497 text/plain
2498 text/plain
2499 text/plain
2500 text/plain
2501 text/plain
2502 text/plain
2503 text/plain
2504 text/plain
2505 text/plain
2506 text/plain
2507 text/plain
2508 text/plain
2509 text/plain
2510 text/plain
2511 text/plain
2512 text/plain
2513 text/plain
2514 text/plain
2515 text/plain
2516 text/plain
2517 text/plain
2518 text/plain
2519 text/rfc822-headers
2520""")
2521
R. David Murraya0b44b52010-12-02 21:47:19 +00002522 def test_make_msgid_domain(self):
2523 self.assertEqual(
2524 email.utils.make_msgid(domain='testdomain-string')[-19:],
2525 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002526
Ezio Melottib3aedd42010-11-20 19:04:17 +00002527
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002528# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the email.iterators helpers (body_line_iterator and
    # typed_subpart_iterator), plus a regression test for the line buffering
    # done by email.feedparser.BufferedSubFile.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart; msg_19.txt holds the text the
        # joined body lines are compared against.
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Same as above, but filtering on both main type and subtype.
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(
                        msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings. A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of lines that must become
        # readable right after pushing it).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []     # every line read back, in order
        nt = 0      # running total of lines expected so far
        for il, n in imt:
            bsf.push(il)
            nt += n
            # Drain everything that is currently readable and count it.
            n1 = 0
            while True:
                ol = bsf.readline()
                if ol == NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual (rather than assertTrue(a == b)) so that a failure
            # reports both values.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: output concatenates to the input.
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
2615
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002616
Ezio Melottib3aedd42010-11-20 19:04:17 +00002617
class TestParsers(TestEmailBase):
    # Tests for Parser, HeaderParser and the email.message_from_* helpers:
    # header syntax corner cases, line-ending handling, and a few structural
    # checks on parsed multipart messages.

    # Show complete diffs when an assertion in this class fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is expected to stay one unparsed string even though the
        # content type says multipart.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the file's CRLF line endings intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        # Flattening with CRLF must reproduce the input exactly.
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara.  The input is headers only, with
        # no trailing newline and an unterminated angle address.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # All three header names here are expected to be accepted.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so no header at all
        # is expected to be parsed from this text.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable, so this does not depend on keys()
        # handing back a list that can be sorted in place.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
            )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002780
Ezio Melottib3aedd42010-11-20 19:04:17 +00002781
class Test8BitBytesHandling(unittest.TestCase):
    # In Python3 all input is string, but that doesn't work if the actual
    # input uses an 8bit transfer encoding.  As a workaround, email 5.1
    # decodes byte streams with the surrogateescape error handler and
    # converts back to binary at appropriate places when surrogates are
    # detected.  Headers containing 8bit bytes cannot be transformed this
    # way (they get munged), but they can be parsed and preserved, and body
    # parts that use an 8bit CTE can be decoded.

    # Template for the body tests; filled in via str.format().
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def _parse_body(self, charset, cte, bodyline):
        # Fill in bodytest_msg, encode the text as utf-8 and parse the bytes.
        text = self.bodytest_msg.format(charset=charset,
                                        cte=cte,
                                        bodyline=bodyline)
        return email.message_from_bytes(text.encode('utf-8'))

    def test_known_8bit_CTE(self):
        msg = self._parse_body('utf-8', '8bit', 'pöstal')
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        msg = self._parse_body('notavalidcharset', '8bit', 'pöstal')
        self.assertEqual(msg.get_payload(), "p��stal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data.  Without 'decode' the library
        # decodes the body using the charset from the headers, and because
        # the source byte really is utf-8 this works.  Against real dirty
        # data it is likely to fail (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  So the first
        # assertion only confirms the current behavior, which is not
        # necessarily the best possible behavior.  With 'decode' the raw
        # bytes are returned, so the second assertion should be of correct
        # behavior, or at least of the same result that email4 produced.
        msg = self._parse_body('utf-8', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # Similar to the previous test, but proves that an 8bit byte which
        # is undecodeable in the specified charset gets replaced by the
        # unicode 'unknown' character.  Again, this may or may not be the
        # ideal behavior.  With decode=False none of the decoders get
        # involved, so this is the only test needed for this behavior.
        msg = self._parse_body('ascii', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(msg.get_payload(), 'p=C3=B6st��l\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        msg = self._parse_body('utf-8', 'base64', 'cMO2c3RhbAá=')
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        msg = self._parse_body('utf-8', 'uuencode', '<,.V<W1A; á ')
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # utf-8 encoded message with non-ASCII characters in its headers and a
    # repeated From: header.
    headertest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        From: göst

        Yes, they are flying.
        """).encode('utf-8')

    def test_get_8bit_header(self):
        msg = email.message_from_bytes(self.headertest_msg)
        # Both access styles must report the same munged value.
        self.assertEqual(msg.get('to'), 'b??z')
        self.assertEqual(msg['to'], 'b??z')

    def test_print_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        expected = self.headertest_msg.decode(
            'ascii', 'replace').replace('�', '?')
        self.assertEqual(str(msg), expected)

    def test_values_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        expected = [
            'foo@bar.com',
            'b??z',
            'Maintenant je vous pr??sente mon '
            'coll??gue, le pouf c??l??bre\n'
            '\tJean de Baddie',
            "g??st",
        ]
        self.assertListEqual(msg.values(), expected)

    def test_items_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        expected = [
            ('From', 'foo@bar.com'),
            ('To', 'b??z'),
            ('Subject', 'Maintenant je vous pr??sente mon '
                        'coll??gue, le pouf c??l??bre\n'
                        '\tJean de Baddie'),
            ('From', 'g??st'),
        ]
        self.assertListEqual(msg.items(), expected)

    def test_get_all_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        senders = msg.get_all('from')
        self.assertListEqual(senders, ['foo@bar.com', 'g??st'])

    # utf-8 encoded message with an 8bit, non-latin body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(msg)
        # The bytes generator must reproduce the input unchanged.
        self.assertEqual(sink.getvalue(), self.non_latin_bin_msg)

    # XXX: ultimately the '?' should turn into CTE encoded bytes
    # using 'unknown-8bit' charset.
    non_latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: b??z
        Subject: Maintenant je vous pr??sente mon coll??gue, le pouf c??l??bre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        sink = StringIO()
        email.generator.Generator(sink).flatten(msg)
        self.assertEqual(sink.getvalue(), self.non_latin_bin_msg_as7bit)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(msg, unixfrom=True)
        envelope, *rest = sink.getvalue().split(b'\n')
        self.assertEqual(envelope.split()[0], b'From')
        self.assertEqual(b'\n'.join(rest), self.non_latin_bin_msg)

    def test_message_from_binary_file(self):
        fn = 'test.msg'
        # Register the cleanup first so the file goes away even on failure.
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    # latin-1 encoded message with an 8bit body.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected str() rendering of latin_bin_msg.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        sink = StringIO()
        email.generator.DecodedGenerator(sink).flatten(m)
        # DecodedHeader output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        expected = self.latin_bin_msg.decode('latin-1') + '\n'
        self.assertEqual(sink.getvalue(), expected)

    def test_bytes_feedparser(self):
        bfp = email.feedparser.BytesFeedParser()
        # Feed the message to the parser ten bytes at a time.
        for start in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[start:start + 10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(msg, linesep='\r\n')
        self.assertEqual(sink.getvalue(), text)

    maxDiff = None
3030
Ezio Melottib3aedd42010-11-20 19:04:17 +00003031
class BaseTestBytesGeneratorIdempotent:
    # Mixin that replaces _msgobj() and _idempotent() with bytes-based
    # versions.  It reads three attributes it does not define itself:
    # linesep, blinesep and normalize_linesep_regex.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the file as bytes, rewrite its line endings to blinesep, and
        # return both the parsed message and the normalized bytes.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and compare
        # the result with the bytes it was parsed from.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        expected_lines = str1.split(b'\n')
        actual_lines = str2.split(b'\n')
        self.assertListEqual(expected_lines, actual_lines)
3053
3054
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Bytes variant of the idempotency tests using '\n' line endings.
    # The regex matches every CRLF in the input data; the base class
    # substitutes blinesep for each match.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3060
3061
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Bytes variant of the idempotency tests using '\r\n' line endings.
    # The regex matches every LF that is not already preceded by a CR; the
    # base class substitutes blinesep for each match.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3067
Ezio Melottib3aedd42010-11-20 19:04:17 +00003068
class TestBase64(unittest.TestCase):
    # Tests for email.base64mime: length calculation, decoding, body
    # encoding (line wrapping and line endings) and header encoding.

    def test_len(self):
        self.assertEqual(
            base64mime.header_length('hello'),
            len(base64mime.body_encode(b'hello', eol='')))
        # Expected header_length() for inputs of 0 through 14 characters:
        # the index into this list is the input size.
        expected_lengths = [0] + [4] * 3 + [8] * 3 + [12] * 3 + [16] * 3 + [20] * 2
        for size, bsize in enumerate(expected_lengths):
            self.assertEqual(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        for encoded, raw in (('', b''), ('aGVsbG8=', b'hello')):
            self.assertEqual(base64mime.decode(encoded), raw)

    def test_encode(self):
        encode = base64mime.body_encode
        self.assertEqual(encode(b''), b'')
        self.assertEqual(encode(b'hello'), 'aGVsbG8=\n')
        # Input that itself ends in a newline
        self.assertEqual(encode(b'hello\n'), 'aGVsbG8K\n')
        # The 100 input bytes below wrap into these four lines when
        # maxlinelen is 40.
        wrapped = [
            'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
            'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
            'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
            'eHh4eCB4eHh4IA==',
        ]
        # Test the maxlinelen arg
        self.assertEqual(
            encode(b'xxxx ' * 20, maxlinelen=40),
            ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        self.assertEqual(
            encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
            ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        encode = base64mime.header_encode
        self.assertEqual(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line endings inside the text are encoded like any other character.
        self.assertEqual(encode('hello\r\nworld'),
                         '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        self.assertEqual(encode('hello\nworld'),
                         '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        self.assertEqual(encode('hello', charset='iso-8859-2'),
                         '=?iso-8859-2?b?aGVsbG8=?=')
        self.assertEqual(encode('hello\nworld'),
                         '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003119
3120
Ezio Melottib3aedd42010-11-20 19:04:17 +00003121
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003122class TestQuopri(unittest.TestCase):
3123 def setUp(self):
3124 # Set of characters (as byte integers) that don't need to be encoded
3125 # in headers.
3126 self.hlit = list(chain(
3127 range(ord('a'), ord('z') + 1),
3128 range(ord('A'), ord('Z') + 1),
3129 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003130 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003131 # Set of characters (as byte integers) that do need to be encoded in
3132 # headers.
3133 self.hnon = [c for c in range(256) if c not in self.hlit]
3134 assert len(self.hlit) + len(self.hnon) == 256
3135 # Set of characters (as byte integers) that don't need to be encoded
3136 # in bodies.
3137 self.blit = list(range(ord(' '), ord('~') + 1))
3138 self.blit.append(ord('\t'))
3139 self.blit.remove(ord('='))
3140 # Set of characters (as byte integers) that do need to be encoded in
3141 # bodies.
3142 self.bnon = [c for c in range(256) if c not in self.blit]
3143 assert len(self.blit) + len(self.bnon) == 256
3144
Guido van Rossum9604e662007-08-30 03:46:43 +00003145 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003146 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003147 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003148 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003149 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003150 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003151 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003152
Guido van Rossum9604e662007-08-30 03:46:43 +00003153 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003154 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003155 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003156 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003157 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003158 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003159 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003160
3161 def test_header_quopri_len(self):
3162 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003163 eq(quoprimime.header_length(b'hello'), 5)
3164 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003165 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003166 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003167 # =?xxx?q?...?= means 10 extra characters
3168 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003169 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3170 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003171 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003172 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003173 # =?xxx?q?...?= means 10 extra characters
3174 10)
3175 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003176 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003177 'expected length 1 for %r' % chr(c))
3178 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003179 # Space is special; it's encoded to _
3180 if c == ord(' '):
3181 continue
3182 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003183 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003184 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003185
3186 def test_body_quopri_len(self):
3187 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003188 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003189 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003190 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003191 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003192
3193 def test_quote_unquote_idempotent(self):
3194 for x in range(256):
3195 c = chr(x)
3196 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3197
3198 def test_header_encode(self):
3199 eq = self.assertEqual
3200 he = quoprimime.header_encode
3201 eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
3202 eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
3203 eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
3204 # Test a non-ASCII character
3205 eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
3206
3207 def test_decode(self):
3208 eq = self.assertEqual
3209 eq(quoprimime.decode(''), '')
3210 eq(quoprimime.decode('hello'), 'hello')
3211 eq(quoprimime.decode('hello', 'X'), 'hello')
3212 eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')
3213
3214 def test_encode(self):
3215 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003216 eq(quoprimime.body_encode(''), '')
3217 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003218 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003219 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003220 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003221 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003222xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3223 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3224x xxxx xxxx xxxx xxxx=20""")
3225 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003226 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3227 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003228xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3229 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3230x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003231 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003232one line
3233
3234two line"""), """\
3235one line
3236
3237two line""")
3238
3239
Ezio Melottib3aedd42010-11-20 19:04:17 +00003240
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003241# Test the Charset class
3242class TestCharset(unittest.TestCase):
3243 def tearDown(self):
3244 from email import charset as CharsetModule
3245 try:
3246 del CharsetModule.CHARSETS['fake']
3247 except KeyError:
3248 pass
3249
Guido van Rossum9604e662007-08-30 03:46:43 +00003250 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003251 eq = self.assertEqual
3252 # Make sure us-ascii = no Unicode conversion
3253 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003254 eq(c.header_encode('Hello World!'), 'Hello World!')
3255 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003256 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003257 self.assertRaises(UnicodeError, c.header_encode, s)
3258 c = Charset('utf-8')
3259 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003260
3261 def test_body_encode(self):
3262 eq = self.assertEqual
3263 # Try a charset with QP body encoding
3264 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003265 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003266 # Try a charset with Base64 body encoding
3267 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003268 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003269 # Try a charset with None body encoding
3270 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003271 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003272 # Try the convert argument, where input codec != output codec
3273 c = Charset('euc-jp')
3274 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003275 # XXX FIXME
3276## try:
3277## eq('\x1b$B5FCO;~IW\x1b(B',
3278## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3279## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3280## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3281## except LookupError:
3282## # We probably don't have the Japanese codecs installed
3283## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003284 # Testing SF bug #625509, which we have to fake, since there are no
3285 # built-in encodings where the header encoding is QP but the body
3286 # encoding is not.
3287 from email import charset as CharsetModule
3288 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3289 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003290 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003291
3292 def test_unicode_charset_name(self):
3293 charset = Charset('us-ascii')
3294 self.assertEqual(str(charset), 'us-ascii')
3295 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3296
3297
Ezio Melottib3aedd42010-11-20 19:04:17 +00003298
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003299# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        check = self.ndiffAssertEqual
        hdr = Header('Hello World!')
        check(hdr.encode(), 'Hello World!')
        hdr.append(' Goodbye World!')
        # NOTE(review): this expectation is identical to the one in
        # test_simple_surprise although the appended text differs by a
        # leading space -- confirm the literal against the repository.
        check(hdr.encode(), 'Hello World! Goodbye World!')

    def test_simple_surprise(self):
        check = self.ndiffAssertEqual
        hdr = Header('Hello World!')
        check(hdr.encode(), 'Hello World!')
        # The appended chunk has no leading space, yet the encoded header
        # separates the two chunks with one.
        hdr.append('Goodbye World!')
        check(hdr.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        plain = 'no decoding needed'
        self.assertEqual(decode_header(plain), [(plain, None)])

    def test_long(self):
        limit = 76
        hdr = Header(
            "I am the very model of a modern Major-General; "
            "I've information vegetable, animal, and mineral; "
            "I know the kings of England, and I quote the fights historical "
            "from Marathon to Waterloo, in order categorical; "
            "I'm very well acquainted, too, with matters mathematical; "
            "I understand equations, both the simple and quadratical; "
            "about binomial theorem I'm teeming with a lot o' news, "
            "with many cheerful facts about the square of the hypotenuse.",
            maxlinelen=limit)
        folded = hdr.encode(splitchars=' ')
        # No folded line may be longer than the requested maximum.
        for line in folded.split('\n '):
            self.assertTrue(len(line) <= limit)

    def test_multilingual(self):
        check = self.ndiffAssertEqual
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        german = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        czech = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                 b'd\xf9vtipu.. ')
        japanese = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                    '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                    '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                    '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                    '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                    'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                    'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                    '\u3044\u307e\u3059\u3002')
        # One header built from three chunks, each in its own charset.
        hdr = Header(german, latin1)
        hdr.append(czech, latin2)
        hdr.append(japanese, utf8)
        encoded = hdr.encode(maxlinelen=76)
        check(encoded, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back the three original chunks.
        decoded = decode_header(encoded)
        check(len(decoded), 3)
        check(decoded[0], (german, 'iso-8859-1'))
        check(decoded[1], (czech, 'iso-8859-2'))
        check(decoded[2], (japanese.encode('utf-8'), 'utf-8'))
        # str() of the header is the concatenated text as unicode.
        as_text = str(hdr)
        check(as_text,
              (b'Die Mieter treten hier ein werden mit einem Foerderband '
               b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
               b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
               b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
               b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
               b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
               b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
               b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
               b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
               b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
               b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
               b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
               b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
               b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
               b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
               b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
               ).decode('utf-8'))
        # Test make_header()
        rebuilt = make_header(decode_header(encoded))
        check(rebuilt, hdr)

    def test_empty_header_encode(self):
        self.assertEqual(Header().encode(), '')

    def test_header_ctor_default_args(self):
        check = self.ndiffAssertEqual
        hdr = Header()
        check(hdr, '')
        hdr.append('foo', Charset('iso-8859-1'))
        check(hdr, 'foo')

    def test_explicit_maxlinelen(self):
        check = self.ndiffAssertEqual
        text = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # No header_name and no maxlinelen.
        hdr = Header(text)
        check(hdr.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        check(str(hdr), text)
        # With a header_name the first line is expected to fold earlier.
        hdr = Header(text, header_name='Subject')
        check(hdr.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        check(str(hdr), text)
        # A generous explicit maxlinelen leaves the text unfolded.
        hdr = Header(text, maxlinelen=1024, header_name='Subject')
        check(hdr.encode(), text)
        check(str(hdr), text)

    def test_quopri_splittable(self):
        check = self.ndiffAssertEqual
        text = 'xxxx ' * 20
        hdr = Header(charset='iso-8859-1', maxlinelen=20)
        hdr.append(text)
        encoded = hdr.encode()
        # At maxlinelen=20 the expected output is one short encoded word
        # per line: 'xxx', then the five-chunk cycle below nine times, then
        # the remainder.  The chunks concatenate to the 100-character input
        # with '_' in place of each space.
        chunks = (['xxx']
                  + ['x_', 'xx', 'xx', '_x', 'xx'] * 9
                  + ['x_', 'xx', 'xx', '_'])
        check(encoded,
              '\n '.join('=?iso-8859-1?q?%s?=' % chunk for chunk in chunks))
        check(text, str(make_header(decode_header(encoded))))
        hdr = Header(charset='iso-8859-1', maxlinelen=40)
        hdr.append('xxxx ' * 20)
        encoded = hdr.encode()
        check(encoded, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_base64_splittable(self):
        check = self.ndiffAssertEqual
        text = 'xxxx ' * 20
        hdr = Header(charset='koi8-r', maxlinelen=20)
        hdr.append(text)
        encoded = hdr.encode()
        # At maxlinelen=20 the expected output is one four-character base64
        # group per line: the five-group cycle below six times, then the
        # remainder, ending with the padded group for the final space.
        groups = (['eHh4', 'eCB4', 'eHh4', 'IHh4', 'eHgg'] * 6
                  + ['eHh4', 'eCB4', 'eHh4', 'IA=='])
        check(encoded,
              '\n '.join('=?koi8-r?b?%s?=' % group for group in groups))
        check(text, str(make_header(decode_header(encoded))))
        hdr = Header(charset='koi8-r', maxlinelen=40)
        hdr.append(text)
        encoded = hdr.encode()
        check(encoded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_us_ascii_header(self):
        check = self.assertEqual
        text = 'hello'
        parts = decode_header(text)
        check(parts, [('hello', None)])
        check(text, make_header(parts).encode())

    def test_string_charset(self):
        # The charset may be given as a plain string name.
        hdr = Header()
        hdr.append('hello', 'iso-8859-1')
        self.assertEqual(hdr, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        check = self.assertEqual
        # Mostly-ASCII text is expected to come out quoted-printable ...
        check(Header('p\xf6stal', 'utf-8').encode(),
              '=?utf-8?q?p=C3=B6stal?=')
        # ... and all-CJK text as base64.
        check(Header('\u83ca\u5730\u6642\u592b', 'utf-8').encode(),
              '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        check = self.assertEqual
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Without an errors= argument the stray 8-bit byte is rejected,
        # both by the constructor and by append().
        with self.assertRaises(UnicodeError):
            Header(raw)
        hdr = Header()
        with self.assertRaises(UnicodeError):
            hdr.append(raw)
        # With errors='replace' the result matches a utf-8 decode that
        # uses the same error handler.
        replaced = raw.decode('utf-8', 'replace')
        check(str(Header(raw, errors='replace')), replaced)
        hdr.append(raw, errors='replace')
        check(str(hdr), replaced)

    def test_encoded_adjacent_nonencoded(self):
        check = self.assertEqual
        hdr = Header()
        hdr.append('hello', 'iso-8859-1')
        hdr.append('world')
        encoded = hdr.encode()
        check(encoded, '=?iso-8859-1?q?hello?= world')
        # A decode/re-encode round trip must reproduce the same text.
        round_tripped = make_header(decode_header(encoded))
        check(round_tripped.encode(), encoded)

    def test_whitespace_eater(self):
        check = self.assertEqual
        raw = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(raw)
        # The two adjacent koi8-r encoded words collapse into one part.
        check(parts, [
            (b'Subject:', None),
            (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
            (b'zz.', None),
        ])
        rebuilt = make_header(parts)
        check(rebuilt.encode(),
              'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raw = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        with self.assertRaises(errors.HeaderParseError):
            decode_header(raw)
3606
3607
Ezio Melottib3aedd42010-11-20 19:04:17 +00003608
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003609# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    def test_get_param(self):
        check = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        check(msg.get_param('title'),
              ('us-ascii', 'en', "This is even more ***fun*** isn't it!"))
        # unquote=False keeps the surrounding double quotes.
        check(msg.get_param('title', unquote=False),
              ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        check = self.ndiffAssertEqual
        title = "This is even more ***fun*** isn't it!"
        msg = Message()
        # With only a charset, the language slot reads back as ''.
        msg.set_param('title', title, charset='us-ascii')
        check(msg.get_param('title'), ('us-ascii', '', title))
        msg.set_param('title', title, charset='us-ascii', language='en')
        check(msg.get_param('title'), ('us-ascii', 'en', title))
        # Setting the parameter on a parsed message must show up in the
        # flattened Content-Type header in RFC 2231 form.
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', title, charset='us-ascii', language='en')
        check(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        check = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', "This is even more ***fun*** isn't it!",
                      charset='us-ascii', language='en')
        # Only 'foo' is removed; 'title' must survive in the output.
        msg.del_param('foo', header='Content-Type')
        check(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        msg = self._msgobj('msg_32.txt')
        self.assertEqual(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_no_language_or_charset(self):
        source = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(source)
        value = msg.get_param('NAME')
        # Plain continuations carry no charset/language, so the value is
        # a string rather than a 3-tuple.
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(
            value,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        source = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(source)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        source = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(source)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 is not marked as encoded (no trailing '*'), so its
        # percent escapes are expected to stay literal.
        source = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(source)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded, so nothing is unescaped.
        source = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(source)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        source = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(source)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        source = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(source)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # The charset label 'bogus' is unknown; the filename must still
        # come back decoded.
        source = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(source)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        source = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(source)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        source = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(source)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        source = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(source)
        # The trailing %E2 escape is expected to surface as U+FFFD.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        source = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(source)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        check = self.assertEqual
        source = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(source)
        # A lone apostrophe must not be taken for a charset/language
        # delimiter.
        charset, language, value = msg.get_param('name')
        check(charset, None)
        check(language, None)
        check(value, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        source = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(source)
        value = msg.get_param('name')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        check = self.assertEqual
        source = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(source)
        # Only the first two apostrophes delimit charset and language;
        # the third belongs to the value.
        charset, language, value = msg.get_param('name')
        check(charset, 'us-ascii')
        check(language, 'en-us')
        check(value, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        source = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(source)
        value = msg.get_param('name')
        # The segments are not marked as encoded, so the charset-looking
        # prefix stays part of the plain string value.
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(value, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        source = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(source)
        self.assertEqual(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        check = self.assertEqual
        source = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(source)
        charset, language, value = msg.get_param('name')
        check(charset, 'us-ascii')
        check(language, 'en-us')
        check(value, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        check = self.assertEqual
        source = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(source)
        charset, language, value = msg.get_param('name')
        check(charset, 'us-ascii')
        check(language, 'en-us')
        check(value, 'My Document For You')
3918
3919
Ezio Melottib3aedd42010-11-20 19:04:17 +00003920
R. David Murraya8f480f2010-01-16 18:30:03 +00003921# Tests to ensure that signed parts of an email are completely preserved, as
3922# required by RFC1847 section 2.1. Note that these are incomplete, because the
3923# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Return the raw text of a test data file together with the
        # message object parsed from that text.
        with openfile(findfile(filename)) as stream:
            text = stream.read()
        return text, email.message_from_string(text)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message: everything between
        # the first '--boundary' line and the next line that starts with
        # the same '--boundary'.
        first_part = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        expected = first_part.search(original).group(2)
        actual = first_part.search(result).group(2)
        self.assertEqual(actual, expected)

    def test_long_headers_as_string(self):
        source, msg = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(source, msg.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        source, msg = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(source, msg.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        source, msg = self._msg_and_obj('msg_45.txt')
        buffer = StringIO()
        Generator(buffer).flatten(msg)
        self._signed_parts_eq(source, buffer.getvalue())
3956
3957
Ezio Melottib3aedd42010-11-20 19:04:17 +00003958
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'.

    The attributes are returned in the order ``dir()`` lists their names.
    """
    this_module = sys.modules[__name__]
    found = []
    for attr in dir(this_module):
        if attr.startswith('Test'):
            found.append(getattr(this_module, attr))
    return found
3962
3963
def suite():
    """Return one TestSuite holding the tests of every ``Test*`` class.

    The classes come from ``_testclasses()``.  Tests are loaded with the
    default ``unittest`` loader instead of ``unittest.makeSuite()``, which
    is deprecated and no longer exists in recent Python versions.
    """
    loader = unittest.defaultTestLoader
    # Named differently from the function so the function is not shadowed.
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
3969
3970
def test_main():
    """Pass each ``Test*`` class of this module to ``run_unittest``.

    Classes are handed over one per call.
    """
    for case_class in _testclasses():
        run_unittest(case_class)
3974
3975
Ezio Melottib3aedd42010-11-20 19:04:17 +00003976
# When executed as a script, hand control to unittest and tell it to build
# the tests from the module-level name 'suite'.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')