blob: 8cc2da0c0421b51a73b9cba9e8e0bf0c05c36890 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
R David Murrayc27e5222012-05-25 15:01:48 -040019import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21from email.charset import Charset
22from email.header import Header, decode_header, make_header
23from email.parser import Parser, HeaderParser
24from email.generator import Generator, DecodedGenerator
25from email.message import Message
26from email.mime.application import MIMEApplication
27from email.mime.audio import MIMEAudio
28from email.mime.text import MIMEText
29from email.mime.image import MIMEImage
30from email.mime.base import MIMEBase
31from email.mime.message import MIMEMessage
32from email.mime.multipart import MIMEMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
R David Murray28346b82011-03-31 11:40:20 -040040from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040041from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000042
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Exercises the Message API: header access, charset handling, payload
    # decoding and Content-Type / Content-Disposition parameter handling.
    # Messages are either loaded from fixture files via self._msgobj() and
    # openfile(), or built inline.

    def test_get_all(self):
        # get_all() returns the list of values for a header, or the supplied
        # fallback when the header is absent.
        msg = self._msgobj('msg_20.txt')
        self.assertEqual(msg.get_all('cc'),
                         ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        self.assertEqual(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        latin1 = Charset('iso-8859-1')
        msg.set_charset(latin1)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(latin1)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        msg = Message()
        msg.set_charset('us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # The charset passed to set_payload() is what get_charset() reports.
        msg = Message()
        msg.set_payload('This is a string payload', Charset('iso-8859-1'))
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        eq(msg.get_charsets(),
           [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # An explicit failobj replaces None in the result.
        msg = self._msgobj('msg_09.txt')
        eq(msg.get_charsets('dingbat'),
           ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat', 'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        eq(msg.get_charsets(),
           [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
            'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        eq([part.get_filename() for part in msg.get_payload()],
           ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        eq(msg.get_payload(1).get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        msg = self._msgobj('msg_44.txt')
        self.assertEqual([part.get_filename() for part in msg.get_payload()],
                         ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        self.assertEqual(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        with self.assertRaises(errors.HeaderParseError):
            msg.set_boundary('BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        msg = HeaderParser().parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        msg = email.parser.BytesHeaderParser().parsebytes(msgdata)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        # Each pass appends another Content-Transfer-Encoding header; the
        # payload must keep decoding to the same bytes.
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        with self.assertRaises(TypeError):
            msg.get_payload(1)

    def test_decoded_generator(self):
        # Flattening msg_07.txt through DecodedGenerator must reproduce the
        # text stored in msg_17.txt.
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            expected = fp.read()
        out = StringIO()
        DecodedGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), expected)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the first line is the envelope "From " line and
        # the remainder is the plain serialization.
        lines = msg.as_string(unixfrom=True).split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header on this message, so the default lookup
        # yields None.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # The "&amp;" sequences are deliberate: they put semicolons inside
        # the quoted parameter value.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        for name in ('header', 'Header', 'HEADER'):
            self.assertIn(name, msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter appends it after the existing ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # No assertion here: the test passes as long as del_param() does not
        # raise when the header is missing.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header as it
        # was before the call, which is also the value spelled out below.
        self.assertEqual(msg['Content-Type'], existing_header)
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        with self.assertRaises(ValueError):
            msg.set_type('text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value without a slash falls back to "text".
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type value without a slash falls back to "plain".
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        with self.assertRaises(KeyError):
            msg.replace_header('Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer.ppt",
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            "attachment; filename*=iso-8859-1''Fu%DFballer.ppt",
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        # A keyword whose value is None becomes a bare parameter name, with
        # the underscore turned into a dash.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        with self.assertRaises(errors.HeaderParseError):
            msg.as_string()

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        with self.assertRaises(errors.HeaderParseError):
            msg.as_string()

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
635
636
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Checks which Content-Transfer-Encoding is chosen for a payload and
    # charset, and that base64 bodies are wrapped to a legal line length.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            raw_image = fp.read()
        encoded_body = email.mime.image.MIMEImage(raw_image).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        # Setting a charset on a message with no payload still picks a CTE.
        msg = Message()
        msg.set_charset('us-ascii')
        self.assertEqual(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit['content-transfer-encoding'], 'base64')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        msg = MIMEText('文', _charset='euc-jp')
        self.assertEqual(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000674
Ezio Melottib3aedd42010-11-20 19:04:17 +0000675
# Test long header wrapping
677class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400678
679 maxDiff = None
680
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000681 def test_split_long_continuation(self):
682 eq = self.ndiffAssertEqual
683 msg = email.message_from_string("""\
684Subject: bug demonstration
685\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
686\tmore text
687
688test
689""")
690 sfp = StringIO()
691 g = Generator(sfp)
692 g.flatten(msg)
693 eq(sfp.getvalue(), """\
694Subject: bug demonstration
695\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
696\tmore text
697
698test
699""")
700
701 def test_another_long_almost_unsplittable_header(self):
702 eq = self.ndiffAssertEqual
703 hstr = """\
704bug demonstration
705\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
706\tmore text"""
707 h = Header(hstr, continuation_ws='\t')
708 eq(h.encode(), """\
709bug demonstration
710\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
711\tmore text""")
712 h = Header(hstr.replace('\t', ' '))
713 eq(h.encode(), """\
714bug demonstration
715 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
716 more text""")
717
718 def test_long_nonstring(self):
719 eq = self.ndiffAssertEqual
720 g = Charset("iso-8859-1")
721 cz = Charset("iso-8859-2")
722 utf8 = Charset("utf-8")
723 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
724 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
725 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
726 b'bef\xf6rdert. ')
727 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
728 b'd\xf9vtipu.. ')
729 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
730 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
731 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
732 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
733 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
734 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
735 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
736 '\u3044\u307e\u3059\u3002')
737 h = Header(g_head, g, header_name='Subject')
738 h.append(cz_head, cz)
739 h.append(utf8_head, utf8)
740 msg = Message()
741 msg['Subject'] = h
742 sfp = StringIO()
743 g = Generator(sfp)
744 g.flatten(msg)
745 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000746Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
747 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
748 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
749 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
750 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
751 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
752 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
753 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
754 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
755 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
756 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000757
758""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000759 eq(h.encode(maxlinelen=76), """\
760=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
761 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
762 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
763 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
764 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
765 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
766 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
767 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
768 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
769 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
770 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000771
772 def test_long_header_encode(self):
773 eq = self.ndiffAssertEqual
774 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
775 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
776 header_name='X-Foobar-Spoink-Defrobnit')
777 eq(h.encode(), '''\
778wasnipoop; giraffes="very-long-necked-animals";
779 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
780
781 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
782 eq = self.ndiffAssertEqual
783 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
784 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
785 header_name='X-Foobar-Spoink-Defrobnit',
786 continuation_ws='\t')
787 eq(h.encode(), '''\
788wasnipoop; giraffes="very-long-necked-animals";
789 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
790
791 def test_long_header_encode_with_tab_continuation(self):
792 eq = self.ndiffAssertEqual
793 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
794 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
795 header_name='X-Foobar-Spoink-Defrobnit',
796 continuation_ws='\t')
797 eq(h.encode(), '''\
798wasnipoop; giraffes="very-long-necked-animals";
799\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
800
R David Murray3a6152f2011-03-14 21:13:03 -0400801 def test_header_encode_with_different_output_charset(self):
802 h = Header('文', 'euc-jp')
803 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
804
805 def test_long_header_encode_with_different_output_charset(self):
806 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
807 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
808 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
809 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
810 res = """\
811=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
812 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
813 self.assertEqual(h.encode(), res)
814
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000815 def test_header_splitter(self):
816 eq = self.ndiffAssertEqual
817 msg = MIMEText('')
818 # It'd be great if we could use add_header() here, but that doesn't
819 # guarantee an order of the parameters.
820 msg['X-Foobar-Spoink-Defrobnit'] = (
821 'wasnipoop; giraffes="very-long-necked-animals"; '
822 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
823 sfp = StringIO()
824 g = Generator(sfp)
825 g.flatten(msg)
826 eq(sfp.getvalue(), '''\
827Content-Type: text/plain; charset="us-ascii"
828MIME-Version: 1.0
829Content-Transfer-Encoding: 7bit
830X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
831 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
832
833''')
834
835 def test_no_semis_header_splitter(self):
836 eq = self.ndiffAssertEqual
837 msg = Message()
838 msg['From'] = 'test@dom.ain'
839 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
840 msg.set_payload('Test')
841 sfp = StringIO()
842 g = Generator(sfp)
843 g.flatten(msg)
844 eq(sfp.getvalue(), """\
845From: test@dom.ain
846References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
847 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
848
849Test""")
850
R David Murray7da4db12011-04-07 20:37:17 -0400851 def test_last_split_chunk_does_not_fit(self):
852 eq = self.ndiffAssertEqual
853 h = Header('Subject: the first part of this is short, but_the_second'
854 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
855 '_all_by_itself')
856 eq(h.encode(), """\
857Subject: the first part of this is short,
858 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
859
860 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
861 eq = self.ndiffAssertEqual
862 h = Header(', but_the_second'
863 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
864 '_all_by_itself')
865 eq(h.encode(), """\
866,
867 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
868
869 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
870 eq = self.ndiffAssertEqual
871 h = Header(', , but_the_second'
872 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
873 '_all_by_itself')
874 eq(h.encode(), """\
875, ,
876 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
877
878 def test_trailing_splitable_on_overlong_unsplitable(self):
879 eq = self.ndiffAssertEqual
880 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
881 'be_on_a_line_all_by_itself;')
882 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
883 "be_on_a_line_all_by_itself;")
884
885 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
886 eq = self.ndiffAssertEqual
887 h = Header('; '
888 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400889 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400890 eq(h.encode(), """\
891;
R David Murray01581ee2011-04-18 10:04:34 -0400892 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400893
R David Murraye1292a22011-04-07 20:54:03 -0400894 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400895 eq = self.ndiffAssertEqual
896 h = Header('This is a long line that has two whitespaces in a row. '
897 'This used to cause truncation of the header when folded')
898 eq(h.encode(), """\
899This is a long line that has two whitespaces in a row. This used to cause
900 truncation of the header when folded""")
901
R David Murray01581ee2011-04-18 10:04:34 -0400902 def test_splitter_split_on_punctuation_only_if_fws(self):
903 eq = self.ndiffAssertEqual
904 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
905 'they;arenotlegal;fold,points')
906 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
907 "arenotlegal;fold,points")
908
909 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
910 eq = self.ndiffAssertEqual
911 h = Header('this is a test where we need to have more than one line '
912 'before; our final line that is just too big to fit;; '
913 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
914 'be_on_a_line_all_by_itself;')
915 eq(h.encode(), """\
916this is a test where we need to have more than one line before;
917 our final line that is just too big to fit;;
918 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
919
920 def test_overlong_last_part_followed_by_split_point(self):
921 eq = self.ndiffAssertEqual
922 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
923 'be_on_a_line_all_by_itself ')
924 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
925 "should_be_on_a_line_all_by_itself ")
926
927 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
928 eq = self.ndiffAssertEqual
929 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
930 'before_our_final_line_; ; '
931 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
932 'be_on_a_line_all_by_itself; ')
933 eq(h.encode(), """\
934this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
935 ;
936 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
937
938 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
939 eq = self.ndiffAssertEqual
940 h = Header('this is a test where we need to have more than one line '
941 'before our final line; ; '
942 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
943 'be_on_a_line_all_by_itself; ')
944 eq(h.encode(), """\
945this is a test where we need to have more than one line before our final line;
946 ;
947 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
948
949 def test_long_header_with_whitespace_runs(self):
950 eq = self.ndiffAssertEqual
951 msg = Message()
952 msg['From'] = 'test@dom.ain'
953 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
954 msg.set_payload('Test')
955 sfp = StringIO()
956 g = Generator(sfp)
957 g.flatten(msg)
958 eq(sfp.getvalue(), """\
959From: test@dom.ain
960References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
961 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
962 <foo@dom.ain> <foo@dom.ain>\x20\x20
963
964Test""")
965
966 def test_long_run_with_semi_header_splitter(self):
967 eq = self.ndiffAssertEqual
968 msg = Message()
969 msg['From'] = 'test@dom.ain'
970 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
971 msg.set_payload('Test')
972 sfp = StringIO()
973 g = Generator(sfp)
974 g.flatten(msg)
975 eq(sfp.getvalue(), """\
976From: test@dom.ain
977References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
978 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
979 <foo@dom.ain>; abc
980
981Test""")
982
983 def test_splitter_split_on_punctuation_only_if_fws(self):
984 eq = self.ndiffAssertEqual
985 msg = Message()
986 msg['From'] = 'test@dom.ain'
987 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
988 'they;arenotlegal;fold,points')
989 msg.set_payload('Test')
990 sfp = StringIO()
991 g = Generator(sfp)
992 g.flatten(msg)
993 # XXX the space after the header should not be there.
994 eq(sfp.getvalue(), """\
995From: test@dom.ain
996References:\x20
997 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
998
999Test""")
1000
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001001 def test_no_split_long_header(self):
1002 eq = self.ndiffAssertEqual
1003 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001004 h = Header(hstr)
1005 # These come on two lines because Headers are really field value
1006 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001007 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001008References:
1009 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1010 h = Header('x' * 80)
1011 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001012
1013 def test_splitting_multiple_long_lines(self):
1014 eq = self.ndiffAssertEqual
1015 hstr = """\
1016from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1017\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1018\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1019"""
1020 h = Header(hstr, continuation_ws='\t')
1021 eq(h.encode(), """\
1022from babylon.socal-raves.org (localhost [127.0.0.1]);
1023 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1024 for <mailman-admin@babylon.socal-raves.org>;
1025 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1026\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1027 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1028 for <mailman-admin@babylon.socal-raves.org>;
1029 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1030\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1031 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1032 for <mailman-admin@babylon.socal-raves.org>;
1033 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1034
1035 def test_splitting_first_line_only_is_long(self):
1036 eq = self.ndiffAssertEqual
1037 hstr = """\
1038from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1039\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1040\tid 17k4h5-00034i-00
1041\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1042 h = Header(hstr, maxlinelen=78, header_name='Received',
1043 continuation_ws='\t')
1044 eq(h.encode(), """\
1045from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1046 helo=cthulhu.gerg.ca)
1047\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1048\tid 17k4h5-00034i-00
1049\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1050
1051 def test_long_8bit_header(self):
1052 eq = self.ndiffAssertEqual
1053 msg = Message()
1054 h = Header('Britische Regierung gibt', 'iso-8859-1',
1055 header_name='Subject')
1056 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001057 eq(h.encode(maxlinelen=76), """\
1058=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1059 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001060 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001061 eq(msg.as_string(maxheaderlen=76), """\
1062Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1063 =?iso-8859-1?q?hore-Windkraftprojekte?=
1064
1065""")
1066 eq(msg.as_string(maxheaderlen=0), """\
1067Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001068
1069""")
1070
1071 def test_long_8bit_header_no_charset(self):
1072 eq = self.ndiffAssertEqual
1073 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001074 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1075 'f\xfcr Offshore-Windkraftprojekte '
1076 '<a-very-long-address@example.com>')
1077 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001078 eq(msg.as_string(maxheaderlen=78), """\
1079Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1080 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1081
1082""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001083 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001084 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001085 header_name='Reply-To')
1086 eq(msg.as_string(maxheaderlen=78), """\
1087Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1088 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001089
1090""")
1091
1092 def test_long_to_header(self):
1093 eq = self.ndiffAssertEqual
1094 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001095 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001096 '"Someone Test #B" <someone@umich.edu>, '
1097 '"Someone Test #C" <someone@eecs.umich.edu>, '
1098 '"Someone Test #D" <someone@eecs.umich.edu>')
1099 msg = Message()
1100 msg['To'] = to
1101 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001102To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001103 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001104 "Someone Test #C" <someone@eecs.umich.edu>,
1105 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001106
1107''')
1108
1109 def test_long_line_after_append(self):
1110 eq = self.ndiffAssertEqual
1111 s = 'This is an example of string which has almost the limit of header length.'
1112 h = Header(s)
1113 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001114 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001115This is an example of string which has almost the limit of header length.
1116 Add another line.""")
1117
1118 def test_shorter_line_with_append(self):
1119 eq = self.ndiffAssertEqual
1120 s = 'This is a shorter line.'
1121 h = Header(s)
1122 h.append('Add another sentence. (Surprise?)')
1123 eq(h.encode(),
1124 'This is a shorter line. Add another sentence. (Surprise?)')
1125
1126 def test_long_field_name(self):
1127 eq = self.ndiffAssertEqual
1128 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001129 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1130 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1131 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1132 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001133 h = Header(gs, 'iso-8859-1', header_name=fn)
1134 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001135 eq(h.encode(maxlinelen=76), """\
1136=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1137 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1138 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1139 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001140
1141 def test_long_received_header(self):
1142 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1143 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1144 'Wed, 05 Mar 2003 18:10:18 -0700')
1145 msg = Message()
1146 msg['Received-1'] = Header(h, continuation_ws='\t')
1147 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001148 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001149 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001150Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1151 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001152 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001153Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1154 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001155 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001156
1157""")
1158
1159 def test_string_headerinst_eq(self):
1160 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1161 'tu-muenchen.de> (David Bremner\'s message of '
1162 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1163 msg = Message()
1164 msg['Received-1'] = Header(h, header_name='Received-1',
1165 continuation_ws='\t')
1166 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001167 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001168 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001169Received-1:\x20
1170 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1171 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1172Received-2:\x20
1173 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1174 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001175
1176""")
1177
1178 def test_long_unbreakable_lines_with_continuation(self):
1179 eq = self.ndiffAssertEqual
1180 msg = Message()
1181 t = """\
1182iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1183 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1184 msg['Face-1'] = t
1185 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001186 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001187 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001188 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001189 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001190Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001191 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001192 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001193Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001194 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001195 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001196Face-3:\x20
1197 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1198 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001199
1200""")
1201
1202 def test_another_long_multiline_header(self):
1203 eq = self.ndiffAssertEqual
1204 m = ('Received: from siimage.com '
1205 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001206 'Microsoft SMTPSVC(5.0.2195.4905); '
1207 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001208 msg = email.message_from_string(m)
1209 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001210Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1211 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001212
1213''')
1214
1215 def test_long_lines_with_different_header(self):
1216 eq = self.ndiffAssertEqual
1217 h = ('List-Unsubscribe: '
1218 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1219 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1220 '?subject=unsubscribe>')
1221 msg = Message()
1222 msg['List'] = h
1223 msg['List'] = Header(h, header_name='List')
1224 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001225List: List-Unsubscribe:
1226 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001227 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001228List: List-Unsubscribe:
1229 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001230 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001231
1232""")
1233
R. David Murray6f0022d2011-01-07 21:57:25 +00001234 def test_long_rfc2047_header_with_embedded_fws(self):
1235 h = Header(textwrap.dedent("""\
1236 We're going to pretend this header is in a non-ascii character set
1237 \tto see if line wrapping with encoded words and embedded
1238 folding white space works"""),
1239 charset='utf-8',
1240 header_name='Test')
1241 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1242 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1243 =?utf-8?q?cter_set?=
1244 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1245 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1246
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001247
Ezio Melottib3aedd42010-11-20 19:04:17 +00001248
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001249# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Checks how Generator's mangle_from_ flag treats a body line that
    # starts with "From ".

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Serialize self.msg with the given mangle_from_ setting.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body line gains a leading '>'.
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is written verbatim.
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
1280
1281
Ezio Melottib3aedd42010-11-20 19:04:17 +00001282
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001283# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Builds a MIMEAudio part from the audiotest.au fixture and checks its
    # content type, payload encoding and Content-Disposition handling.

    def setUp(self):
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is compared after base64-decoding it back to bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b), which only reports "False is not true".
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list serves as a sentinel that can only be returned by
        # identity.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1321
1322
Ezio Melottib3aedd42010-11-20 19:04:17 +00001323
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001324# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Builds a MIMEImage part from the PyBanner048.gif fixture and checks
    # its content type, payload encoding and Content-Disposition handling.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is compared after base64-decoding it back to bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b), which only reports "False is not true".
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a sentinel that can only be returned by
        # identity.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1362
1363
Ezio Melottib3aedd42010-11-20 19:04:17 +00001364
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001365# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Both tests wrap the same six high-bit bytes in a MIMEApplication part.

    def test_headers(self):
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        # decode=True must hand back the original bytes.
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001380
1381
Ezio Melottib3aedd42010-11-20 19:04:17 +00001382
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001383# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    # Checks content type, charset parameter and payload handling of
    # MIMEText for ASCII and non-ASCII input.

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel that can only be returned by
        # identity; assertIs reports both operands if that fails.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the serialized message on failure.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1434
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001435
Ezio Melottib3aedd42010-11-20 19:04:17 +00001436
# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Builds, flattens and parses multipart/* messages, including several
    # deliberately awkward boundary layouts.

    def setUp(self):
        # Assemble a multipart/mixed container holding a short text intro
        # and a GIF attachment; tests inspect it via self._msg/_im/_txt.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        # Let the library format the local-time Date header.  The previous
        # hand-rolled "%04d" % (tzsecs / 36) produced malformed offsets east
        # of UTC ("+-100") and for zones that are not whole hours, and
        # strftime's %a/%b are locale-dependent.
        now = 987809702.54848599
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        m = self._msg
        self.assertTrue(m.is_multipart())
        self.assertEqual(m.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        self.assertEqual(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # _structure() indents each nesting level by four spaces.
        # NOTE(review): nesting depths reconstructed; confirm against
        # msg_38.txt.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # _structure() indents each nesting level by four spaces.
        # NOTE(review): nesting depths reconstructed; confirm against
        # msg_39.txt.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        # The leading whitespace is part of the boundary and must survive.
        self.assertEqual(msg.get_boundary(), ' XXXX')
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001809
1810
Ezio Melottib3aedd42010-11-20 19:04:17 +00001811
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    # Parses malformed messages and checks both the recovered content and
    # the defects the parser records on the resulting objects.

    def test_parse_missing_minor_type(self):
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # Without a boundary the body cannot be split, so it stays a string.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template message; '{}' is replaced with an optional extra header
    # (introduced by a newline) placed right after the Content-Type header.
    # The boundary line is indented further than the rest so that, after
    # dedent(), it still starts with whitespace and folds into Content-Type.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(
            msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # 'BINary' checks that the CTE value is matched case-insensitively.
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        self.ndiffAssertEqual(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        # The defect keeps the offending line verbatim.
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
1987
Ezio Melottib3aedd42010-11-20 19:04:17 +00001988
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Checks decode_header()/make_header() against RFC 2047 encoded words,
    # including the worked examples from section 8 of the RFC.

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8659-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047: the encoded words are separated
        # by TWO spaces.  Any amount of linear whitespace between adjacent
        # encoded words is dropped, so the result matches the 3rd case.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
2116
Ezio Melottib3aedd42010-11-20 19:04:17 +00002117
# Test the MIMEMessage class
2119class TestMIMEMessage(TestEmailBase):
2120 def setUp(self):
2121 with openfile('msg_11.txt') as fp:
2122 self._text = fp.read()
2123
2124 def test_type_error(self):
2125 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2126
2127 def test_valid_argument(self):
2128 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002129 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002130 subject = 'A sub-message'
2131 m = Message()
2132 m['Subject'] = subject
2133 r = MIMEMessage(m)
2134 eq(r.get_content_type(), 'message/rfc822')
2135 payload = r.get_payload()
2136 unless(isinstance(payload, list))
2137 eq(len(payload), 1)
2138 subpart = payload[0]
2139 unless(subpart is m)
2140 eq(subpart['subject'], subject)
2141
2142 def test_bad_multipart(self):
2143 eq = self.assertEqual
2144 msg1 = Message()
2145 msg1['Subject'] = 'subpart 1'
2146 msg2 = Message()
2147 msg2['Subject'] = 'subpart 2'
2148 r = MIMEMessage(msg1)
2149 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2150
2151 def test_generate(self):
2152 # First craft the message to be encapsulated
2153 m = Message()
2154 m['Subject'] = 'An enclosed message'
2155 m.set_payload('Here is the body of the message.\n')
2156 r = MIMEMessage(m)
2157 r['Subject'] = 'The enclosing message'
2158 s = StringIO()
2159 g = Generator(s)
2160 g.flatten(r)
2161 self.assertEqual(s.getvalue(), """\
2162Content-Type: message/rfc822
2163MIME-Version: 1.0
2164Subject: The enclosing message
2165
2166Subject: An enclosed message
2167
2168Here is the body of the message.
2169""")
2170
2171 def test_parse_message_rfc822(self):
2172 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002173 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002174 msg = self._msgobj('msg_11.txt')
2175 eq(msg.get_content_type(), 'message/rfc822')
2176 payload = msg.get_payload()
2177 unless(isinstance(payload, list))
2178 eq(len(payload), 1)
2179 submsg = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002180 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002181 eq(submsg['subject'], 'An enclosed message')
2182 eq(submsg.get_payload(), 'Here is the body of the message.\n')
2183
    def test_dsn(self):
        # Walks the three subparts of a parsed multipart/report message:
        # the human-readable text, the message/delivery-status blocks and
        # the returned original message.
        eq = self.assertEqual
        unless = self.assertTrue
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        unless(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines below
        # must match msg_16.txt exactly -- confirm against the fixture.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information. It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        unless(isinstance(dsn1, Message))
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        # A parameter that is present without a value comes back as ''.
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        unless(isinstance(dsn2, Message))
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        subsubpart = payload[0]
        unless(isinstance(subsubpart, Message))
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')
2241
2242 def test_epilogue(self):
2243 eq = self.ndiffAssertEqual
2244 with openfile('msg_21.txt') as fp:
2245 text = fp.read()
2246 msg = Message()
2247 msg['From'] = 'aperson@dom.ain'
2248 msg['To'] = 'bperson@dom.ain'
2249 msg['Subject'] = 'Test'
2250 msg.preamble = 'MIME message'
2251 msg.epilogue = 'End of MIME message\n'
2252 msg1 = MIMEText('One')
2253 msg2 = MIMEText('Two')
2254 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2255 msg.attach(msg1)
2256 msg.attach(msg2)
2257 sfp = StringIO()
2258 g = Generator(sfp)
2259 g.flatten(msg)
2260 eq(sfp.getvalue(), text)
2261
def test_no_nl_preamble(self):
    # The preamble is set without a trailing newline and the epilogue is
    # the empty string; the expected text below shows the preamble on its
    # own line directly above the first boundary.
    expected = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
"""
    outer = Message()
    for name, value in (('From', 'aperson@dom.ain'),
                        ('To', 'bperson@dom.ain'),
                        ('Subject', 'Test')):
        outer[name] = value
    outer.preamble = 'MIME message'
    outer.epilogue = ''
    outer.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    for body in ('One', 'Two'):
        outer.attach(MIMEText(body))
    self.ndiffAssertEqual(outer.as_string(), expected)
2296
def test_default_type(self):
    # Parse msg_30.txt; its first two parts must report message/rfc822 and
    # the message nested in each must report text/plain, both as default
    # type and as content type.
    with openfile('msg_30.txt') as fp:
        msg = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        container = msg.get_payload(index)
        self.assertEqual(container.get_default_type(), 'message/rfc822')
        self.assertEqual(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        nested = container.get_payload(0)
        self.assertEqual(nested.get_default_type(), 'text/plain')
        self.assertEqual(nested.get_content_type(), 'text/plain')
2313
def test_default_type_with_explicit_container_type(self):
    # Same checks as test_default_type, run against the msg_28.txt
    # fixture: the first two parts are message/rfc822 and the message
    # nested in each is text/plain.
    with openfile('msg_28.txt') as fp:
        msg = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        container = msg.get_payload(index)
        self.assertEqual(container.get_default_type(), 'message/rfc822')
        self.assertEqual(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        nested = container.get_payload(0)
        self.assertEqual(nested.get_default_type(), 'text/plain')
        self.assertEqual(nested.get_content_type(), 'text/plain')
2330
def test_default_type_non_parsed(self):
    # Build a multipart/digest by hand from two MIMEMessage wrappers and
    # check the reported types and the flattened text, first with the
    # wrappers' own Content-Type/MIME-Version headers and then again after
    # those headers have been deleted.
    digest = MIMEMultipart('digest', 'BOUNDARY')
    digest.epilogue = ''
    wrappers = [MIMEMessage(MIMEText(body))
                for body in ('message 1\n', 'message 2\n')]
    for wrapper in wrappers:
        digest.attach(wrapper)

    def check_types():
        # Both wrappers must report message/rfc822 either way.
        for wrapper in wrappers:
            self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapper.get_default_type(), 'message/rfc822')

    check_types()
    self.ndiffAssertEqual(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
    # Strip the explicit headers from each wrapper; the types asserted
    # above are expected to stay the same.
    for wrapper in wrappers:
        del wrapper['content-type']
        del wrapper['mime-version']
    check_types()
    self.ndiffAssertEqual(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
2404
2405 def test_mime_attachments_in_constructor(self):
2406 eq = self.assertEqual
2407 text1 = MIMEText('')
2408 text2 = MIMEText('')
2409 msg = MIMEMultipart(_subparts=(text1, text2))
2410 eq(len(msg.get_payload()), 2)
2411 eq(msg.get_payload(0), text1)
2412 eq(msg.get_payload(1), text2)
2413
Christian Heimes587c2bf2008-01-19 16:21:02 +00002414 def test_default_multipart_constructor(self):
2415 msg = MIMEMultipart()
2416 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002417
Ezio Melottib3aedd42010-11-20 19:04:17 +00002418
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Round-trip tests: each fixture is parsed from text into a message
    # object and flattened again, and the regenerated text is compared with
    # the original input.

    # Line ending that the preamble/epilogue/payload assertions expect.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named fixture so the
        # caller can compare regenerated output against the exact input.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg and require the result to equal text.  maxheaderlen=0
        # is passed so long headers are not wrapped on the way out.
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This is the only round trip here that also regenerates the
        # Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002581
2582
Ezio Melottib3aedd42010-11-20 19:04:17 +00002583
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002584# Test various other bits of the package's functionality
2585class TestMiscellaneous(TestEmailBase):
2586 def test_message_from_string(self):
2587 with openfile('msg_01.txt') as fp:
2588 text = fp.read()
2589 msg = email.message_from_string(text)
2590 s = StringIO()
2591 # Don't wrap/continue long headers since we're trying to test
2592 # idempotency.
2593 g = Generator(s, maxheaderlen=0)
2594 g.flatten(msg)
2595 self.assertEqual(text, s.getvalue())
2596
2597 def test_message_from_file(self):
2598 with openfile('msg_01.txt') as fp:
2599 text = fp.read()
2600 fp.seek(0)
2601 msg = email.message_from_file(fp)
2602 s = StringIO()
2603 # Don't wrap/continue long headers since we're trying to test
2604 # idempotency.
2605 g = Generator(s, maxheaderlen=0)
2606 g.flatten(msg)
2607 self.assertEqual(text, s.getvalue())
2608
2609 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002610 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002611 with openfile('msg_01.txt') as fp:
2612 text = fp.read()
2613
2614 # Create a subclass
2615 class MyMessage(Message):
2616 pass
2617
2618 msg = email.message_from_string(text, MyMessage)
2619 unless(isinstance(msg, MyMessage))
2620 # Try something more complicated
2621 with openfile('msg_02.txt') as fp:
2622 text = fp.read()
2623 msg = email.message_from_string(text, MyMessage)
2624 for subpart in msg.walk():
2625 unless(isinstance(subpart, MyMessage))
2626
2627 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002628 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002629 # Create a subclass
2630 class MyMessage(Message):
2631 pass
2632
2633 with openfile('msg_01.txt') as fp:
2634 msg = email.message_from_file(fp, MyMessage)
2635 unless(isinstance(msg, MyMessage))
2636 # Try something more complicated
2637 with openfile('msg_02.txt') as fp:
2638 msg = email.message_from_file(fp, MyMessage)
2639 for subpart in msg.walk():
2640 unless(isinstance(subpart, MyMessage))
2641
R David Murrayc27e5222012-05-25 15:01:48 -04002642 def test_custom_message_does_not_require_arguments(self):
2643 class MyMessage(Message):
2644 def __init__(self):
2645 super().__init__()
2646 msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2647 self.assertTrue(isinstance(msg, MyMessage))
2648
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002649 def test__all__(self):
2650 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002651 self.assertEqual(sorted(module.__all__), [
2652 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2653 'generator', 'header', 'iterators', 'message',
2654 'message_from_binary_file', 'message_from_bytes',
2655 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002656 'quoprimime', 'utils',
2657 ])
2658
2659 def test_formatdate(self):
2660 now = time.time()
2661 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2662 time.gmtime(now)[:6])
2663
2664 def test_formatdate_localtime(self):
2665 now = time.time()
2666 self.assertEqual(
2667 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2668 time.localtime(now)[:6])
2669
2670 def test_formatdate_usegmt(self):
2671 now = time.time()
2672 self.assertEqual(
2673 utils.formatdate(now, localtime=False),
2674 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2675 self.assertEqual(
2676 utils.formatdate(now, localtime=False, usegmt=True),
2677 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2678
2679 def test_parsedate_none(self):
2680 self.assertEqual(utils.parsedate(''), None)
2681
2682 def test_parsedate_compact(self):
2683 # The FWS after the comma is optional
2684 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2685 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2686
2687 def test_parsedate_no_dayofweek(self):
2688 eq = self.assertEqual
2689 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2690 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2691
2692 def test_parsedate_compact_no_dayofweek(self):
2693 eq = self.assertEqual
2694 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2695 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2696
R. David Murray4a62e892010-12-23 20:35:46 +00002697 def test_parsedate_no_space_before_positive_offset(self):
2698 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2699 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2700
2701 def test_parsedate_no_space_before_negative_offset(self):
2702 # Issue 1155362: we already handled '+' for this case.
2703 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2704 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2705
2706
R David Murrayaccd1c02011-03-13 20:06:23 -04002707 def test_parsedate_accepts_time_with_dots(self):
2708 eq = self.assertEqual
2709 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2710 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2711 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2712 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2713
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002714 def test_parsedate_acceptable_to_time_functions(self):
2715 eq = self.assertEqual
2716 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2717 t = int(time.mktime(timetup))
2718 eq(time.localtime(t)[:6], timetup[:6])
2719 eq(int(time.strftime('%Y', timetup)), 2003)
2720 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2721 t = int(time.mktime(timetup[:9]))
2722 eq(time.localtime(t)[:6], timetup[:6])
2723 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2724
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002725 def test_mktime_tz(self):
2726 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2727 -1, -1, -1, 0)), 0)
2728 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2729 -1, -1, -1, 1234)), -1234)
2730
R. David Murray219d1c82010-08-25 00:45:55 +00002731 def test_parsedate_y2k(self):
2732 """Test for parsing a date with a two-digit year.
2733
2734 Parsing a date with a two-digit year should return the correct
2735 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2736 obsoletes RFC822) requires four-digit years.
2737
2738 """
2739 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2740 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2741 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2742 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2743
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002744 def test_parseaddr_empty(self):
2745 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2746 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2747
2748 def test_noquote_dump(self):
2749 self.assertEqual(
2750 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2751 'A Silly Person <person@dom.ain>')
2752
2753 def test_escape_dump(self):
2754 self.assertEqual(
2755 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002756 r'"A (Very) Silly Person" <person@dom.ain>')
2757 self.assertEqual(
2758 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2759 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002760 a = r'A \(Special\) Person'
2761 b = 'person@dom.ain'
2762 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2763
2764 def test_escape_backslashes(self):
2765 self.assertEqual(
2766 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2767 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2768 a = r'Arthur \Backslash\ Foobar'
2769 b = 'person@dom.ain'
2770 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2771
R David Murray8debacb2011-04-06 09:35:57 -04002772 def test_quotes_unicode_names(self):
2773 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2774 name = "H\u00e4ns W\u00fcrst"
2775 addr = 'person@dom.ain'
2776 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2777 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2778 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2779 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2780 latin1_quopri)
2781
2782 def test_accepts_any_charset_like_object(self):
2783 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2784 name = "H\u00e4ns W\u00fcrst"
2785 addr = 'person@dom.ain'
2786 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2787 foobar = "FOOBAR"
2788 class CharsetMock:
2789 def header_encode(self, string):
2790 return foobar
2791 mock = CharsetMock()
2792 mock_expected = "%s <%s>" % (foobar, addr)
2793 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2794 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2795 utf8_base64)
2796
2797 def test_invalid_charset_like_object_raises_error(self):
2798 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2799 name = "H\u00e4ns W\u00fcrst"
2800 addr = 'person@dom.ain'
2801 # A object without a header_encode method:
2802 bad_charset = object()
2803 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2804 bad_charset)
2805
2806 def test_unicode_address_raises_error(self):
2807 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2808 addr = 'pers\u00f6n@dom.in'
2809 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2810 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2811
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002812 def test_name_with_dot(self):
2813 x = 'John X. Doe <jxd@example.com>'
2814 y = '"John X. Doe" <jxd@example.com>'
2815 a, b = ('John X. Doe', 'jxd@example.com')
2816 self.assertEqual(utils.parseaddr(x), (a, b))
2817 self.assertEqual(utils.parseaddr(y), (a, b))
2818 # formataddr() quotes the name if there's a dot in it
2819 self.assertEqual(utils.formataddr((a, b)), y)
2820
R. David Murray5397e862010-10-02 15:58:26 +00002821 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2822 # issue 10005. Note that in the third test the second pair of
2823 # backslashes is not actually a quoted pair because it is not inside a
2824 # comment or quoted string: the address being parsed has a quoted
2825 # string containing a quoted backslash, followed by 'example' and two
2826 # backslashes, followed by another quoted string containing a space and
2827 # the word 'example'. parseaddr copies those two backslashes
2828 # literally. Per rfc5322 this is not technically correct since a \ may
2829 # not appear in an address outside of a quoted string. It is probably
2830 # a sensible Postel interpretation, though.
2831 eq = self.assertEqual
2832 eq(utils.parseaddr('""example" example"@example.com'),
2833 ('', '""example" example"@example.com'))
2834 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2835 ('', '"\\"example\\" example"@example.com'))
2836 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2837 ('', '"\\\\"example\\\\" example"@example.com'))
2838
R. David Murray63563cd2010-12-18 18:25:38 +00002839 def test_parseaddr_preserves_spaces_in_local_part(self):
2840 # issue 9286. A normal RFC5322 local part should not contain any
2841 # folding white space, but legacy local parts can (they are a sequence
2842 # of atoms, not dotatoms). On the other hand we strip whitespace from
2843 # before the @ and around dots, on the assumption that the whitespace
2844 # around the punctuation is a mistake in what would otherwise be
2845 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2846 self.assertEqual(('', "merwok wok@xample.com"),
2847 utils.parseaddr("merwok wok@xample.com"))
2848 self.assertEqual(('', "merwok wok@xample.com"),
2849 utils.parseaddr("merwok wok@xample.com"))
2850 self.assertEqual(('', "merwok wok@xample.com"),
2851 utils.parseaddr(" merwok wok @xample.com"))
2852 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2853 utils.parseaddr('merwok"wok" wok@xample.com'))
2854 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2855 utils.parseaddr('merwok. wok . wok@xample.com'))
2856
R David Murrayb53319f2012-03-14 15:31:47 -04002857 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2858 addr = ("'foo@example.com' (foo@example.com)",
2859 'foo@example.com')
2860 addrstr = ('"\'foo@example.com\' '
2861 '(foo@example.com)" <foo@example.com>')
2862 self.assertEqual(utils.parseaddr(addrstr), addr)
2863 self.assertEqual(utils.formataddr(addr), addrstr)
2864
2865
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002866 def test_multiline_from_comment(self):
2867 x = """\
2868Foo
2869\tBar <foo@example.com>"""
2870 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2871
2872 def test_quote_dump(self):
2873 self.assertEqual(
2874 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2875 r'"A Silly; Person" <person@dom.ain>')
2876
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002877 def test_charset_richcomparisons(self):
2878 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002879 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002880 cset1 = Charset()
2881 cset2 = Charset()
2882 eq(cset1, 'us-ascii')
2883 eq(cset1, 'US-ASCII')
2884 eq(cset1, 'Us-AsCiI')
2885 eq('us-ascii', cset1)
2886 eq('US-ASCII', cset1)
2887 eq('Us-AsCiI', cset1)
2888 ne(cset1, 'usascii')
2889 ne(cset1, 'USASCII')
2890 ne(cset1, 'UsAsCiI')
2891 ne('usascii', cset1)
2892 ne('USASCII', cset1)
2893 ne('UsAsCiI', cset1)
2894 eq(cset1, cset2)
2895 eq(cset2, cset1)
2896
2897 def test_getaddresses(self):
2898 eq = self.assertEqual
2899 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2900 'Bud Person <bperson@dom.ain>']),
2901 [('Al Person', 'aperson@dom.ain'),
2902 ('Bud Person', 'bperson@dom.ain')])
2903
2904 def test_getaddresses_nasty(self):
2905 eq = self.assertEqual
2906 eq(utils.getaddresses(['foo: ;']), [('', '')])
2907 eq(utils.getaddresses(
2908 ['[]*-- =~$']),
2909 [('', ''), ('', ''), ('', '*--')])
2910 eq(utils.getaddresses(
2911 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2912 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2913
2914 def test_getaddresses_embedded_comment(self):
2915 """Test proper handling of a nested comment"""
2916 eq = self.assertEqual
2917 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2918 eq(addrs[0][1], 'foo@bar.com')
2919
2920 def test_utils_quote_unquote(self):
2921 eq = self.assertEqual
2922 msg = Message()
2923 msg.add_header('content-disposition', 'attachment',
2924 filename='foo\\wacky"name')
2925 eq(msg.get_filename(), 'foo\\wacky"name')
2926
2927 def test_get_body_encoding_with_bogus_charset(self):
2928 charset = Charset('not a charset')
2929 self.assertEqual(charset.get_body_encoding(), 'base64')
2930
2931 def test_get_body_encoding_with_uppercase_charset(self):
2932 eq = self.assertEqual
2933 msg = Message()
2934 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2935 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2936 charsets = msg.get_charsets()
2937 eq(len(charsets), 1)
2938 eq(charsets[0], 'utf-8')
2939 charset = Charset(charsets[0])
2940 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002941 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002942 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2943 eq(msg.get_payload(decode=True), b'hello world')
2944 eq(msg['content-transfer-encoding'], 'base64')
2945 # Try another one
2946 msg = Message()
2947 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2948 charsets = msg.get_charsets()
2949 eq(len(charsets), 1)
2950 eq(charsets[0], 'us-ascii')
2951 charset = Charset(charsets[0])
2952 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2953 msg.set_payload('hello world', charset=charset)
2954 eq(msg.get_payload(), 'hello world')
2955 eq(msg['content-transfer-encoding'], '7bit')
2956
2957 def test_charsets_case_insensitive(self):
2958 lc = Charset('us-ascii')
2959 uc = Charset('US-ASCII')
2960 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2961
2962 def test_partial_falls_inside_message_delivery_status(self):
2963 eq = self.ndiffAssertEqual
2964 # The Parser interface provides chunks of data to FeedParser in 8192
2965 # byte gulps. SF bug #1076485 found one of those chunks inside
2966 # message/delivery-status header block, which triggered an
2967 # unreadline() of NeedMoreData.
2968 msg = self._msgobj('msg_43.txt')
2969 sfp = StringIO()
2970 iterators._structure(msg, sfp)
2971 eq(sfp.getvalue(), """\
2972multipart/report
2973 text/plain
2974 message/delivery-status
2975 text/plain
2976 text/plain
2977 text/plain
2978 text/plain
2979 text/plain
2980 text/plain
2981 text/plain
2982 text/plain
2983 text/plain
2984 text/plain
2985 text/plain
2986 text/plain
2987 text/plain
2988 text/plain
2989 text/plain
2990 text/plain
2991 text/plain
2992 text/plain
2993 text/plain
2994 text/plain
2995 text/plain
2996 text/plain
2997 text/plain
2998 text/plain
2999 text/plain
3000 text/plain
3001 text/rfc822-headers
3002""")
3003
R. David Murraya0b44b52010-12-02 21:47:19 +00003004 def test_make_msgid_domain(self):
3005 self.assertEqual(
3006 email.utils.make_msgid(domain='testdomain-string')[-19:],
3007 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003008
Ezio Melottib3aedd42010-11-20 19:04:17 +00003009
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003010# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Checks for email.iterators plus a regression test for the feed
    # parser's line buffer.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        # Collect the payload of every subpart the iterator yields.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        # Collect the payload of every subpart the iterator yields.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (text pushed, number of complete lines expected to
        # become readable after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel, so test identity rather than
                # equality.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b).
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
3097
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003098
Ezio Melottib3aedd42010-11-20 19:04:17 +00003099
class TestParsers(TestEmailBase):
    # Tests for Parser, HeaderParser, BytesHeaderParser and
    # email.message_from_string()/message_from_file(): header syntax corner
    # cases, CRLF handling and multipart structure.

    # Show complete diffs when a long flattened message does not match.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on whatever keys() returns, without relying on it
        # being a list that can be sorted in place.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003275
Ezio Melottib3aedd42010-11-20 19:04:17 +00003276
R. David Murray96fd54e2010-10-08 15:55:28 +00003277class Test8BitBytesHandling(unittest.TestCase):
3278 # In Python3 all input is string, but that doesn't work if the actual input
3279 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3280 # decode byte streams using the surrogateescape error handler, and
3281 # reconvert to binary at appropriate places if we detect surrogates. This
3282 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3283 # but it does allow us to parse and preserve them, and to decode body
3284 # parts that use an 8bit CTE.
3285
3286 bodytest_msg = textwrap.dedent("""\
3287 From: foo@bar.com
3288 To: baz
3289 Mime-Version: 1.0
3290 Content-Type: text/plain; charset={charset}
3291 Content-Transfer-Encoding: {cte}
3292
3293 {bodyline}
3294 """)
3295
3296 def test_known_8bit_CTE(self):
3297 m = self.bodytest_msg.format(charset='utf-8',
3298 cte='8bit',
3299 bodyline='pöstal').encode('utf-8')
3300 msg = email.message_from_bytes(m)
3301 self.assertEqual(msg.get_payload(), "pöstal\n")
3302 self.assertEqual(msg.get_payload(decode=True),
3303 "pöstal\n".encode('utf-8'))
3304
3305 def test_unknown_8bit_CTE(self):
3306 m = self.bodytest_msg.format(charset='notavalidcharset',
3307 cte='8bit',
3308 bodyline='pöstal').encode('utf-8')
3309 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003310 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003311 self.assertEqual(msg.get_payload(decode=True),
3312 "pöstal\n".encode('utf-8'))
3313
3314 def test_8bit_in_quopri_body(self):
3315 # This is non-RFC compliant data...without 'decode' the library code
3316 # decodes the body using the charset from the headers, and because the
3317 # source byte really is utf-8 this works. This is likely to fail
3318 # against real dirty data (ie: produce mojibake), but the data is
3319 # invalid anyway so it is as good a guess as any. But this means that
3320 # this test just confirms the current behavior; that behavior is not
3321 # necessarily the best possible behavior. With 'decode' it is
3322 # returning the raw bytes, so that test should be of correct behavior,
3323 # or at least produce the same result that email4 did.
3324 m = self.bodytest_msg.format(charset='utf-8',
3325 cte='quoted-printable',
3326 bodyline='p=C3=B6stál').encode('utf-8')
3327 msg = email.message_from_bytes(m)
3328 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3329 self.assertEqual(msg.get_payload(decode=True),
3330 'pöstál\n'.encode('utf-8'))
3331
3332 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3333 # This is similar to the previous test, but proves that if the 8bit
3334 # byte is undecodeable in the specified charset, it gets replaced
3335 # by the unicode 'unknown' character. Again, this may or may not
3336 # be the ideal behavior. Note that if decode=False none of the
3337 # decoders will get involved, so this is the only test we need
3338 # for this behavior.
3339 m = self.bodytest_msg.format(charset='ascii',
3340 cte='quoted-printable',
3341 bodyline='p=C3=B6stál').encode('utf-8')
3342 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003343 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003344 self.assertEqual(msg.get_payload(decode=True),
3345 'pöstál\n'.encode('utf-8'))
3346
R David Murray80e0aee2012-05-27 21:23:34 -04003347 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003348 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003349 # If we get 8bit bytes in a base64 body, we can just ignore them
3350 # as being outside the base64 alphabet and decode anyway. But
3351 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003352 m = self.bodytest_msg.format(charset='utf-8',
3353 cte='base64',
3354 bodyline='cMO2c3RhbAá=').encode('utf-8')
3355 msg = email.message_from_bytes(m)
3356 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003357 'pöstal'.encode('utf-8'))
3358 self.assertIsInstance(msg.defects[0],
3359 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003360
3361 def test_8bit_in_uuencode_body(self):
3362 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3363 # normal means, so the block is returned undecoded, but as bytes.
3364 m = self.bodytest_msg.format(charset='utf-8',
3365 cte='uuencode',
3366 bodyline='<,.V<W1A; á ').encode('utf-8')
3367 msg = email.message_from_bytes(m)
3368 self.assertEqual(msg.get_payload(decode=True),
3369 '<,.V<W1A; á \n'.encode('utf-8'))
3370
3371
R. David Murray92532142011-01-07 23:25:30 +00003372 headertest_headers = (
3373 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3374 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3375 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3376 '\tJean de Baddie',
3377 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3378 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3379 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3380 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3381 )
3382 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3383 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003384
3385 def test_get_8bit_header(self):
3386 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003387 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3388 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003389
3390 def test_print_8bit_headers(self):
3391 msg = email.message_from_bytes(self.headertest_msg)
3392 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003393 textwrap.dedent("""\
3394 From: {}
3395 To: {}
3396 Subject: {}
3397 From: {}
3398
3399 Yes, they are flying.
3400 """).format(*[expected[1] for (_, expected) in
3401 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003402
3403 def test_values_with_8bit_headers(self):
3404 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003405 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003406 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003407 'b\uFFFD\uFFFDz',
3408 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3409 'coll\uFFFD\uFFFDgue, le pouf '
3410 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003411 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003412 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003413
3414 def test_items_with_8bit_headers(self):
3415 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003416 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003417 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003418 ('To', 'b\uFFFD\uFFFDz'),
3419 ('Subject', 'Maintenant je vous '
3420 'pr\uFFFD\uFFFDsente '
3421 'mon coll\uFFFD\uFFFDgue, le pouf '
3422 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3423 '\tJean de Baddie'),
3424 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003425
3426 def test_get_all_with_8bit_headers(self):
3427 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003428 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003429 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003430 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003431
R David Murraya2150232011-03-16 21:11:23 -04003432 def test_get_content_type_with_8bit(self):
3433 msg = email.message_from_bytes(textwrap.dedent("""\
3434 Content-Type: text/pl\xA7in; charset=utf-8
3435 """).encode('latin-1'))
3436 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3437 self.assertEqual(msg.get_content_maintype(), "text")
3438 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3439
3440 def test_get_params_with_8bit(self):
3441 msg = email.message_from_bytes(
3442 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3443 self.assertEqual(msg.get_params(header='x-header'),
3444 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3445 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3446 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3447 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3448
3449 def test_get_rfc2231_params_with_8bit(self):
3450 msg = email.message_from_bytes(textwrap.dedent("""\
3451 Content-Type: text/plain; charset=us-ascii;
3452 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3453 ).encode('latin-1'))
3454 self.assertEqual(msg.get_param('title'),
3455 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3456
3457 def test_set_rfc2231_params_with_8bit(self):
3458 msg = email.message_from_bytes(textwrap.dedent("""\
3459 Content-Type: text/plain; charset=us-ascii;
3460 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3461 ).encode('latin-1'))
3462 msg.set_param('title', 'test')
3463 self.assertEqual(msg.get_param('title'), 'test')
3464
3465 def test_del_rfc2231_params_with_8bit(self):
3466 msg = email.message_from_bytes(textwrap.dedent("""\
3467 Content-Type: text/plain; charset=us-ascii;
3468 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3469 ).encode('latin-1'))
3470 msg.del_param('title')
3471 self.assertEqual(msg.get_param('title'), None)
3472 self.assertEqual(msg.get_content_maintype(), 'text')
3473
3474 def test_get_payload_with_8bit_cte_header(self):
3475 msg = email.message_from_bytes(textwrap.dedent("""\
3476 Content-Transfer-Encoding: b\xa7se64
3477 Content-Type: text/plain; charset=latin-1
3478
3479 payload
3480 """).encode('latin-1'))
3481 self.assertEqual(msg.get_payload(), 'payload\n')
3482 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3483
R. David Murray96fd54e2010-10-08 15:55:28 +00003484 non_latin_bin_msg = textwrap.dedent("""\
3485 From: foo@bar.com
3486 To: báz
3487 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3488 \tJean de Baddie
3489 Mime-Version: 1.0
3490 Content-Type: text/plain; charset="utf-8"
3491 Content-Transfer-Encoding: 8bit
3492
3493 Да, они летят.
3494 """).encode('utf-8')
3495
3496 def test_bytes_generator(self):
3497 msg = email.message_from_bytes(self.non_latin_bin_msg)
3498 out = BytesIO()
3499 email.generator.BytesGenerator(out).flatten(msg)
3500 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3501
R. David Murray7372a072011-01-26 21:21:32 +00003502 def test_bytes_generator_handles_None_body(self):
3503 #Issue 11019
3504 msg = email.message.Message()
3505 out = BytesIO()
3506 email.generator.BytesGenerator(out).flatten(msg)
3507 self.assertEqual(out.getvalue(), b"\n")
3508
R. David Murray92532142011-01-07 23:25:30 +00003509 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003510 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003511 To: =?unknown-8bit?q?b=C3=A1z?=
3512 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3513 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3514 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003515 Mime-Version: 1.0
3516 Content-Type: text/plain; charset="utf-8"
3517 Content-Transfer-Encoding: base64
3518
3519 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3520 """)
3521
3522 def test_generator_handles_8bit(self):
3523 msg = email.message_from_bytes(self.non_latin_bin_msg)
3524 out = StringIO()
3525 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003526 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003527
3528 def test_bytes_generator_with_unix_from(self):
3529 # The unixfrom contains a current date, so we can't check it
3530 # literally. Just make sure the first word is 'From' and the
3531 # rest of the message matches the input.
3532 msg = email.message_from_bytes(self.non_latin_bin_msg)
3533 out = BytesIO()
3534 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3535 lines = out.getvalue().split(b'\n')
3536 self.assertEqual(lines[0].split()[0], b'From')
3537 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3538
R. David Murray92532142011-01-07 23:25:30 +00003539 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3540 non_latin_bin_msg_as7bit[2:4] = [
3541 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3542 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3543 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3544
R. David Murray96fd54e2010-10-08 15:55:28 +00003545 def test_message_from_binary_file(self):
3546 fn = 'test.msg'
3547 self.addCleanup(unlink, fn)
3548 with open(fn, 'wb') as testfile:
3549 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003550 with open(fn, 'rb') as testfile:
3551 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003552 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3553
3554 latin_bin_msg = textwrap.dedent("""\
3555 From: foo@bar.com
3556 To: Dinsdale
3557 Subject: Nudge nudge, wink, wink
3558 Mime-Version: 1.0
3559 Content-Type: text/plain; charset="latin-1"
3560 Content-Transfer-Encoding: 8bit
3561
3562 oh là là, know what I mean, know what I mean?
3563 """).encode('latin-1')
3564
3565 latin_bin_msg_as7bit = textwrap.dedent("""\
3566 From: foo@bar.com
3567 To: Dinsdale
3568 Subject: Nudge nudge, wink, wink
3569 Mime-Version: 1.0
3570 Content-Type: text/plain; charset="iso-8859-1"
3571 Content-Transfer-Encoding: quoted-printable
3572
3573 oh l=E0 l=E0, know what I mean, know what I mean?
3574 """)
3575
3576 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3577 m = email.message_from_bytes(self.latin_bin_msg)
3578 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3579
3580 def test_decoded_generator_emits_unicode_body(self):
3581 m = email.message_from_bytes(self.latin_bin_msg)
3582 out = StringIO()
3583 email.generator.DecodedGenerator(out).flatten(m)
3584 #DecodedHeader output contains an extra blank line compared
3585 #to the input message. RDM: not sure if this is a bug or not,
3586 #but it is not specific to the 8bit->7bit conversion.
3587 self.assertEqual(out.getvalue(),
3588 self.latin_bin_msg.decode('latin-1')+'\n')
3589
3590 def test_bytes_feedparser(self):
3591 bfp = email.feedparser.BytesFeedParser()
3592 for i in range(0, len(self.latin_bin_msg), 10):
3593 bfp.feed(self.latin_bin_msg[i:i+10])
3594 m = bfp.close()
3595 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3596
R. David Murray8451c4b2010-10-23 22:19:56 +00003597 def test_crlf_flatten(self):
3598 with openfile('msg_26.txt', 'rb') as fp:
3599 text = fp.read()
3600 msg = email.message_from_bytes(text)
3601 s = BytesIO()
3602 g = email.generator.BytesGenerator(s)
3603 g.flatten(msg, linesep='\r\n')
3604 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003605
3606 def test_8bit_multipart(self):
3607 # Issue 11605
3608 source = textwrap.dedent("""\
3609 Date: Fri, 18 Mar 2011 17:15:43 +0100
3610 To: foo@example.com
3611 From: foodwatch-Newsletter <bar@example.com>
3612 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3613 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3614 MIME-Version: 1.0
3615 Content-Type: multipart/alternative;
3616 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3617
3618 --b1_76a486bee62b0d200f33dc2ca08220ad
3619 Content-Type: text/plain; charset="utf-8"
3620 Content-Transfer-Encoding: 8bit
3621
3622 Guten Tag, ,
3623
3624 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3625 Nachrichten aus Japan.
3626
3627
3628 --b1_76a486bee62b0d200f33dc2ca08220ad
3629 Content-Type: text/html; charset="utf-8"
3630 Content-Transfer-Encoding: 8bit
3631
3632 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3633 "http://www.w3.org/TR/html4/loose.dtd">
3634 <html lang="de">
3635 <head>
3636 <title>foodwatch - Newsletter</title>
3637 </head>
3638 <body>
3639 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3640 die Nachrichten aus Japan.</p>
3641 </body>
3642 </html>
3643 --b1_76a486bee62b0d200f33dc2ca08220ad--
3644
3645 """).encode('utf-8')
3646 msg = email.message_from_bytes(source)
3647 s = BytesIO()
3648 g = email.generator.BytesGenerator(s)
3649 g.flatten(msg)
3650 self.assertEqual(s.getvalue(), source)
3651
R David Murray9fd170e2012-03-14 14:05:03 -04003652 def test_bytes_generator_b_encoding_linesep(self):
3653 # Issue 14062: b encoding was tacking on an extra \n.
3654 m = Message()
3655 # This has enough non-ascii that it should always end up b encoded.
3656 m['Subject'] = Header('žluťoučký kůň')
3657 s = BytesIO()
3658 g = email.generator.BytesGenerator(s)
3659 g.flatten(m, linesep='\r\n')
3660 self.assertEqual(
3661 s.getvalue(),
3662 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3663
3664 def test_generator_b_encoding_linesep(self):
3665 # Since this broke in ByteGenerator, test Generator for completeness.
3666 m = Message()
3667 # This has enough non-ascii that it should always end up b encoded.
3668 m['Subject'] = Header('žluťoučký kůň')
3669 s = StringIO()
3670 g = email.generator.Generator(s)
3671 g.flatten(m, linesep='\r\n')
3672 self.assertEqual(
3673 s.getvalue(),
3674 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3675
R. David Murray8451c4b2010-10-23 22:19:56 +00003676 maxDiff = None
3677
Ezio Melottib3aedd42010-11-20 19:04:17 +00003678
class BaseTestBytesGeneratorIdempotent:
    # Mixin supplying bytes-based _msgobj() and _idempotent() helpers.  The
    # concrete class must define linesep (str), blinesep (bytes) and
    # normalize_linesep_regex (a compiled bytes pattern), and must provide
    # assertEqual().

    maxDiff = None

    def _msgobj(self, filename):
        # Load the fixture as bytes, rewrite its line endings to blinesep,
        # and return both the parsed message and the bytes it was parsed
        # from.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg through BytesGenerator (maxheaderlen=0, this class's
        # linesep) and require the output to equal data.
        sink = BytesIO()
        flattener = email.generator.BytesGenerator(sink, maxheaderlen=0)
        flattener.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003695
3696
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    # Bare-LF variant: the mixin's helpers take precedence over whatever
    # TestIdempotent defines under the same names.
    # Fixture bytes have each CRLF replaced by blinesep before parsing.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    # Line separator handed to BytesGenerator.flatten().
    linesep = '\n'
3702
3703
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                      TestIdempotent):
    # CRLF variant: the mixin's helpers take precedence over whatever
    # TestIdempotent defines under the same names.
    # Only a LF that is not already preceded by CR is replaced by blinesep,
    # so existing CRLF pairs in a fixture are left alone.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    # Line separator handed to BytesGenerator.flatten().
    linesep = '\r\n'
3709
Ezio Melottib3aedd42010-11-20 19:04:17 +00003710
class TestBase64(unittest.TestCase):
    # Tests for the email.base64mime helpers: header_length(), decode(),
    # body_encode() and header_encode().

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Every started group of three input characters is expected to
            # take four output characters.
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        check = self.assertEqual
        check(base64mime.decode(''), b'')
        check(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # With maxlinelen=40 the payload is expected as three identical full
        # lines followed by a short padded one.
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        wrapped = [full_line, full_line, full_line, last_line]
        payload = b'xxxx ' * 20
        # Test the maxlinelen arg
        check(encode(payload, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        check(encode(payload, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003761
3762
Ezio Melottib3aedd42010-11-20 19:04:17 +00003763
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003764class TestQuopri(unittest.TestCase):
3765 def setUp(self):
3766 # Set of characters (as byte integers) that don't need to be encoded
3767 # in headers.
3768 self.hlit = list(chain(
3769 range(ord('a'), ord('z') + 1),
3770 range(ord('A'), ord('Z') + 1),
3771 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003772 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003773 # Set of characters (as byte integers) that do need to be encoded in
3774 # headers.
3775 self.hnon = [c for c in range(256) if c not in self.hlit]
3776 assert len(self.hlit) + len(self.hnon) == 256
3777 # Set of characters (as byte integers) that don't need to be encoded
3778 # in bodies.
3779 self.blit = list(range(ord(' '), ord('~') + 1))
3780 self.blit.append(ord('\t'))
3781 self.blit.remove(ord('='))
3782 # Set of characters (as byte integers) that do need to be encoded in
3783 # bodies.
3784 self.bnon = [c for c in range(256) if c not in self.blit]
3785 assert len(self.blit) + len(self.bnon) == 256
3786
Guido van Rossum9604e662007-08-30 03:46:43 +00003787 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003788 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003789 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003790 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003791 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003792 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003793 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003794
Guido van Rossum9604e662007-08-30 03:46:43 +00003795 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003796 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003797 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003798 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003799 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003800 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003801 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003802
3803 def test_header_quopri_len(self):
3804 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003805 eq(quoprimime.header_length(b'hello'), 5)
3806 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003807 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003808 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003809 # =?xxx?q?...?= means 10 extra characters
3810 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003811 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3812 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003813 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003814 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003815 # =?xxx?q?...?= means 10 extra characters
3816 10)
3817 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003818 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003819 'expected length 1 for %r' % chr(c))
3820 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003821 # Space is special; it's encoded to _
3822 if c == ord(' '):
3823 continue
3824 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003825 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003826 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003827
3828 def test_body_quopri_len(self):
3829 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003830 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003831 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003832 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003833 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003834
3835 def test_quote_unquote_idempotent(self):
3836 for x in range(256):
3837 c = chr(x)
3838 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3839
def _test_header_encode(self, header, expected_encoded_header, charset=None):
    # Helper: encode `header` and compare with the expectation.  The charset
    # is forwarded (positionally) only when the caller supplied one, so that
    # header_encode()'s own default is exercised otherwise.
    extra_args = () if charset is None else (charset,)
    self.assertEqual(quoprimime.header_encode(header, *extra_args),
                     expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003846
def test_header_encode_null(self):
    # Empty bytes encode to the empty string.
    self._test_header_encode(header=b'', expected_encoded_header='')
3849
def test_header_encode_one_word(self):
    # With no charset given the encoded word is labelled iso-8859-1.
    expected = '=?iso-8859-1?q?hello?='
    self._test_header_encode(b'hello', expected)
3852
def test_header_encode_two_lines(self):
    # An embedded newline is expected to come out as =0A.
    expected = '=?iso-8859-1?q?hello=0Aworld?='
    self._test_header_encode(b'hello\nworld', expected)
3856
def test_header_encode_non_ascii(self):
    # A non-ASCII octet is expected to come out as =XX (upper-case hex).
    expected = '=?iso-8859-1?q?hello=C7there?='
    self._test_header_encode(b'hello\xc7there', expected)
3860
def test_header_encode_alt_charset(self):
    # An explicit charset shows up in the encoded-word label.
    alt_charset = 'iso-8859-2'
    self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                             charset=alt_charset)
3864
def _test_header_decode(self, encoded_header, expected_decoded_header):
    # Helper: decode a header payload and compare it with the expectation.
    self.assertEqual(quoprimime.header_decode(encoded_header),
                     expected_decoded_header)
3868
def test_header_decode_null(self):
    # The empty string decodes to the empty string.
    self._test_header_decode(encoded_header='', expected_decoded_header='')
3871
def test_header_decode_one_word(self):
    # Text without any =XX escapes passes through unchanged.
    word = 'hello'
    self._test_header_decode(word, word)
3874
def test_header_decode_two_lines(self):
    # =0A decodes to a newline character.
    self._test_header_decode(encoded_header='hello=0Aworld',
                             expected_decoded_header='hello\nworld')
3877
def test_header_decode_non_ascii(self):
    # =C7 decodes to the code point U+00C7.
    self._test_header_decode(encoded_header='hello=C7there',
                             expected_decoded_header='hello\xc7there')
3880
def _test_decode(self, encoded, expected_decoded, eol=None):
    # Helper: decode `encoded` and compare with the expectation.  `eol` is
    # forwarded only when given, so decode()'s own default applies otherwise.
    options = {} if eol is None else {'eol': eol}
    self.assertEqual(quoprimime.decode(encoded, **options), expected_decoded)
3887
def test_decode_null_word(self):
    # The empty string decodes to the empty string.
    self._test_decode(encoded='', expected_decoded='')
3890
def test_decode_null_line_null_word(self):
    # A bare CRLF decodes to a single newline.
    self._test_decode(encoded='\r\n', expected_decoded='\n')
3893
def test_decode_one_word(self):
    # A word without a line ending is returned unchanged.
    word = 'hello'
    self._test_decode(word, word)
3896
def test_decode_one_word_eol(self):
    # No eol is appended when the input has no line ending, even when a
    # custom eol is requested.
    word = 'hello'
    self._test_decode(word, word, eol='X')
3899
def test_decode_one_line(self):
    # A CRLF line ending comes back as '\n'.
    self._test_decode(encoded='hello\r\n', expected_decoded='hello\n')
3902
def test_decode_one_line_lf(self):
    # An LF line ending stays '\n'.
    line = 'hello\n'
    self._test_decode(line, line)
3905
def test_decode_one_line_cr(self):
    # A lone CR line ending comes back as '\n'.
    self._test_decode(encoded='hello\r', expected_decoded='hello\n')
3908
def test_decode_one_line_nl(self):
    # With eol='X', an LF line ending is replaced by 'X'.
    self._test_decode(encoded='hello\n', expected_decoded='helloX', eol='X')
3911
def test_decode_one_line_crnl(self):
    # With eol='X', a CRLF line ending is replaced by a single 'X'.
    self._test_decode(encoded='hello\r\n', expected_decoded='helloX', eol='X')
3914
def test_decode_one_line_one_word(self):
    # A terminated line followed by an unterminated word.
    self._test_decode(encoded='hello\r\nworld',
                      expected_decoded='hello\nworld')
3917
def test_decode_one_line_one_word_eol(self):
    # Same input as above with eol='X': only the real line break becomes 'X'.
    self._test_decode(encoded='hello\r\nworld',
                      expected_decoded='helloXworld',
                      eol='X')
3920
def test_decode_two_lines(self):
    # Two CRLF-terminated lines come back LF-terminated.
    self._test_decode(encoded='hello\r\nworld\r\n',
                      expected_decoded='hello\nworld\n')
3923
def test_decode_two_lines_eol(self):
    # Two CRLF-terminated lines with eol='X': each line break becomes 'X'.
    self._test_decode(encoded='hello\r\nworld\r\n',
                      expected_decoded='helloXworldX',
                      eol='X')
3926
def test_decode_one_long_line(self):
    # A 1000-character line without escapes is returned unchanged.
    long_line = 'Spam' * 250
    self._test_decode(long_line, long_line)
3929
def test_decode_one_space(self):
    # A lone space decodes to the empty string.
    self._test_decode(encoded=' ', expected_decoded='')
3932
def test_decode_multiple_spaces(self):
    # A run of five spaces decodes to the empty string.
    only_spaces = ' ' * 5
    self._test_decode(only_spaces, '')
3935
def test_decode_one_line_trailing_spaces(self):
    # Whitespace in front of the line ending is dropped.
    self._test_decode(encoded='hello \r\n', expected_decoded='hello\n')
3938
def test_decode_two_lines_trailing_spaces(self):
    # Whitespace in front of each line ending is dropped.
    self._test_decode(encoded='hello \r\nworld \r\n',
                      expected_decoded='hello\nworld\n')
3941
def test_decode_quoted_word(self):
    # =22 and =20 decode to a double quote and a space respectively.
    self._test_decode(encoded='=22quoted=20words=22',
                      expected_decoded='"quoted words"')
3944
def test_decode_uppercase_quoting(self):
    # Upper-case hex digits in =XX escapes are accepted.
    self._test_decode(encoded='ab=CD=EF', expected_decoded='ab\xcd\xef')
3947
def test_decode_lowercase_quoting(self):
    # Lower-case hex digits in =xx escapes are accepted as well.
    self._test_decode(encoded='ab=cd=ef', expected_decoded='ab\xcd\xef')
3950
def test_decode_soft_line_break(self):
    # '=' directly before the line ending is a soft break: both vanish.
    self._test_decode(encoded='soft line=\r\nbreak',
                      expected_decoded='soft linebreak')
3953
def test_decode_false_quoting(self):
    # '=' signs that do not introduce a valid =XX escape are left alone.
    text = 'A=1,B=A ==> A+B==2'
    self._test_decode(text, text)
3956
def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
    # Helper: body-encode `body`, compare with the expectation and, where
    # possible, verify that no output line exceeds the line-length limit.
    # Only options the caller actually supplied are forwarded, so that
    # body_encode's defaults are exercised otherwise.
    supplied = {'maxlinelen': maxlinelen, 'eol': eol}
    options = {name: value
               for name, value in supplied.items() if value is not None}
    # Mirror body_encode's defaults for the length check below.
    line_limit = options.get('maxlinelen', 76)
    line_ending = options.get('eol', '\n')
    result = quoprimime.body_encode(body, **options)
    self.assertEqual(result, expected_encoded_body)
    if line_ending in ('\n', '\r\n'):
        # We know how to split the result back into lines, so maxlinelen
        # can be checked.
        for encoded_line in result.splitlines():
            self.assertLessEqual(len(encoded_line), line_limit)
3976
def test_encode_null(self):
    # The empty body encodes to the empty string.
    self._test_encode(body='', expected_encoded_body='')
3979
def test_encode_null_lines(self):
    # Empty lines are passed through unchanged.
    blank_lines = '\n\n'
    self._test_encode(blank_lines, blank_lines)
3982
def test_encode_one_line(self):
    # A plain LF-terminated line needs no encoding.
    line = 'hello\n'
    self._test_encode(line, line)
3985
def test_encode_one_line_crlf(self):
    # A CRLF in the input is emitted as the default eol, '\n'.
    self._test_encode(body='hello\r\n', expected_encoded_body='hello\n')
3988
def test_encode_one_line_eol(self):
    # An explicit eol replaces the input's line ending.
    self._test_encode(body='hello\n',
                      expected_encoded_body='hello\r\n',
                      eol='\r\n')
3991
def test_encode_one_space(self):
    # A body consisting of a single space is encoded as =20.
    self._test_encode(body=' ', expected_encoded_body='=20')
3994
def test_encode_one_line_one_space(self):
    # A space directly before the line ending is encoded as =20.
    self._test_encode(body=' \n', expected_encoded_body='=20\n')
3997
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list. For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4002
def test_encode_two_lines_one_space(self):
    # Each line's lone space is encoded separately.
    self._test_encode(body=' \n \n', expected_encoded_body='=20\n=20\n')
4005
def test_encode_one_word_trailing_spaces(self):
    # Only the last whitespace character of a line has to be quoted; any
    # whitespace before it stays literal.  The expected value 'hello =20'
    # is "hello", one literal space and one quoted space, so the input must
    # end in two spaces (a single trailing space would encode to
    # 'hello=20').
    self._test_encode('hello  ', 'hello =20')
4008
def test_encode_one_line_trailing_spaces(self):
    # Only the last whitespace character before the line ending has to be
    # quoted.  The expected value 'hello =20\n' is "hello", one literal
    # space and one quoted space, so the input line must end in two spaces
    # (a single trailing space would encode to 'hello=20\n').
    self._test_encode('hello  \n', 'hello =20\n')
4011
def test_encode_one_word_trailing_tab(self):
    # A trailing tab is quoted as =09; the space before it stays literal.
    self._test_encode(body='hello \t', expected_encoded_body='hello =09')
4014
def test_encode_one_line_trailing_tab(self):
    # A tab directly before the line ending is quoted as =09; the space
    # before it stays literal.
    self._test_encode(body='hello \t\n', expected_encoded_body='hello =09\n')
4017
def test_encode_trailing_space_before_maxlinelen(self):
    # maxlinelen=6: the trailing space stays literal and is protected by a
    # soft line break.
    self._test_encode(body='abcd \n1234',
                      expected_encoded_body='abcd =\n\n1234',
                      maxlinelen=6)
4020
def test_encode_trailing_space_at_maxlinelen(self):
    # maxlinelen=5: the trailing space is quoted on a line of its own after
    # a soft line break.
    self._test_encode(body='abcd \n1234',
                      expected_encoded_body='abcd=\n=20\n1234',
                      maxlinelen=5)
4023
def test_encode_trailing_space_beyond_maxlinelen(self):
    # maxlinelen=4: the word itself is split and the trailing space is
    # quoted on the continuation line.
    self._test_encode(body='abcd \n1234',
                      expected_encoded_body='abc=\nd=20\n1234',
                      maxlinelen=4)
4026
def test_encode_whitespace_lines(self):
    # Five lines holding one space each: every space is quoted.
    line_count = 5
    self._test_encode(' \n' * line_count, '=20\n' * line_count)
R David Murrayec1b5b82011-03-23 14:19:05 -04004029
def test_encode_quoted_equals(self):
    # A literal '=' is quoted as =3D.
    self._test_encode(body='a = b', expected_encoded_body='a =3D b')
4032
def test_encode_one_long_string(self):
    # 100 characters without a newline: a soft line break follows the
    # first 75 of them.
    wrapped = 'x' * 75 + '=\n' + 'x' * 25
    self._test_encode('x' * 100, wrapped)
4035
def test_encode_one_long_line(self):
    # A 100-character line gets a soft line break ('=' + eol) after the
    # first 75 characters; the terminating newline is kept.
    self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

def test_encode_one_very_long_line(self):
    # A 200-character line needs two soft line breaks.
    self._test_encode('x' * 200 + '\n',
                      2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

# NOTE: test_encode_one_long_line used to be defined a second time here,
# byte-for-byte identical to the definition above.  The later definition
# silently replaced the earlier one in the class namespace, so it added no
# coverage and has been removed.
4045
def test_encode_shortest_maxlinelen(self):
    # maxlinelen=4 leaves room for exactly one quoted character ('=3D')
    # plus the soft-break '=' on every line but the last.
    equals_count = 5
    expected = '=3D=\n' * (equals_count - 1) + '=3D'
    self._test_encode('=' * equals_count, expected, maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004048
def test_encode_maxlinelen_too_small(self):
    # maxlinelen=3 must be rejected with ValueError.
    with self.assertRaises(ValueError):
        self._test_encode('', '', maxlinelen=3)
4051
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004052 def test_encode(self):
4053 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00004054 eq(quoprimime.body_encode(''), '')
4055 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004056 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00004057 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004058 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00004059 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004060xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4061 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4062x xxxx xxxx xxxx xxxx=20""")
4063 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00004064 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4065 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004066xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4067 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4068x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00004069 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004070one line
4071
4072two line"""), """\
4073one line
4074
4075two line""")
4076
4077
Ezio Melottib3aedd42010-11-20 19:04:17 +00004078
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004079# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a 'fake' charset in the email.charset
        # module's CHARSETS registry; remove it again if it is there.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eq = self.assertEqual
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        eq(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        eq(Charset('utf-8').header_encode(eight_bit),
           '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # Try a charset with QP body encoding
        eq('hello w=F6rld', Charset('iso-8859-1').body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        eq('aGVsbG8gd29ybGQ=\n', Charset('utf-8').body_encode(b'hello world'))
        # Try a charset with None body encoding
        eq('hello world', Charset('us-ascii').body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        eq('hello world', Charset('fake').body_encode('hello world'))

    def test_unicode_charset_name(self):
        # str() of a Charset is its name; a non-ASCII name is rejected.
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4134
4135
Ezio Melottib3aedd42010-11-20 19:04:17 +00004136
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004137# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    # Exercises email.header: Header construction/append/encode,
    # decode_header() and make_header(), including line splitting of
    # quoted-printable and base64 encoded words.

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # NOTE(review): whitespace runs in this listing look normalized to a
        # single space.  As written this expectation equals the one in
        # test_simple_surprise although the appended chunk starts with a
        # space here -- confirm the intended spacing against upstream.
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header(
            "I am the very model of a modern Major-General; "
            "I've information vegetable, animal, and mineral; "
            "I know the kings of England, and I quote the fights historical "
            "from Marathon to Waterloo, in order categorical; "
            "I'm very well acquainted, too, with matters mathematical; "
            "I understand equations, both the simple and quadratical; "
            "about binomial theorem I'm teeming with a lot o' news, "
            "with many cheerful facts about the square of the hypotenuse.",
            maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            # assertLessEqual reports the offending length on failure.
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a header name the first line is split one word earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Same payload as above, so the round-trip check below compares
        # against exactly what was appended.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # Chunks are (value, charset) pairs, as in the decode_header()
        # results asserted elsewhere in this class.
        chunks.append(('test2', 'ascii'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_keeper(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [
            (b'Subject: ', None),
            (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 '
             b'\xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
            (b' zz.', None),
        ])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x) Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved. At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        msg['SomeHeader'] = ' value with leading ws'
        # "Name: " is followed by the value verbatim, so the value's own
        # leading space shows up as a second space after the colon.
        self.assertEqual(str(msg), "SomeHeader:  value with leading ws\n\n")
4489
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004490
Ezio Melottib3aedd42010-11-20 19:04:17 +00004491
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004492# Test RFC 2231 header parameters (en/de)coding
4493class TestRFC2231(TestEmailBase):
def test_get_param(self):
    # The 'title' parameter of msg_29.txt comes back as a
    # (charset, language, value) triple; unquote=False keeps the
    # surrounding double quotes on the value.
    title = 'This is even more ***fun*** isn\'t it!'
    msg = self._msgobj('msg_29.txt')
    self.assertEqual(msg.get_param('title'),
                     ('us-ascii', 'en', title))
    self.assertEqual(msg.get_param('title', unquote=False),
                     ('us-ascii', 'en', '"' + title + '"'))
4501
def test_set_param(self):
    eq = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    msg = Message()
    # Without a language the middle element of the triple is ''.
    msg.set_param('title', title, charset='us-ascii')
    eq(msg.get_param('title'), ('us-ascii', '', title))
    msg.set_param('title', title, charset='us-ascii', language='en')
    eq(msg.get_param('title'), ('us-ascii', 'en', title))
    # Setting the parameter on a parsed message adds an RFC 2231 encoded
    # title* parameter on a continuation line of Content-Type.
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', title, charset='us-ascii', language='en')
    # NOTE(review): whitespace runs in this listing look normalized to a
    # single space; confirm the spacing inside the Received: date below
    # against msg_01.txt.
    eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4538
R David Murraya2860e82011-04-16 09:20:30 -04004539 def test_set_param_requote(self):
4540 msg = Message()
4541 msg.set_param('title', 'foo')
4542 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4543 msg.set_param('title', 'bar', requote=False)
4544 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4545 # tspecial is still quoted.
4546 msg.set_param('title', "(bar)bell", requote=False)
4547 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4548
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004549 def test_del_param(self):
4550 eq = self.ndiffAssertEqual
4551 msg = self._msgobj('msg_01.txt')
4552 msg.set_param('foo', 'bar', charset='us-ascii', language='en')
4553 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4554 charset='us-ascii', language='en')
4555 msg.del_param('foo', header='Content-Type')
4556 eq(msg.as_string(maxheaderlen=78), """\
4557Return-Path: <bbb@zzz.org>
4558Delivered-To: bbb@zzz.org
4559Received: by mail.zzz.org (Postfix, from userid 889)
4560\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
4561MIME-Version: 1.0
4562Content-Transfer-Encoding: 7bit
4563Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
4564From: bbb@ddd.com (John X. Doe)
4565To: bbb@zzz.org
4566Subject: This is a test message
4567Date: Fri, 4 May 2001 14:05:44 -0400
4568Content-Type: text/plain; charset="us-ascii";
R. David Murraydfd7eb02010-12-24 22:36:49 +00004569 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004570
4571
4572Hi,
4573
4574Do you like this message?
4575
4576-Me
4577""")
4578
4579 def test_rfc2231_get_content_charset(self):
4580 eq = self.assertEqual
4581 msg = self._msgobj('msg_32.txt')
4582 eq(msg.get_content_charset(), 'us-ascii')
4583
R. David Murraydfd7eb02010-12-24 22:36:49 +00004584 def test_rfc2231_parse_rfc_quoting(self):
4585 m = textwrap.dedent('''\
4586 Content-Disposition: inline;
4587 \tfilename*0*=''This%20is%20even%20more%20;
4588 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4589 \tfilename*2="is it not.pdf"
4590
4591 ''')
4592 msg = email.message_from_string(m)
4593 self.assertEqual(msg.get_filename(),
4594 'This is even more ***fun*** is it not.pdf')
4595 self.assertEqual(m, msg.as_string())
4596
4597 def test_rfc2231_parse_extra_quoting(self):
4598 m = textwrap.dedent('''\
4599 Content-Disposition: inline;
4600 \tfilename*0*="''This%20is%20even%20more%20";
4601 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4602 \tfilename*2="is it not.pdf"
4603
4604 ''')
4605 msg = email.message_from_string(m)
4606 self.assertEqual(msg.get_filename(),
4607 'This is even more ***fun*** is it not.pdf')
4608 self.assertEqual(m, msg.as_string())
4609
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004610 def test_rfc2231_no_language_or_charset(self):
4611 m = '''\
4612Content-Transfer-Encoding: 8bit
4613Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4614Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4615
4616'''
4617 msg = email.message_from_string(m)
4618 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004619 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004620 self.assertEqual(
4621 param,
4622 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4623
4624 def test_rfc2231_no_language_or_charset_in_filename(self):
4625 m = '''\
4626Content-Disposition: inline;
4627\tfilename*0*="''This%20is%20even%20more%20";
4628\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4629\tfilename*2="is it not.pdf"
4630
4631'''
4632 msg = email.message_from_string(m)
4633 self.assertEqual(msg.get_filename(),
4634 'This is even more ***fun*** is it not.pdf')
4635
4636 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4637 m = '''\
4638Content-Disposition: inline;
4639\tfilename*0*="''This%20is%20even%20more%20";
4640\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4641\tfilename*2="is it not.pdf"
4642
4643'''
4644 msg = email.message_from_string(m)
4645 self.assertEqual(msg.get_filename(),
4646 'This is even more ***fun*** is it not.pdf')
4647
4648 def test_rfc2231_partly_encoded(self):
4649 m = '''\
4650Content-Disposition: inline;
4651\tfilename*0="''This%20is%20even%20more%20";
4652\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4653\tfilename*2="is it not.pdf"
4654
4655'''
4656 msg = email.message_from_string(m)
4657 self.assertEqual(
4658 msg.get_filename(),
4659 'This%20is%20even%20more%20***fun*** is it not.pdf')
4660
4661 def test_rfc2231_partly_nonencoded(self):
4662 m = '''\
4663Content-Disposition: inline;
4664\tfilename*0="This%20is%20even%20more%20";
4665\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4666\tfilename*2="is it not.pdf"
4667
4668'''
4669 msg = email.message_from_string(m)
4670 self.assertEqual(
4671 msg.get_filename(),
4672 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4673
4674 def test_rfc2231_no_language_or_charset_in_boundary(self):
4675 m = '''\
4676Content-Type: multipart/alternative;
4677\tboundary*0*="''This%20is%20even%20more%20";
4678\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4679\tboundary*2="is it not.pdf"
4680
4681'''
4682 msg = email.message_from_string(m)
4683 self.assertEqual(msg.get_boundary(),
4684 'This is even more ***fun*** is it not.pdf')
4685
4686 def test_rfc2231_no_language_or_charset_in_charset(self):
4687 # This is a nonsensical charset value, but tests the code anyway
4688 m = '''\
4689Content-Type: text/plain;
4690\tcharset*0*="This%20is%20even%20more%20";
4691\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4692\tcharset*2="is it not.pdf"
4693
4694'''
4695 msg = email.message_from_string(m)
4696 self.assertEqual(msg.get_content_charset(),
4697 'this is even more ***fun*** is it not.pdf')
4698
4699 def test_rfc2231_bad_encoding_in_filename(self):
4700 m = '''\
4701Content-Disposition: inline;
4702\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4703\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4704\tfilename*2="is it not.pdf"
4705
4706'''
4707 msg = email.message_from_string(m)
4708 self.assertEqual(msg.get_filename(),
4709 'This is even more ***fun*** is it not.pdf')
4710
4711 def test_rfc2231_bad_encoding_in_charset(self):
4712 m = """\
4713Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4714
4715"""
4716 msg = email.message_from_string(m)
4717 # This should return None because non-ascii characters in the charset
4718 # are not allowed.
4719 self.assertEqual(msg.get_content_charset(), None)
4720
4721 def test_rfc2231_bad_character_in_charset(self):
4722 m = """\
4723Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4724
4725"""
4726 msg = email.message_from_string(m)
4727 # This should return None because non-ascii characters in the charset
4728 # are not allowed.
4729 self.assertEqual(msg.get_content_charset(), None)
4730
4731 def test_rfc2231_bad_character_in_filename(self):
4732 m = '''\
4733Content-Disposition: inline;
4734\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4735\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4736\tfilename*2*="is it not.pdf%E2"
4737
4738'''
4739 msg = email.message_from_string(m)
4740 self.assertEqual(msg.get_filename(),
4741 'This is even more ***fun*** is it not.pdf\ufffd')
4742
4743 def test_rfc2231_unknown_encoding(self):
4744 m = """\
4745Content-Transfer-Encoding: 8bit
4746Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4747
4748"""
4749 msg = email.message_from_string(m)
4750 self.assertEqual(msg.get_filename(), 'myfile.txt')
4751
4752 def test_rfc2231_single_tick_in_filename_extended(self):
4753 eq = self.assertEqual
4754 m = """\
4755Content-Type: application/x-foo;
4756\tname*0*=\"Frank's\"; name*1*=\" Document\"
4757
4758"""
4759 msg = email.message_from_string(m)
4760 charset, language, s = msg.get_param('name')
4761 eq(charset, None)
4762 eq(language, None)
4763 eq(s, "Frank's Document")
4764
4765 def test_rfc2231_single_tick_in_filename(self):
4766 m = """\
4767Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4768
4769"""
4770 msg = email.message_from_string(m)
4771 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004772 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004773 self.assertEqual(param, "Frank's Document")
4774
4775 def test_rfc2231_tick_attack_extended(self):
4776 eq = self.assertEqual
4777 m = """\
4778Content-Type: application/x-foo;
4779\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4780
4781"""
4782 msg = email.message_from_string(m)
4783 charset, language, s = msg.get_param('name')
4784 eq(charset, 'us-ascii')
4785 eq(language, 'en-us')
4786 eq(s, "Frank's Document")
4787
4788 def test_rfc2231_tick_attack(self):
4789 m = """\
4790Content-Type: application/x-foo;
4791\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4792
4793"""
4794 msg = email.message_from_string(m)
4795 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004796 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004797 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4798
4799 def test_rfc2231_no_extended_values(self):
4800 eq = self.assertEqual
4801 m = """\
4802Content-Type: application/x-foo; name=\"Frank's Document\"
4803
4804"""
4805 msg = email.message_from_string(m)
4806 eq(msg.get_param('name'), "Frank's Document")
4807
4808 def test_rfc2231_encoded_then_unencoded_segments(self):
4809 eq = self.assertEqual
4810 m = """\
4811Content-Type: application/x-foo;
4812\tname*0*=\"us-ascii'en-us'My\";
4813\tname*1=\" Document\";
4814\tname*2*=\" For You\"
4815
4816"""
4817 msg = email.message_from_string(m)
4818 charset, language, s = msg.get_param('name')
4819 eq(charset, 'us-ascii')
4820 eq(language, 'en-us')
4821 eq(s, 'My Document For You')
4822
4823 def test_rfc2231_unencoded_then_encoded_segments(self):
4824 eq = self.assertEqual
4825 m = """\
4826Content-Type: application/x-foo;
4827\tname*0=\"us-ascii'en-us'My\";
4828\tname*1*=\" Document\";
4829\tname*2*=\" For You\"
4830
4831"""
4832 msg = email.message_from_string(m)
4833 charset, language, s = msg.get_param('name')
4834 eq(charset, 'us-ascii')
4835 eq(language, 'en-us')
4836 eq(s, 'My Document For You')
4837
4838
Ezio Melottib3aedd42010-11-20 19:04:17 +00004839
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC 1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Captures, in group 2, the text between the first '--<boundary>' line
    # and the next line consisting of that same '--<boundary>'.  Compiled
    # once here instead of on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, message parsed from that text).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        # Return the body of the first mime part found in `text`.  Fail
        # the test with a readable message, rather than an AttributeError
        # on None, when no boundary-delimited part is found.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no boundary-delimited part found in %s text' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to be identical to the one in the input.
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'result')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4875
4876
Ezio Melottib3aedd42010-11-20 19:04:17 +00004877
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()