blob: c04952cad37fe8f90dcbeb64abd3a09dbf6bae0b [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
R David Murrayc27e5222012-05-25 15:01:48 -040019import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21from email.charset import Charset
22from email.header import Header, decode_header, make_header
23from email.parser import Parser, HeaderParser
24from email.generator import Generator, DecodedGenerator
25from email.message import Message
26from email.mime.application import MIMEApplication
27from email.mime.audio import MIMEAudio
28from email.mime.text import MIMEText
29from email.mime.image import MIMEImage
30from email.mime.base import MIMEBase
31from email.mime.message import MIMEMessage
32from email.mime.multipart import MIMEMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
R David Murray28346b82011-03-31 11:40:20 -040040from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040041from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000042
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Guido van Rossum8b3febe2007-08-30 01:15:14 +000048# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Tests that load ``msg_NN.txt`` do so through ``self._msgobj`` or
    ``openfile``; the remaining tests build their messages inline.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied fallback value.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends instead of replacing, and lookups
            # return the first match.  Drop the previous value so that every
            # spelling of the encoding name is really the one being used.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must match the original text.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # No assertion: the call merely has to complete without raising.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header
        # exactly as it was before the call.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated name only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m),textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m),textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
631
632
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000633# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Checks on the Content-Transfer-Encoding chosen for message bodies."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as stream:
            image_bytes = stream.read()
        encoded_body = email.mime.image.MIMEImage(image_bytes).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        widest = max(len(row) for row in encoded_body.split('\n'))
        self.assertLessEqual(widest, 76)

    def test_encode_empty_payload(self):
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        header = 'content-transfer-encoding'
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain[header], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit[header], 'base64')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1[header], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        message = MIMEText('文', _charset='euc-jp')
        self.assertEqual(message['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000670
Ezio Melottib3aedd42010-11-20 19:04:17 +0000671
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000672# Test long header wrapping
673class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400674
675 maxDiff = None
676
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000677 def test_split_long_continuation(self):
678 eq = self.ndiffAssertEqual
679 msg = email.message_from_string("""\
680Subject: bug demonstration
681\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
682\tmore text
683
684test
685""")
686 sfp = StringIO()
687 g = Generator(sfp)
688 g.flatten(msg)
689 eq(sfp.getvalue(), """\
690Subject: bug demonstration
691\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
692\tmore text
693
694test
695""")
696
697 def test_another_long_almost_unsplittable_header(self):
698 eq = self.ndiffAssertEqual
699 hstr = """\
700bug demonstration
701\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
702\tmore text"""
703 h = Header(hstr, continuation_ws='\t')
704 eq(h.encode(), """\
705bug demonstration
706\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
707\tmore text""")
708 h = Header(hstr.replace('\t', ' '))
709 eq(h.encode(), """\
710bug demonstration
711 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
712 more text""")
713
714 def test_long_nonstring(self):
715 eq = self.ndiffAssertEqual
716 g = Charset("iso-8859-1")
717 cz = Charset("iso-8859-2")
718 utf8 = Charset("utf-8")
719 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
720 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
721 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
722 b'bef\xf6rdert. ')
723 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
724 b'd\xf9vtipu.. ')
725 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
726 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
727 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
728 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
729 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
730 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
731 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
732 '\u3044\u307e\u3059\u3002')
733 h = Header(g_head, g, header_name='Subject')
734 h.append(cz_head, cz)
735 h.append(utf8_head, utf8)
736 msg = Message()
737 msg['Subject'] = h
738 sfp = StringIO()
739 g = Generator(sfp)
740 g.flatten(msg)
741 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000742Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
743 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
744 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
745 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
746 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
747 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
748 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
749 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
750 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
751 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
752 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000753
754""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000755 eq(h.encode(maxlinelen=76), """\
756=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
757 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
758 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
759 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
760 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
761 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
762 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
763 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
764 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
765 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
766 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000767
768 def test_long_header_encode(self):
769 eq = self.ndiffAssertEqual
770 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
771 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
772 header_name='X-Foobar-Spoink-Defrobnit')
773 eq(h.encode(), '''\
774wasnipoop; giraffes="very-long-necked-animals";
775 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
776
777 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
778 eq = self.ndiffAssertEqual
779 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
780 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
781 header_name='X-Foobar-Spoink-Defrobnit',
782 continuation_ws='\t')
783 eq(h.encode(), '''\
784wasnipoop; giraffes="very-long-necked-animals";
785 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
786
787 def test_long_header_encode_with_tab_continuation(self):
788 eq = self.ndiffAssertEqual
789 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
790 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
791 header_name='X-Foobar-Spoink-Defrobnit',
792 continuation_ws='\t')
793 eq(h.encode(), '''\
794wasnipoop; giraffes="very-long-necked-animals";
795\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
796
R David Murray3a6152f2011-03-14 21:13:03 -0400797 def test_header_encode_with_different_output_charset(self):
798 h = Header('文', 'euc-jp')
799 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
800
801 def test_long_header_encode_with_different_output_charset(self):
802 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
803 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
804 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
805 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
806 res = """\
807=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
808 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
809 self.assertEqual(h.encode(), res)
810
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000811 def test_header_splitter(self):
812 eq = self.ndiffAssertEqual
813 msg = MIMEText('')
814 # It'd be great if we could use add_header() here, but that doesn't
815 # guarantee an order of the parameters.
816 msg['X-Foobar-Spoink-Defrobnit'] = (
817 'wasnipoop; giraffes="very-long-necked-animals"; '
818 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
819 sfp = StringIO()
820 g = Generator(sfp)
821 g.flatten(msg)
822 eq(sfp.getvalue(), '''\
823Content-Type: text/plain; charset="us-ascii"
824MIME-Version: 1.0
825Content-Transfer-Encoding: 7bit
826X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
827 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
828
829''')
830
831 def test_no_semis_header_splitter(self):
832 eq = self.ndiffAssertEqual
833 msg = Message()
834 msg['From'] = 'test@dom.ain'
835 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
836 msg.set_payload('Test')
837 sfp = StringIO()
838 g = Generator(sfp)
839 g.flatten(msg)
840 eq(sfp.getvalue(), """\
841From: test@dom.ain
842References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
843 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
844
845Test""")
846
R David Murray7da4db12011-04-07 20:37:17 -0400847 def test_last_split_chunk_does_not_fit(self):
848 eq = self.ndiffAssertEqual
849 h = Header('Subject: the first part of this is short, but_the_second'
850 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
851 '_all_by_itself')
852 eq(h.encode(), """\
853Subject: the first part of this is short,
854 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
855
856 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
857 eq = self.ndiffAssertEqual
858 h = Header(', but_the_second'
859 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
860 '_all_by_itself')
861 eq(h.encode(), """\
862,
863 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
864
865 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
866 eq = self.ndiffAssertEqual
867 h = Header(', , but_the_second'
868 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
869 '_all_by_itself')
870 eq(h.encode(), """\
871, ,
872 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
873
874 def test_trailing_splitable_on_overlong_unsplitable(self):
875 eq = self.ndiffAssertEqual
876 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
877 'be_on_a_line_all_by_itself;')
878 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
879 "be_on_a_line_all_by_itself;")
880
881 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
882 eq = self.ndiffAssertEqual
883 h = Header('; '
884 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400885 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400886 eq(h.encode(), """\
887;
R David Murray01581ee2011-04-18 10:04:34 -0400888 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400889
R David Murraye1292a22011-04-07 20:54:03 -0400890 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400891 eq = self.ndiffAssertEqual
892 h = Header('This is a long line that has two whitespaces in a row. '
893 'This used to cause truncation of the header when folded')
894 eq(h.encode(), """\
895This is a long line that has two whitespaces in a row. This used to cause
896 truncation of the header when folded""")
897
R David Murray01581ee2011-04-18 10:04:34 -0400898 def test_splitter_split_on_punctuation_only_if_fws(self):
899 eq = self.ndiffAssertEqual
900 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
901 'they;arenotlegal;fold,points')
902 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
903 "arenotlegal;fold,points")
904
905 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
906 eq = self.ndiffAssertEqual
907 h = Header('this is a test where we need to have more than one line '
908 'before; our final line that is just too big to fit;; '
909 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
910 'be_on_a_line_all_by_itself;')
911 eq(h.encode(), """\
912this is a test where we need to have more than one line before;
913 our final line that is just too big to fit;;
914 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
915
916 def test_overlong_last_part_followed_by_split_point(self):
917 eq = self.ndiffAssertEqual
918 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
919 'be_on_a_line_all_by_itself ')
920 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
921 "should_be_on_a_line_all_by_itself ")
922
923 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
924 eq = self.ndiffAssertEqual
925 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
926 'before_our_final_line_; ; '
927 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
928 'be_on_a_line_all_by_itself; ')
929 eq(h.encode(), """\
930this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
931 ;
932 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
933
934 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
935 eq = self.ndiffAssertEqual
936 h = Header('this is a test where we need to have more than one line '
937 'before our final line; ; '
938 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
939 'be_on_a_line_all_by_itself; ')
940 eq(h.encode(), """\
941this is a test where we need to have more than one line before our final line;
942 ;
943 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
944
945 def test_long_header_with_whitespace_runs(self):
946 eq = self.ndiffAssertEqual
947 msg = Message()
948 msg['From'] = 'test@dom.ain'
949 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
950 msg.set_payload('Test')
951 sfp = StringIO()
952 g = Generator(sfp)
953 g.flatten(msg)
954 eq(sfp.getvalue(), """\
955From: test@dom.ain
956References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
957 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
958 <foo@dom.ain> <foo@dom.ain>\x20\x20
959
960Test""")
961
962 def test_long_run_with_semi_header_splitter(self):
963 eq = self.ndiffAssertEqual
964 msg = Message()
965 msg['From'] = 'test@dom.ain'
966 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
967 msg.set_payload('Test')
968 sfp = StringIO()
969 g = Generator(sfp)
970 g.flatten(msg)
971 eq(sfp.getvalue(), """\
972From: test@dom.ain
973References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
974 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
975 <foo@dom.ain>; abc
976
977Test""")
978
979 def test_splitter_split_on_punctuation_only_if_fws(self):
980 eq = self.ndiffAssertEqual
981 msg = Message()
982 msg['From'] = 'test@dom.ain'
983 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
984 'they;arenotlegal;fold,points')
985 msg.set_payload('Test')
986 sfp = StringIO()
987 g = Generator(sfp)
988 g.flatten(msg)
989 # XXX the space after the header should not be there.
990 eq(sfp.getvalue(), """\
991From: test@dom.ain
992References:\x20
993 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
994
995Test""")
996
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000997 def test_no_split_long_header(self):
998 eq = self.ndiffAssertEqual
999 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001000 h = Header(hstr)
1001 # These come on two lines because Headers are really field value
1002 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001003 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001004References:
1005 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1006 h = Header('x' * 80)
1007 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001008
1009 def test_splitting_multiple_long_lines(self):
1010 eq = self.ndiffAssertEqual
1011 hstr = """\
1012from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1013\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1014\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1015"""
1016 h = Header(hstr, continuation_ws='\t')
1017 eq(h.encode(), """\
1018from babylon.socal-raves.org (localhost [127.0.0.1]);
1019 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1020 for <mailman-admin@babylon.socal-raves.org>;
1021 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1022\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1023 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1024 for <mailman-admin@babylon.socal-raves.org>;
1025 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1026\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1027 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1028 for <mailman-admin@babylon.socal-raves.org>;
1029 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1030
1031 def test_splitting_first_line_only_is_long(self):
1032 eq = self.ndiffAssertEqual
1033 hstr = """\
1034from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1035\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1036\tid 17k4h5-00034i-00
1037\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1038 h = Header(hstr, maxlinelen=78, header_name='Received',
1039 continuation_ws='\t')
1040 eq(h.encode(), """\
1041from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1042 helo=cthulhu.gerg.ca)
1043\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1044\tid 17k4h5-00034i-00
1045\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1046
1047 def test_long_8bit_header(self):
1048 eq = self.ndiffAssertEqual
1049 msg = Message()
1050 h = Header('Britische Regierung gibt', 'iso-8859-1',
1051 header_name='Subject')
1052 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001053 eq(h.encode(maxlinelen=76), """\
1054=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1055 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001056 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001057 eq(msg.as_string(maxheaderlen=76), """\
1058Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1059 =?iso-8859-1?q?hore-Windkraftprojekte?=
1060
1061""")
1062 eq(msg.as_string(maxheaderlen=0), """\
1063Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001064
1065""")
1066
1067 def test_long_8bit_header_no_charset(self):
1068 eq = self.ndiffAssertEqual
1069 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001070 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1071 'f\xfcr Offshore-Windkraftprojekte '
1072 '<a-very-long-address@example.com>')
1073 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001074 eq(msg.as_string(maxheaderlen=78), """\
1075Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1076 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1077
1078""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001079 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001080 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001081 header_name='Reply-To')
1082 eq(msg.as_string(maxheaderlen=78), """\
1083Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1084 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001085
1086""")
1087
1088 def test_long_to_header(self):
1089 eq = self.ndiffAssertEqual
1090 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001091 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001092 '"Someone Test #B" <someone@umich.edu>, '
1093 '"Someone Test #C" <someone@eecs.umich.edu>, '
1094 '"Someone Test #D" <someone@eecs.umich.edu>')
1095 msg = Message()
1096 msg['To'] = to
1097 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001098To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001099 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001100 "Someone Test #C" <someone@eecs.umich.edu>,
1101 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001102
1103''')
1104
1105 def test_long_line_after_append(self):
1106 eq = self.ndiffAssertEqual
1107 s = 'This is an example of string which has almost the limit of header length.'
1108 h = Header(s)
1109 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001110 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001111This is an example of string which has almost the limit of header length.
1112 Add another line.""")
1113
1114 def test_shorter_line_with_append(self):
1115 eq = self.ndiffAssertEqual
1116 s = 'This is a shorter line.'
1117 h = Header(s)
1118 h.append('Add another sentence. (Surprise?)')
1119 eq(h.encode(),
1120 'This is a shorter line. Add another sentence. (Surprise?)')
1121
1122 def test_long_field_name(self):
1123 eq = self.ndiffAssertEqual
1124 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001125 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1126 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1127 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1128 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001129 h = Header(gs, 'iso-8859-1', header_name=fn)
1130 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001131 eq(h.encode(maxlinelen=76), """\
1132=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1133 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1134 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1135 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001136
1137 def test_long_received_header(self):
1138 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1139 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1140 'Wed, 05 Mar 2003 18:10:18 -0700')
1141 msg = Message()
1142 msg['Received-1'] = Header(h, continuation_ws='\t')
1143 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001144 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001145 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001146Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1147 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001148 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001149Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1150 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001151 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001152
1153""")
1154
1155 def test_string_headerinst_eq(self):
1156 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1157 'tu-muenchen.de> (David Bremner\'s message of '
1158 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1159 msg = Message()
1160 msg['Received-1'] = Header(h, header_name='Received-1',
1161 continuation_ws='\t')
1162 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001163 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001164 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001165Received-1:\x20
1166 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1167 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1168Received-2:\x20
1169 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1170 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001171
1172""")
1173
1174 def test_long_unbreakable_lines_with_continuation(self):
1175 eq = self.ndiffAssertEqual
1176 msg = Message()
1177 t = """\
1178iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1179 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1180 msg['Face-1'] = t
1181 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001182 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001183 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001184 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001185 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001186Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001187 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001188 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001189Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001190 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001191 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001192Face-3:\x20
1193 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1194 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001195
1196""")
1197
1198 def test_another_long_multiline_header(self):
1199 eq = self.ndiffAssertEqual
1200 m = ('Received: from siimage.com '
1201 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001202 'Microsoft SMTPSVC(5.0.2195.4905); '
1203 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001204 msg = email.message_from_string(m)
1205 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001206Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1207 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001208
1209''')
1210
1211 def test_long_lines_with_different_header(self):
1212 eq = self.ndiffAssertEqual
1213 h = ('List-Unsubscribe: '
1214 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1215 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1216 '?subject=unsubscribe>')
1217 msg = Message()
1218 msg['List'] = h
1219 msg['List'] = Header(h, header_name='List')
1220 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001221List: List-Unsubscribe:
1222 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001223 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001224List: List-Unsubscribe:
1225 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001226 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001227
1228""")
1229
R. David Murray6f0022d2011-01-07 21:57:25 +00001230 def test_long_rfc2047_header_with_embedded_fws(self):
1231 h = Header(textwrap.dedent("""\
1232 We're going to pretend this header is in a non-ascii character set
1233 \tto see if line wrapping with encoded words and embedded
1234 folding white space works"""),
1235 charset='utf-8',
1236 header_name='Test')
1237 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1238 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1239 =?utf-8?q?cter_set?=
1240 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1241 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1242
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001243
Ezio Melottib3aedd42010-11-20 19:04:17 +00001244
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001245# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The fixture body starts with "From ", the line an mbox writer has to
    # escape.

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def _flattened(self, mangle_from_):
        # Serialize the fixture with the given mangle_from_ setting.
        out = StringIO()
        Generator(out, mangle_from_=mangle_from_).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        # mangle_from_=True: the body line is expected prefixed with '>'.
        self.assertEqual(self._flattened(True),
                         'From: aaa@bbb.org\n'
                         '\n'
                         '>From the desk of A.A.A.:\n'
                         'Blah blah blah\n')

    def test_dont_mangle_from(self):
        # mangle_from_=False: the body is expected verbatim.
        self.assertEqual(self._flattened(False),
                         'From: aaa@bbb.org\n'
                         '\n'
                         'From the desk of A.A.A.:\n'
                         'Blah blah blah\n')
1276
1277
Ezio Melottib3aedd42010-11-20 19:04:17 +00001278
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001279# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Keep the raw bytes so test_encoding can compare the decoded
        # payload against them.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b), which can only say "False is not true".
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is a sentinel no real parameter value can be.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1317
1318
Ezio Melottib3aedd42010-11-20 19:04:17 +00001319
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001320# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Keep the raw bytes so test_encoding can compare the decoded
        # payload against them.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b), which can only say "False is not true".
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is a sentinel no real parameter value can be.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1358
1359
Ezio Melottib3aedd42010-11-20 19:04:17 +00001360
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001361# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Binary data with no explicit subtype is expected as
        # application/octet-stream with a base64 transfer encoding.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001376
1377
Ezio Melottib3aedd42010-11-20 19:04:17 +00001378
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001379# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a sentinel no real parameter value can be;
        # assertIs shows both operands if the lookup returns something else.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'), missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn prints the serialized message when the text is absent.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1430
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001431
Ezio Melottib3aedd42010-11-20 19:04:17 +00001432
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001433# Test complicated multipart/* messages
1434class TestMultipart(TestEmailBase):
1435 def setUp(self):
1436 with openfile('PyBanner048.gif', 'rb') as fp:
1437 data = fp.read()
1438 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1439 image = MIMEImage(data, name='dingusfish.gif')
1440 image.add_header('content-disposition', 'attachment',
1441 filename='dingusfish.gif')
1442 intro = MIMEText('''\
1443Hi there,
1444
1445This is the dingus fish.
1446''')
1447 container.attach(intro)
1448 container.attach(image)
1449 container['From'] = 'Barry <barry@digicool.com>'
1450 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1451 container['Subject'] = 'Here is your dingus fish'
1452
1453 now = 987809702.54848599
1454 timetuple = time.localtime(now)
1455 if timetuple[-1] == 0:
1456 tzsecs = time.timezone
1457 else:
1458 tzsecs = time.altzone
1459 if tzsecs > 0:
1460 sign = '-'
1461 else:
1462 sign = '+'
1463 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1464 container['Date'] = time.strftime(
1465 '%a, %d %b %Y %H:%M:%S',
1466 time.localtime(now)) + tzoffset
1467 self._msg = container
1468 self._im = image
1469 self._txt = intro
1470
1471 def test_hierarchy(self):
1472 # convenience
1473 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001474 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001475 raises = self.assertRaises
1476 # tests
1477 m = self._msg
1478 unless(m.is_multipart())
1479 eq(m.get_content_type(), 'multipart/mixed')
1480 eq(len(m.get_payload()), 2)
1481 raises(IndexError, m.get_payload, 2)
1482 m0 = m.get_payload(0)
1483 m1 = m.get_payload(1)
1484 unless(m0 is self._txt)
1485 unless(m1 is self._im)
1486 eq(m.get_payload(), [m0, m1])
1487 unless(not m0.is_multipart())
1488 unless(not m1.is_multipart())
1489
1490 def test_empty_multipart_idempotent(self):
1491 text = """\
1492Content-Type: multipart/mixed; boundary="BOUNDARY"
1493MIME-Version: 1.0
1494Subject: A subject
1495To: aperson@dom.ain
1496From: bperson@dom.ain
1497
1498
1499--BOUNDARY
1500
1501
1502--BOUNDARY--
1503"""
1504 msg = Parser().parsestr(text)
1505 self.ndiffAssertEqual(text, msg.as_string())
1506
1507 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1508 outer = MIMEBase('multipart', 'mixed')
1509 outer['Subject'] = 'A subject'
1510 outer['To'] = 'aperson@dom.ain'
1511 outer['From'] = 'bperson@dom.ain'
1512 outer.set_boundary('BOUNDARY')
1513 self.ndiffAssertEqual(outer.as_string(), '''\
1514Content-Type: multipart/mixed; boundary="BOUNDARY"
1515MIME-Version: 1.0
1516Subject: A subject
1517To: aperson@dom.ain
1518From: bperson@dom.ain
1519
1520--BOUNDARY
1521
1522--BOUNDARY--''')
1523
1524 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1525 outer = MIMEBase('multipart', 'mixed')
1526 outer['Subject'] = 'A subject'
1527 outer['To'] = 'aperson@dom.ain'
1528 outer['From'] = 'bperson@dom.ain'
1529 outer.preamble = ''
1530 outer.epilogue = ''
1531 outer.set_boundary('BOUNDARY')
1532 self.ndiffAssertEqual(outer.as_string(), '''\
1533Content-Type: multipart/mixed; boundary="BOUNDARY"
1534MIME-Version: 1.0
1535Subject: A subject
1536To: aperson@dom.ain
1537From: bperson@dom.ain
1538
1539
1540--BOUNDARY
1541
1542--BOUNDARY--
1543''')
1544
1545 def test_one_part_in_a_multipart(self):
1546 eq = self.ndiffAssertEqual
1547 outer = MIMEBase('multipart', 'mixed')
1548 outer['Subject'] = 'A subject'
1549 outer['To'] = 'aperson@dom.ain'
1550 outer['From'] = 'bperson@dom.ain'
1551 outer.set_boundary('BOUNDARY')
1552 msg = MIMEText('hello world')
1553 outer.attach(msg)
1554 eq(outer.as_string(), '''\
1555Content-Type: multipart/mixed; boundary="BOUNDARY"
1556MIME-Version: 1.0
1557Subject: A subject
1558To: aperson@dom.ain
1559From: bperson@dom.ain
1560
1561--BOUNDARY
1562Content-Type: text/plain; charset="us-ascii"
1563MIME-Version: 1.0
1564Content-Transfer-Encoding: 7bit
1565
1566hello world
1567--BOUNDARY--''')
1568
1569 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1570 eq = self.ndiffAssertEqual
1571 outer = MIMEBase('multipart', 'mixed')
1572 outer['Subject'] = 'A subject'
1573 outer['To'] = 'aperson@dom.ain'
1574 outer['From'] = 'bperson@dom.ain'
1575 outer.preamble = ''
1576 msg = MIMEText('hello world')
1577 outer.attach(msg)
1578 outer.set_boundary('BOUNDARY')
1579 eq(outer.as_string(), '''\
1580Content-Type: multipart/mixed; boundary="BOUNDARY"
1581MIME-Version: 1.0
1582Subject: A subject
1583To: aperson@dom.ain
1584From: bperson@dom.ain
1585
1586
1587--BOUNDARY
1588Content-Type: text/plain; charset="us-ascii"
1589MIME-Version: 1.0
1590Content-Transfer-Encoding: 7bit
1591
1592hello world
1593--BOUNDARY--''')
1594
1595
1596 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1597 eq = self.ndiffAssertEqual
1598 outer = MIMEBase('multipart', 'mixed')
1599 outer['Subject'] = 'A subject'
1600 outer['To'] = 'aperson@dom.ain'
1601 outer['From'] = 'bperson@dom.ain'
1602 outer.preamble = None
1603 msg = MIMEText('hello world')
1604 outer.attach(msg)
1605 outer.set_boundary('BOUNDARY')
1606 eq(outer.as_string(), '''\
1607Content-Type: multipart/mixed; boundary="BOUNDARY"
1608MIME-Version: 1.0
1609Subject: A subject
1610To: aperson@dom.ain
1611From: bperson@dom.ain
1612
1613--BOUNDARY
1614Content-Type: text/plain; charset="us-ascii"
1615MIME-Version: 1.0
1616Content-Transfer-Encoding: 7bit
1617
1618hello world
1619--BOUNDARY--''')
1620
1621
1622 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1623 eq = self.ndiffAssertEqual
1624 outer = MIMEBase('multipart', 'mixed')
1625 outer['Subject'] = 'A subject'
1626 outer['To'] = 'aperson@dom.ain'
1627 outer['From'] = 'bperson@dom.ain'
1628 outer.epilogue = None
1629 msg = MIMEText('hello world')
1630 outer.attach(msg)
1631 outer.set_boundary('BOUNDARY')
1632 eq(outer.as_string(), '''\
1633Content-Type: multipart/mixed; boundary="BOUNDARY"
1634MIME-Version: 1.0
1635Subject: A subject
1636To: aperson@dom.ain
1637From: bperson@dom.ain
1638
1639--BOUNDARY
1640Content-Type: text/plain; charset="us-ascii"
1641MIME-Version: 1.0
1642Content-Transfer-Encoding: 7bit
1643
1644hello world
1645--BOUNDARY--''')
1646
1647
1648 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1649 eq = self.ndiffAssertEqual
1650 outer = MIMEBase('multipart', 'mixed')
1651 outer['Subject'] = 'A subject'
1652 outer['To'] = 'aperson@dom.ain'
1653 outer['From'] = 'bperson@dom.ain'
1654 outer.epilogue = ''
1655 msg = MIMEText('hello world')
1656 outer.attach(msg)
1657 outer.set_boundary('BOUNDARY')
1658 eq(outer.as_string(), '''\
1659Content-Type: multipart/mixed; boundary="BOUNDARY"
1660MIME-Version: 1.0
1661Subject: A subject
1662To: aperson@dom.ain
1663From: bperson@dom.ain
1664
1665--BOUNDARY
1666Content-Type: text/plain; charset="us-ascii"
1667MIME-Version: 1.0
1668Content-Transfer-Encoding: 7bit
1669
1670hello world
1671--BOUNDARY--
1672''')
1673
1674
1675 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1676 eq = self.ndiffAssertEqual
1677 outer = MIMEBase('multipart', 'mixed')
1678 outer['Subject'] = 'A subject'
1679 outer['To'] = 'aperson@dom.ain'
1680 outer['From'] = 'bperson@dom.ain'
1681 outer.epilogue = '\n'
1682 msg = MIMEText('hello world')
1683 outer.attach(msg)
1684 outer.set_boundary('BOUNDARY')
1685 eq(outer.as_string(), '''\
1686Content-Type: multipart/mixed; boundary="BOUNDARY"
1687MIME-Version: 1.0
1688Subject: A subject
1689To: aperson@dom.ain
1690From: bperson@dom.ain
1691
1692--BOUNDARY
1693Content-Type: text/plain; charset="us-ascii"
1694MIME-Version: 1.0
1695Content-Transfer-Encoding: 7bit
1696
1697hello world
1698--BOUNDARY--
1699
1700''')
1701
1702 def test_message_external_body(self):
1703 eq = self.assertEqual
1704 msg = self._msgobj('msg_36.txt')
1705 eq(len(msg.get_payload()), 2)
1706 msg1 = msg.get_payload(1)
1707 eq(msg1.get_content_type(), 'multipart/alternative')
1708 eq(len(msg1.get_payload()), 2)
1709 for subpart in msg1.get_payload():
1710 eq(subpart.get_content_type(), 'message/external-body')
1711 eq(len(subpart.get_payload()), 1)
1712 subsubpart = subpart.get_payload(0)
1713 eq(subsubpart.get_content_type(), 'text/plain')
1714
1715 def test_double_boundary(self):
1716 # msg_37.txt is a multipart that contains two dash-boundary's in a
1717 # row. Our interpretation of RFC 2046 calls for ignoring the second
1718 # and subsequent boundaries.
1719 msg = self._msgobj('msg_37.txt')
1720 self.assertEqual(len(msg.get_payload()), 3)
1721
1722 def test_nested_inner_contains_outer_boundary(self):
1723 eq = self.ndiffAssertEqual
1724 # msg_38.txt has an inner part that contains outer boundaries. My
1725 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1726 # these are illegal and should be interpreted as unterminated inner
1727 # parts.
1728 msg = self._msgobj('msg_38.txt')
1729 sfp = StringIO()
1730 iterators._structure(msg, sfp)
1731 eq(sfp.getvalue(), """\
1732multipart/mixed
1733 multipart/mixed
1734 multipart/alternative
1735 text/plain
1736 text/plain
1737 text/plain
1738 text/plain
1739""")
1740
1741 def test_nested_with_same_boundary(self):
1742 eq = self.ndiffAssertEqual
1743 # msg 39.txt is similarly evil in that it's got inner parts that use
1744 # the same boundary as outer parts. Again, I believe the way this is
1745 # parsed is closest to the spirit of RFC 2046
1746 msg = self._msgobj('msg_39.txt')
1747 sfp = StringIO()
1748 iterators._structure(msg, sfp)
1749 eq(sfp.getvalue(), """\
1750multipart/mixed
1751 multipart/mixed
1752 multipart/alternative
1753 application/octet-stream
1754 application/octet-stream
1755 text/plain
1756""")
1757
1758 def test_boundary_in_non_multipart(self):
1759 msg = self._msgobj('msg_40.txt')
1760 self.assertEqual(msg.as_string(), '''\
1761MIME-Version: 1.0
1762Content-Type: text/html; boundary="--961284236552522269"
1763
1764----961284236552522269
1765Content-Type: text/html;
1766Content-Transfer-Encoding: 7Bit
1767
1768<html></html>
1769
1770----961284236552522269--
1771''')
1772
1773 def test_boundary_with_leading_space(self):
1774 eq = self.assertEqual
1775 msg = email.message_from_string('''\
1776MIME-Version: 1.0
1777Content-Type: multipart/mixed; boundary=" XXXX"
1778
1779-- XXXX
1780Content-Type: text/plain
1781
1782
1783-- XXXX
1784Content-Type: text/plain
1785
1786-- XXXX--
1787''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001788 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001789 eq(msg.get_boundary(), ' XXXX')
1790 eq(len(msg.get_payload()), 2)
1791
1792 def test_boundary_without_trailing_newline(self):
1793 m = Parser().parsestr("""\
1794Content-Type: multipart/mixed; boundary="===============0012394164=="
1795MIME-Version: 1.0
1796
1797--===============0012394164==
1798Content-Type: image/file1.jpg
1799MIME-Version: 1.0
1800Content-Transfer-Encoding: base64
1801
1802YXNkZg==
1803--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001804 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001805
1806
Ezio Melottib3aedd42010-11-20 19:04:17 +00001807
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001808# Test some badly formatted messages
R David Murrayc27e5222012-05-25 15:01:48 -04001809class TestNonConformant(TestEmailBase):
R David Murray3edd22a2011-04-18 13:59:37 -04001810
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001811 def test_parse_missing_minor_type(self):
1812 eq = self.assertEqual
1813 msg = self._msgobj('msg_14.txt')
1814 eq(msg.get_content_type(), 'text/plain')
1815 eq(msg.get_content_maintype(), 'text')
1816 eq(msg.get_content_subtype(), 'plain')
1817
R David Murrayc27e5222012-05-25 15:01:48 -04001818 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001819 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001820 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001821 msg = self._msgobj('msg_15.txt')
1822 # XXX We can probably eventually do better
1823 inner = msg.get_payload(0)
1824 unless(hasattr(inner, 'defects'))
R David Murrayc27e5222012-05-25 15:01:48 -04001825 self.assertEqual(len(inner.defects), 1)
1826 unless(isinstance(inner.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001827 errors.StartBoundaryNotFoundDefect))
1828
R David Murrayc27e5222012-05-25 15:01:48 -04001829 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001830 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001831 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001832 msg = self._msgobj('msg_25.txt')
1833 unless(isinstance(msg.get_payload(), str))
R David Murrayc27e5222012-05-25 15:01:48 -04001834 self.assertEqual(len(msg.defects), 2)
1835 unless(isinstance(msg.defects[0],
R David Murray3edd22a2011-04-18 13:59:37 -04001836 errors.NoBoundaryInMultipartDefect))
R David Murrayc27e5222012-05-25 15:01:48 -04001837 unless(isinstance(msg.defects[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001838 errors.MultipartInvariantViolationDefect))
1839
R David Murray749073a2011-06-22 13:47:53 -04001840 multipart_msg = textwrap.dedent("""\
1841 Date: Wed, 14 Nov 2007 12:56:23 GMT
1842 From: foo@bar.invalid
1843 To: foo@bar.invalid
1844 Subject: Content-Transfer-Encoding: base64 and multipart
1845 MIME-Version: 1.0
1846 Content-Type: multipart/mixed;
1847 boundary="===============3344438784458119861=="{}
1848
1849 --===============3344438784458119861==
1850 Content-Type: text/plain
1851
1852 Test message
1853
1854 --===============3344438784458119861==
1855 Content-Type: application/octet-stream
1856 Content-Transfer-Encoding: base64
1857
1858 YWJj
1859
1860 --===============3344438784458119861==--
1861 """)
1862
R David Murrayc27e5222012-05-25 15:01:48 -04001863 # test_parser.TestMessageDefectDetectionBase
R David Murray749073a2011-06-22 13:47:53 -04001864 def test_multipart_invalid_cte(self):
R David Murrayc27e5222012-05-25 15:01:48 -04001865 msg = self._str_msg(
1866 self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
1867 self.assertEqual(len(msg.defects), 1)
1868 self.assertIsInstance(msg.defects[0],
R David Murray749073a2011-06-22 13:47:53 -04001869 errors.InvalidMultipartContentTransferEncodingDefect)
1870
R David Murrayc27e5222012-05-25 15:01:48 -04001871 # test_parser.TestMessageDefectDetectionBase
R David Murray749073a2011-06-22 13:47:53 -04001872 def test_multipart_no_cte_no_defect(self):
R David Murrayc27e5222012-05-25 15:01:48 -04001873 msg = self._str_msg(self.multipart_msg.format(''))
1874 self.assertEqual(len(msg.defects), 0)
R David Murray749073a2011-06-22 13:47:53 -04001875
R David Murrayc27e5222012-05-25 15:01:48 -04001876 # test_parser.TestMessageDefectDetectionBase
R David Murray749073a2011-06-22 13:47:53 -04001877 def test_multipart_valid_cte_no_defect(self):
1878 for cte in ('7bit', '8bit', 'BINary'):
R David Murrayc27e5222012-05-25 15:01:48 -04001879 msg = self._str_msg(
R David Murray749073a2011-06-22 13:47:53 -04001880 self.multipart_msg.format(
R David Murrayc27e5222012-05-25 15:01:48 -04001881 "\nContent-Transfer-Encoding: {}".format(cte)))
1882 self.assertEqual(len(msg.defects), 0)
R David Murray749073a2011-06-22 13:47:53 -04001883
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001884 def test_invalid_content_type(self):
1885 eq = self.assertEqual
1886 neq = self.ndiffAssertEqual
1887 msg = Message()
1888 # RFC 2045, $5.2 says invalid yields text/plain
1889 msg['Content-Type'] = 'text'
1890 eq(msg.get_content_maintype(), 'text')
1891 eq(msg.get_content_subtype(), 'plain')
1892 eq(msg.get_content_type(), 'text/plain')
1893 # Clear the old value and try something /really/ invalid
1894 del msg['content-type']
1895 msg['Content-Type'] = 'foo'
1896 eq(msg.get_content_maintype(), 'text')
1897 eq(msg.get_content_subtype(), 'plain')
1898 eq(msg.get_content_type(), 'text/plain')
1899 # Still, make sure that the message is idempotently generated
1900 s = StringIO()
1901 g = Generator(s)
1902 g.flatten(msg)
1903 neq(s.getvalue(), 'Content-Type: foo\n\n')
1904
1905 def test_no_start_boundary(self):
1906 eq = self.ndiffAssertEqual
1907 msg = self._msgobj('msg_31.txt')
1908 eq(msg.get_payload(), """\
1909--BOUNDARY
1910Content-Type: text/plain
1911
1912message 1
1913
1914--BOUNDARY
1915Content-Type: text/plain
1916
1917message 2
1918
1919--BOUNDARY--
1920""")
1921
1922 def test_no_separating_blank_line(self):
1923 eq = self.ndiffAssertEqual
1924 msg = self._msgobj('msg_35.txt')
1925 eq(msg.as_string(), """\
1926From: aperson@dom.ain
1927To: bperson@dom.ain
1928Subject: here's something interesting
1929
1930counter to RFC 2822, there's no separating newline here
1931""")
1932
R David Murrayc27e5222012-05-25 15:01:48 -04001933 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001934 def test_lying_multipart(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001935 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001936 msg = self._msgobj('msg_41.txt')
1937 unless(hasattr(msg, 'defects'))
R David Murrayc27e5222012-05-25 15:01:48 -04001938 self.assertEqual(len(msg.defects), 2)
1939 unless(isinstance(msg.defects[0],
R David Murray3edd22a2011-04-18 13:59:37 -04001940 errors.NoBoundaryInMultipartDefect))
R David Murrayc27e5222012-05-25 15:01:48 -04001941 unless(isinstance(msg.defects[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001942 errors.MultipartInvariantViolationDefect))
1943
R David Murrayc27e5222012-05-25 15:01:48 -04001944 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001945 def test_missing_start_boundary(self):
1946 outer = self._msgobj('msg_42.txt')
1947 # The message structure is:
1948 #
1949 # multipart/mixed
1950 # text/plain
1951 # message/rfc822
1952 # multipart/mixed [*]
1953 #
1954 # [*] This message is missing its start boundary
1955 bad = outer.get_payload(1).get_payload(0)
R David Murrayc27e5222012-05-25 15:01:48 -04001956 self.assertEqual(len(bad.defects), 1)
1957 self.assertTrue(isinstance(bad.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001958 errors.StartBoundaryNotFoundDefect))
1959
R David Murrayc27e5222012-05-25 15:01:48 -04001960 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001961 def test_first_line_is_continuation_header(self):
1962 eq = self.assertEqual
R David Murrayadbdcdb2012-05-27 20:45:01 -04001963 m = ' Line 1\nSubject: test\n\nbody'
R David Murrayc27e5222012-05-25 15:01:48 -04001964 msg = email.message_from_string(m)
R David Murrayadbdcdb2012-05-27 20:45:01 -04001965 eq(msg.keys(), ['Subject'])
1966 eq(msg.get_payload(), 'body')
R David Murrayc27e5222012-05-25 15:01:48 -04001967 eq(len(msg.defects), 1)
R David Murrayadbdcdb2012-05-27 20:45:01 -04001968 self.assertDefectsEqual(msg.defects,
1969 [errors.FirstHeaderLineIsContinuationDefect])
R David Murrayc27e5222012-05-25 15:01:48 -04001970 eq(msg.defects[0].line, ' Line 1\n')
R David Murray3edd22a2011-04-18 13:59:37 -04001971
R David Murrayadbdcdb2012-05-27 20:45:01 -04001972 # test_parser.TestMessageDefectDetectionBase
1973 def test_missing_header_body_separator(self):
1974 # Our heuristic if we see a line that doesn't look like a header (no
1975 # leading whitespace but no ':') is to assume that the blank line that
1976 # separates the header from the body is missing, and to stop parsing
1977 # headers and start parsing the body.
1978 msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
1979 self.assertEqual(msg.keys(), ['Subject'])
1980 self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
1981 self.assertDefectsEqual(msg.defects,
1982 [errors.MissingHeaderBodySeparatorDefect])
1983
Ezio Melottib3aedd42010-11-20 19:04:17 +00001984
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001985# Test RFC 2047 header encoding and decoding
Guido van Rossum9604e662007-08-30 03:46:43 +00001986class TestRFC2047(TestEmailBase):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001987 def test_rfc2047_multiline(self):
1988 eq = self.assertEqual
1989 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
1990 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
1991 dh = decode_header(s)
1992 eq(dh, [
1993 (b'Re:', None),
1994 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
1995 (b'baz foo bar', None),
1996 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
1997 header = make_header(dh)
1998 eq(str(header),
1999 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
Barry Warsaw00b34222007-08-31 02:35:00 +00002000 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00002001Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
2002 =?mac-iceland?q?=9Arg=8Cs?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002003
2004 def test_whitespace_eater_unicode(self):
2005 eq = self.assertEqual
2006 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
2007 dh = decode_header(s)
2008 eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
2009 (b'Pirard <pirard@dom.ain>', None)])
2010 header = str(make_header(dh))
2011 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
2012
2013 def test_whitespace_eater_unicode_2(self):
2014 eq = self.assertEqual
2015 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
2016 dh = decode_header(s)
2017 eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
2018 (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
2019 hu = str(make_header(dh))
2020 eq(hu, 'The quick brown fox jumped over the lazy dog')
2021
2022 def test_rfc2047_missing_whitespace(self):
2023 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
2024 dh = decode_header(s)
2025 self.assertEqual(dh, [(s, None)])
2026
2027 def test_rfc2047_with_whitespace(self):
2028 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
2029 dh = decode_header(s)
2030 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
2031 (b'rg', None), (b'\xe5', 'iso-8859-1'),
2032 (b'sbord', None)])
2033
R. David Murrayc4e69cc2010-08-03 22:14:10 +00002034 def test_rfc2047_B_bad_padding(self):
2035 s = '=?iso-8859-1?B?%s?='
2036 data = [ # only test complete bytes
2037 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
2038 ('dmk=', b'vi'), ('dmk', b'vi')
2039 ]
2040 for q, a in data:
2041 dh = decode_header(s % q)
2042 self.assertEqual(dh, [(a, 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002043
R. David Murray31e984c2010-10-01 15:40:20 +00002044 def test_rfc2047_Q_invalid_digits(self):
2045 # issue 10004.
2046 s = '=?iso-8659-1?Q?andr=e9=zz?='
2047 self.assertEqual(decode_header(s),
2048 [(b'andr\xe9=zz', 'iso-8659-1')])
2049
Ezio Melottib3aedd42010-11-20 19:04:17 +00002050
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002051# Test the MIMEMessage class
2052class TestMIMEMessage(TestEmailBase):
2053 def setUp(self):
2054 with openfile('msg_11.txt') as fp:
2055 self._text = fp.read()
2056
2057 def test_type_error(self):
2058 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2059
2060 def test_valid_argument(self):
2061 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002062 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002063 subject = 'A sub-message'
2064 m = Message()
2065 m['Subject'] = subject
2066 r = MIMEMessage(m)
2067 eq(r.get_content_type(), 'message/rfc822')
2068 payload = r.get_payload()
2069 unless(isinstance(payload, list))
2070 eq(len(payload), 1)
2071 subpart = payload[0]
2072 unless(subpart is m)
2073 eq(subpart['subject'], subject)
2074
2075 def test_bad_multipart(self):
2076 eq = self.assertEqual
2077 msg1 = Message()
2078 msg1['Subject'] = 'subpart 1'
2079 msg2 = Message()
2080 msg2['Subject'] = 'subpart 2'
2081 r = MIMEMessage(msg1)
2082 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2083
2084 def test_generate(self):
2085 # First craft the message to be encapsulated
2086 m = Message()
2087 m['Subject'] = 'An enclosed message'
2088 m.set_payload('Here is the body of the message.\n')
2089 r = MIMEMessage(m)
2090 r['Subject'] = 'The enclosing message'
2091 s = StringIO()
2092 g = Generator(s)
2093 g.flatten(r)
2094 self.assertEqual(s.getvalue(), """\
2095Content-Type: message/rfc822
2096MIME-Version: 1.0
2097Subject: The enclosing message
2098
2099Subject: An enclosed message
2100
2101Here is the body of the message.
2102""")
2103
2104 def test_parse_message_rfc822(self):
2105 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002106 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002107 msg = self._msgobj('msg_11.txt')
2108 eq(msg.get_content_type(), 'message/rfc822')
2109 payload = msg.get_payload()
2110 unless(isinstance(payload, list))
2111 eq(len(payload), 1)
2112 submsg = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002113 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002114 eq(submsg['subject'], 'An enclosed message')
2115 eq(submsg.get_payload(), 'Here is the body of the message.\n')
2116
2117 def test_dsn(self):
2118 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002119 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002120 # msg 16 is a Delivery Status Notification, see RFC 1894
2121 msg = self._msgobj('msg_16.txt')
2122 eq(msg.get_content_type(), 'multipart/report')
2123 unless(msg.is_multipart())
2124 eq(len(msg.get_payload()), 3)
2125 # Subpart 1 is a text/plain, human readable section
2126 subpart = msg.get_payload(0)
2127 eq(subpart.get_content_type(), 'text/plain')
2128 eq(subpart.get_payload(), """\
2129This report relates to a message you sent with the following header fields:
2130
2131 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
2132 Date: Sun, 23 Sep 2001 20:10:55 -0700
2133 From: "Ian T. Henry" <henryi@oxy.edu>
2134 To: SoCal Raves <scr@socal-raves.org>
2135 Subject: [scr] yeah for Ians!!
2136
2137Your message cannot be delivered to the following recipients:
2138
2139 Recipient address: jangel1@cougar.noc.ucla.edu
2140 Reason: recipient reached disk quota
2141
2142""")
2143 # Subpart 2 contains the machine parsable DSN information. It
2144 # consists of two blocks of headers, represented by two nested Message
2145 # objects.
2146 subpart = msg.get_payload(1)
2147 eq(subpart.get_content_type(), 'message/delivery-status')
2148 eq(len(subpart.get_payload()), 2)
2149 # message/delivery-status should treat each block as a bunch of
2150 # headers, i.e. a bunch of Message objects.
2151 dsn1 = subpart.get_payload(0)
2152 unless(isinstance(dsn1, Message))
2153 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
2154 eq(dsn1.get_param('dns', header='reporting-mta'), '')
2155 # Try a missing one <wink>
2156 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
2157 dsn2 = subpart.get_payload(1)
2158 unless(isinstance(dsn2, Message))
2159 eq(dsn2['action'], 'failed')
2160 eq(dsn2.get_params(header='original-recipient'),
2161 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
2162 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
2163 # Subpart 3 is the original message
2164 subpart = msg.get_payload(2)
2165 eq(subpart.get_content_type(), 'message/rfc822')
2166 payload = subpart.get_payload()
2167 unless(isinstance(payload, list))
2168 eq(len(payload), 1)
2169 subsubpart = payload[0]
2170 unless(isinstance(subsubpart, Message))
2171 eq(subsubpart.get_content_type(), 'text/plain')
2172 eq(subsubpart['message-id'],
2173 '<002001c144a6$8752e060$56104586@oxy.edu>')
2174
2175 def test_epilogue(self):
2176 eq = self.ndiffAssertEqual
2177 with openfile('msg_21.txt') as fp:
2178 text = fp.read()
2179 msg = Message()
2180 msg['From'] = 'aperson@dom.ain'
2181 msg['To'] = 'bperson@dom.ain'
2182 msg['Subject'] = 'Test'
2183 msg.preamble = 'MIME message'
2184 msg.epilogue = 'End of MIME message\n'
2185 msg1 = MIMEText('One')
2186 msg2 = MIMEText('Two')
2187 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2188 msg.attach(msg1)
2189 msg.attach(msg2)
2190 sfp = StringIO()
2191 g = Generator(sfp)
2192 g.flatten(msg)
2193 eq(sfp.getvalue(), text)
2194
2195 def test_no_nl_preamble(self):
2196 eq = self.ndiffAssertEqual
2197 msg = Message()
2198 msg['From'] = 'aperson@dom.ain'
2199 msg['To'] = 'bperson@dom.ain'
2200 msg['Subject'] = 'Test'
2201 msg.preamble = 'MIME message'
2202 msg.epilogue = ''
2203 msg1 = MIMEText('One')
2204 msg2 = MIMEText('Two')
2205 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2206 msg.attach(msg1)
2207 msg.attach(msg2)
2208 eq(msg.as_string(), """\
2209From: aperson@dom.ain
2210To: bperson@dom.ain
2211Subject: Test
2212Content-Type: multipart/mixed; boundary="BOUNDARY"
2213
2214MIME message
2215--BOUNDARY
2216Content-Type: text/plain; charset="us-ascii"
2217MIME-Version: 1.0
2218Content-Transfer-Encoding: 7bit
2219
2220One
2221--BOUNDARY
2222Content-Type: text/plain; charset="us-ascii"
2223MIME-Version: 1.0
2224Content-Transfer-Encoding: 7bit
2225
2226Two
2227--BOUNDARY--
2228""")
2229
2230 def test_default_type(self):
2231 eq = self.assertEqual
2232 with openfile('msg_30.txt') as fp:
2233 msg = email.message_from_file(fp)
2234 container1 = msg.get_payload(0)
2235 eq(container1.get_default_type(), 'message/rfc822')
2236 eq(container1.get_content_type(), 'message/rfc822')
2237 container2 = msg.get_payload(1)
2238 eq(container2.get_default_type(), 'message/rfc822')
2239 eq(container2.get_content_type(), 'message/rfc822')
2240 container1a = container1.get_payload(0)
2241 eq(container1a.get_default_type(), 'text/plain')
2242 eq(container1a.get_content_type(), 'text/plain')
2243 container2a = container2.get_payload(0)
2244 eq(container2a.get_default_type(), 'text/plain')
2245 eq(container2a.get_content_type(), 'text/plain')
2246
2247 def test_default_type_with_explicit_container_type(self):
2248 eq = self.assertEqual
2249 with openfile('msg_28.txt') as fp:
2250 msg = email.message_from_file(fp)
2251 container1 = msg.get_payload(0)
2252 eq(container1.get_default_type(), 'message/rfc822')
2253 eq(container1.get_content_type(), 'message/rfc822')
2254 container2 = msg.get_payload(1)
2255 eq(container2.get_default_type(), 'message/rfc822')
2256 eq(container2.get_content_type(), 'message/rfc822')
2257 container1a = container1.get_payload(0)
2258 eq(container1a.get_default_type(), 'text/plain')
2259 eq(container1a.get_content_type(), 'text/plain')
2260 container2a = container2.get_payload(0)
2261 eq(container2a.get_default_type(), 'text/plain')
2262 eq(container2a.get_content_type(), 'text/plain')
2263
2264 def test_default_type_non_parsed(self):
2265 eq = self.assertEqual
2266 neq = self.ndiffAssertEqual
2267 # Set up container
2268 container = MIMEMultipart('digest', 'BOUNDARY')
2269 container.epilogue = ''
2270 # Set up subparts
2271 subpart1a = MIMEText('message 1\n')
2272 subpart2a = MIMEText('message 2\n')
2273 subpart1 = MIMEMessage(subpart1a)
2274 subpart2 = MIMEMessage(subpart2a)
2275 container.attach(subpart1)
2276 container.attach(subpart2)
2277 eq(subpart1.get_content_type(), 'message/rfc822')
2278 eq(subpart1.get_default_type(), 'message/rfc822')
2279 eq(subpart2.get_content_type(), 'message/rfc822')
2280 eq(subpart2.get_default_type(), 'message/rfc822')
2281 neq(container.as_string(0), '''\
2282Content-Type: multipart/digest; boundary="BOUNDARY"
2283MIME-Version: 1.0
2284
2285--BOUNDARY
2286Content-Type: message/rfc822
2287MIME-Version: 1.0
2288
2289Content-Type: text/plain; charset="us-ascii"
2290MIME-Version: 1.0
2291Content-Transfer-Encoding: 7bit
2292
2293message 1
2294
2295--BOUNDARY
2296Content-Type: message/rfc822
2297MIME-Version: 1.0
2298
2299Content-Type: text/plain; charset="us-ascii"
2300MIME-Version: 1.0
2301Content-Transfer-Encoding: 7bit
2302
2303message 2
2304
2305--BOUNDARY--
2306''')
2307 del subpart1['content-type']
2308 del subpart1['mime-version']
2309 del subpart2['content-type']
2310 del subpart2['mime-version']
2311 eq(subpart1.get_content_type(), 'message/rfc822')
2312 eq(subpart1.get_default_type(), 'message/rfc822')
2313 eq(subpart2.get_content_type(), 'message/rfc822')
2314 eq(subpart2.get_default_type(), 'message/rfc822')
2315 neq(container.as_string(0), '''\
2316Content-Type: multipart/digest; boundary="BOUNDARY"
2317MIME-Version: 1.0
2318
2319--BOUNDARY
2320
2321Content-Type: text/plain; charset="us-ascii"
2322MIME-Version: 1.0
2323Content-Transfer-Encoding: 7bit
2324
2325message 1
2326
2327--BOUNDARY
2328
2329Content-Type: text/plain; charset="us-ascii"
2330MIME-Version: 1.0
2331Content-Transfer-Encoding: 7bit
2332
2333message 2
2334
2335--BOUNDARY--
2336''')
2337
2338 def test_mime_attachments_in_constructor(self):
2339 eq = self.assertEqual
2340 text1 = MIMEText('')
2341 text2 = MIMEText('')
2342 msg = MIMEMultipart(_subparts=(text1, text2))
2343 eq(len(msg.get_payload()), 2)
2344 eq(msg.get_payload(0), text1)
2345 eq(msg.get_payload(1), text2)
2346
Christian Heimes587c2bf2008-01-19 16:21:02 +00002347 def test_default_multipart_constructor(self):
2348 msg = MIMEMultipart()
2349 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002350
Ezio Melottib3aedd42010-11-20 19:04:17 +00002351
# A general test of parser->model->generator idempotency.  In other words,
# read a message in, parse it into a message object tree, then, without
# touching the tree, regenerate the plain text.  The original text and the
# regenerated text should be identical.  Note that we ignore the Unix-From
# line, since it may contain a changed date.
class TestIdempotent(TestEmailBase):
    # Round-trip checks: each test parses a message data file and requires
    # that flattening the parsed object tree reproduces the text that was
    # read.

    # Line ending appended to the expected preamble, epilogue and payload
    # strings in the assertions below.
    linesep = '\n'

    def _msgobj(self, filename):
        # Read the named data file and return a (message, text) pair: the
        # object parsed from the file's text, and that text itself.
        with openfile(filename) as fp:
            data = fp.read()
        return email.message_from_string(data), data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with maxheaderlen=0 and require the generated text to
        # equal text.  unixfrom is handed straight to Generator.flatten().
        buffer = StringIO()
        Generator(buffer, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, buffer.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        # The content type is still reported as text/plain, but no
        # parameters (and so no charset) are available.
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # Unlike the other round trips, this one is flattened with
        # unixfrom=True.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        eq = self.assertEqual
        body_line = 'Yadda yadda yadda' + self.linesep
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        for index in (0, 1):
            part = msg.get_payload(index)
            eq(part.get_content_type(), 'text/plain')
            eq(part.get_payload(), body_line)
        wrapper = msg.get_payload(2)
        eq(wrapper.get_content_type(), 'message/rfc822')
        self.assertIsInstance(wrapper, Message)
        payload = wrapper.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        inner = payload[0]
        self.assertIsInstance(inner, Message)
        eq(inner.get_payload(), body_line)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        inner = payload[0]
        self.assertIsInstance(inner, Message)
        eq(inner.get_content_type(), 'text/plain')
        self.assertIsInstance(inner.get_payload(), str)
        eq(inner.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002514
2515
Ezio Melottib3aedd42010-11-20 19:04:17 +00002516
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002517# Test various other bits of the package's functionality
2518class TestMiscellaneous(TestEmailBase):
2519 def test_message_from_string(self):
2520 with openfile('msg_01.txt') as fp:
2521 text = fp.read()
2522 msg = email.message_from_string(text)
2523 s = StringIO()
2524 # Don't wrap/continue long headers since we're trying to test
2525 # idempotency.
2526 g = Generator(s, maxheaderlen=0)
2527 g.flatten(msg)
2528 self.assertEqual(text, s.getvalue())
2529
2530 def test_message_from_file(self):
2531 with openfile('msg_01.txt') as fp:
2532 text = fp.read()
2533 fp.seek(0)
2534 msg = email.message_from_file(fp)
2535 s = StringIO()
2536 # Don't wrap/continue long headers since we're trying to test
2537 # idempotency.
2538 g = Generator(s, maxheaderlen=0)
2539 g.flatten(msg)
2540 self.assertEqual(text, s.getvalue())
2541
2542 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002543 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002544 with openfile('msg_01.txt') as fp:
2545 text = fp.read()
2546
2547 # Create a subclass
2548 class MyMessage(Message):
2549 pass
2550
2551 msg = email.message_from_string(text, MyMessage)
2552 unless(isinstance(msg, MyMessage))
2553 # Try something more complicated
2554 with openfile('msg_02.txt') as fp:
2555 text = fp.read()
2556 msg = email.message_from_string(text, MyMessage)
2557 for subpart in msg.walk():
2558 unless(isinstance(subpart, MyMessage))
2559
2560 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002561 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002562 # Create a subclass
2563 class MyMessage(Message):
2564 pass
2565
2566 with openfile('msg_01.txt') as fp:
2567 msg = email.message_from_file(fp, MyMessage)
2568 unless(isinstance(msg, MyMessage))
2569 # Try something more complicated
2570 with openfile('msg_02.txt') as fp:
2571 msg = email.message_from_file(fp, MyMessage)
2572 for subpart in msg.walk():
2573 unless(isinstance(subpart, MyMessage))
2574
R David Murrayc27e5222012-05-25 15:01:48 -04002575 def test_custom_message_does_not_require_arguments(self):
2576 class MyMessage(Message):
2577 def __init__(self):
2578 super().__init__()
2579 msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2580 self.assertTrue(isinstance(msg, MyMessage))
2581
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002582 def test__all__(self):
2583 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002584 self.assertEqual(sorted(module.__all__), [
2585 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2586 'generator', 'header', 'iterators', 'message',
2587 'message_from_binary_file', 'message_from_bytes',
2588 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002589 'quoprimime', 'utils',
2590 ])
2591
2592 def test_formatdate(self):
2593 now = time.time()
2594 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2595 time.gmtime(now)[:6])
2596
2597 def test_formatdate_localtime(self):
2598 now = time.time()
2599 self.assertEqual(
2600 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2601 time.localtime(now)[:6])
2602
2603 def test_formatdate_usegmt(self):
2604 now = time.time()
2605 self.assertEqual(
2606 utils.formatdate(now, localtime=False),
2607 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2608 self.assertEqual(
2609 utils.formatdate(now, localtime=False, usegmt=True),
2610 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2611
2612 def test_parsedate_none(self):
2613 self.assertEqual(utils.parsedate(''), None)
2614
2615 def test_parsedate_compact(self):
2616 # The FWS after the comma is optional
2617 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2618 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2619
2620 def test_parsedate_no_dayofweek(self):
2621 eq = self.assertEqual
2622 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2623 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2624
2625 def test_parsedate_compact_no_dayofweek(self):
2626 eq = self.assertEqual
2627 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2628 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2629
R. David Murray4a62e892010-12-23 20:35:46 +00002630 def test_parsedate_no_space_before_positive_offset(self):
2631 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2632 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2633
2634 def test_parsedate_no_space_before_negative_offset(self):
2635 # Issue 1155362: we already handled '+' for this case.
2636 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2637 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2638
2639
R David Murrayaccd1c02011-03-13 20:06:23 -04002640 def test_parsedate_accepts_time_with_dots(self):
2641 eq = self.assertEqual
2642 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2643 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2644 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2645 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2646
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002647 def test_parsedate_acceptable_to_time_functions(self):
2648 eq = self.assertEqual
2649 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2650 t = int(time.mktime(timetup))
2651 eq(time.localtime(t)[:6], timetup[:6])
2652 eq(int(time.strftime('%Y', timetup)), 2003)
2653 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2654 t = int(time.mktime(timetup[:9]))
2655 eq(time.localtime(t)[:6], timetup[:6])
2656 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2657
R. David Murray219d1c82010-08-25 00:45:55 +00002658 def test_parsedate_y2k(self):
2659 """Test for parsing a date with a two-digit year.
2660
2661 Parsing a date with a two-digit year should return the correct
2662 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2663 obsoletes RFC822) requires four-digit years.
2664
2665 """
2666 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2667 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2668 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2669 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2670
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002671 def test_parseaddr_empty(self):
2672 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2673 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2674
2675 def test_noquote_dump(self):
2676 self.assertEqual(
2677 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2678 'A Silly Person <person@dom.ain>')
2679
2680 def test_escape_dump(self):
2681 self.assertEqual(
2682 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002683 r'"A (Very) Silly Person" <person@dom.ain>')
2684 self.assertEqual(
2685 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2686 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002687 a = r'A \(Special\) Person'
2688 b = 'person@dom.ain'
2689 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2690
2691 def test_escape_backslashes(self):
2692 self.assertEqual(
2693 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2694 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2695 a = r'Arthur \Backslash\ Foobar'
2696 b = 'person@dom.ain'
2697 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2698
R David Murray8debacb2011-04-06 09:35:57 -04002699 def test_quotes_unicode_names(self):
2700 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2701 name = "H\u00e4ns W\u00fcrst"
2702 addr = 'person@dom.ain'
2703 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2704 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2705 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2706 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2707 latin1_quopri)
2708
2709 def test_accepts_any_charset_like_object(self):
2710 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2711 name = "H\u00e4ns W\u00fcrst"
2712 addr = 'person@dom.ain'
2713 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2714 foobar = "FOOBAR"
2715 class CharsetMock:
2716 def header_encode(self, string):
2717 return foobar
2718 mock = CharsetMock()
2719 mock_expected = "%s <%s>" % (foobar, addr)
2720 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2721 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2722 utf8_base64)
2723
2724 def test_invalid_charset_like_object_raises_error(self):
2725 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2726 name = "H\u00e4ns W\u00fcrst"
2727 addr = 'person@dom.ain'
2728 # A object without a header_encode method:
2729 bad_charset = object()
2730 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2731 bad_charset)
2732
2733 def test_unicode_address_raises_error(self):
2734 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2735 addr = 'pers\u00f6n@dom.in'
2736 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2737 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2738
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002739 def test_name_with_dot(self):
2740 x = 'John X. Doe <jxd@example.com>'
2741 y = '"John X. Doe" <jxd@example.com>'
2742 a, b = ('John X. Doe', 'jxd@example.com')
2743 self.assertEqual(utils.parseaddr(x), (a, b))
2744 self.assertEqual(utils.parseaddr(y), (a, b))
2745 # formataddr() quotes the name if there's a dot in it
2746 self.assertEqual(utils.formataddr((a, b)), y)
2747
R. David Murray5397e862010-10-02 15:58:26 +00002748 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2749 # issue 10005. Note that in the third test the second pair of
2750 # backslashes is not actually a quoted pair because it is not inside a
2751 # comment or quoted string: the address being parsed has a quoted
2752 # string containing a quoted backslash, followed by 'example' and two
2753 # backslashes, followed by another quoted string containing a space and
2754 # the word 'example'. parseaddr copies those two backslashes
2755 # literally. Per rfc5322 this is not technically correct since a \ may
2756 # not appear in an address outside of a quoted string. It is probably
2757 # a sensible Postel interpretation, though.
2758 eq = self.assertEqual
2759 eq(utils.parseaddr('""example" example"@example.com'),
2760 ('', '""example" example"@example.com'))
2761 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2762 ('', '"\\"example\\" example"@example.com'))
2763 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2764 ('', '"\\\\"example\\\\" example"@example.com'))
2765
R. David Murray63563cd2010-12-18 18:25:38 +00002766 def test_parseaddr_preserves_spaces_in_local_part(self):
2767 # issue 9286. A normal RFC5322 local part should not contain any
2768 # folding white space, but legacy local parts can (they are a sequence
2769 # of atoms, not dotatoms). On the other hand we strip whitespace from
2770 # before the @ and around dots, on the assumption that the whitespace
2771 # around the punctuation is a mistake in what would otherwise be
2772 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2773 self.assertEqual(('', "merwok wok@xample.com"),
2774 utils.parseaddr("merwok wok@xample.com"))
2775 self.assertEqual(('', "merwok wok@xample.com"),
2776 utils.parseaddr("merwok wok@xample.com"))
2777 self.assertEqual(('', "merwok wok@xample.com"),
2778 utils.parseaddr(" merwok wok @xample.com"))
2779 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2780 utils.parseaddr('merwok"wok" wok@xample.com'))
2781 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2782 utils.parseaddr('merwok. wok . wok@xample.com'))
2783
R David Murrayb53319f2012-03-14 15:31:47 -04002784 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2785 addr = ("'foo@example.com' (foo@example.com)",
2786 'foo@example.com')
2787 addrstr = ('"\'foo@example.com\' '
2788 '(foo@example.com)" <foo@example.com>')
2789 self.assertEqual(utils.parseaddr(addrstr), addr)
2790 self.assertEqual(utils.formataddr(addr), addrstr)
2791
2792
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002793 def test_multiline_from_comment(self):
2794 x = """\
2795Foo
2796\tBar <foo@example.com>"""
2797 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2798
2799 def test_quote_dump(self):
2800 self.assertEqual(
2801 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2802 r'"A Silly; Person" <person@dom.ain>')
2803
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002804 def test_charset_richcomparisons(self):
2805 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002806 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002807 cset1 = Charset()
2808 cset2 = Charset()
2809 eq(cset1, 'us-ascii')
2810 eq(cset1, 'US-ASCII')
2811 eq(cset1, 'Us-AsCiI')
2812 eq('us-ascii', cset1)
2813 eq('US-ASCII', cset1)
2814 eq('Us-AsCiI', cset1)
2815 ne(cset1, 'usascii')
2816 ne(cset1, 'USASCII')
2817 ne(cset1, 'UsAsCiI')
2818 ne('usascii', cset1)
2819 ne('USASCII', cset1)
2820 ne('UsAsCiI', cset1)
2821 eq(cset1, cset2)
2822 eq(cset2, cset1)
2823
2824 def test_getaddresses(self):
2825 eq = self.assertEqual
2826 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2827 'Bud Person <bperson@dom.ain>']),
2828 [('Al Person', 'aperson@dom.ain'),
2829 ('Bud Person', 'bperson@dom.ain')])
2830
2831 def test_getaddresses_nasty(self):
2832 eq = self.assertEqual
2833 eq(utils.getaddresses(['foo: ;']), [('', '')])
2834 eq(utils.getaddresses(
2835 ['[]*-- =~$']),
2836 [('', ''), ('', ''), ('', '*--')])
2837 eq(utils.getaddresses(
2838 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2839 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2840
2841 def test_getaddresses_embedded_comment(self):
2842 """Test proper handling of a nested comment"""
2843 eq = self.assertEqual
2844 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2845 eq(addrs[0][1], 'foo@bar.com')
2846
2847 def test_utils_quote_unquote(self):
2848 eq = self.assertEqual
2849 msg = Message()
2850 msg.add_header('content-disposition', 'attachment',
2851 filename='foo\\wacky"name')
2852 eq(msg.get_filename(), 'foo\\wacky"name')
2853
2854 def test_get_body_encoding_with_bogus_charset(self):
2855 charset = Charset('not a charset')
2856 self.assertEqual(charset.get_body_encoding(), 'base64')
2857
2858 def test_get_body_encoding_with_uppercase_charset(self):
2859 eq = self.assertEqual
2860 msg = Message()
2861 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2862 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2863 charsets = msg.get_charsets()
2864 eq(len(charsets), 1)
2865 eq(charsets[0], 'utf-8')
2866 charset = Charset(charsets[0])
2867 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002868 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002869 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2870 eq(msg.get_payload(decode=True), b'hello world')
2871 eq(msg['content-transfer-encoding'], 'base64')
2872 # Try another one
2873 msg = Message()
2874 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2875 charsets = msg.get_charsets()
2876 eq(len(charsets), 1)
2877 eq(charsets[0], 'us-ascii')
2878 charset = Charset(charsets[0])
2879 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2880 msg.set_payload('hello world', charset=charset)
2881 eq(msg.get_payload(), 'hello world')
2882 eq(msg['content-transfer-encoding'], '7bit')
2883
2884 def test_charsets_case_insensitive(self):
2885 lc = Charset('us-ascii')
2886 uc = Charset('US-ASCII')
2887 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2888
2889 def test_partial_falls_inside_message_delivery_status(self):
2890 eq = self.ndiffAssertEqual
2891 # The Parser interface provides chunks of data to FeedParser in 8192
2892 # byte gulps. SF bug #1076485 found one of those chunks inside
2893 # message/delivery-status header block, which triggered an
2894 # unreadline() of NeedMoreData.
2895 msg = self._msgobj('msg_43.txt')
2896 sfp = StringIO()
2897 iterators._structure(msg, sfp)
2898 eq(sfp.getvalue(), """\
2899multipart/report
2900 text/plain
2901 message/delivery-status
2902 text/plain
2903 text/plain
2904 text/plain
2905 text/plain
2906 text/plain
2907 text/plain
2908 text/plain
2909 text/plain
2910 text/plain
2911 text/plain
2912 text/plain
2913 text/plain
2914 text/plain
2915 text/plain
2916 text/plain
2917 text/plain
2918 text/plain
2919 text/plain
2920 text/plain
2921 text/plain
2922 text/plain
2923 text/plain
2924 text/plain
2925 text/plain
2926 text/plain
2927 text/plain
2928 text/rfc822-headers
2929""")
2930
R. David Murraya0b44b52010-12-02 21:47:19 +00002931 def test_make_msgid_domain(self):
2932 self.assertEqual(
2933 email.utils.make_msgid(domain='testdomain-string')[-19:],
2934 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002935
Ezio Melottib3aedd42010-11-20 19:04:17 +00002936
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002937# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Checks for the helpers in email.iterators, plus a regression test for
    # the feed parser's line buffering.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Selecting on the main type 'text' must yield exactly two parts.
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Selecting on text/plain must yield exactly one part for
        # msg_03.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (text to push, number of complete lines expected to
        # become readable as a result of that push).
        imt = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for chunk, expected_lines in imt:
            bsf.push(chunk)
            expected_total += expected_lines
            # Drain every line that is available after this push.
            lines_read = 0
            while True:
                line = bsf.readline()
                if line is NeedMoreData:
                    break
                collected.append(line)
                lines_read += 1
            self.assertEqual(expected_lines, lines_read)
        self.assertEqual(len(collected), expected_total)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(chunk for chunk, _ in imt),
                         ''.join(collected))
3024
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003025
Ezio Melottib3aedd42010-11-20 19:04:17 +00003026
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003027class TestParsers(TestEmailBase):
R David Murrayb35c8502011-04-13 16:46:05 -04003028
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003029 def test_header_parser(self):
3030 eq = self.assertEqual
3031 # Parse only the headers of a complex multipart MIME document
3032 with openfile('msg_02.txt') as fp:
3033 msg = HeaderParser().parse(fp)
3034 eq(msg['from'], 'ppp-request@zzz.org')
3035 eq(msg['to'], 'ppp@zzz.org')
3036 eq(msg.get_content_type(), 'multipart/mixed')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003037 self.assertFalse(msg.is_multipart())
3038 self.assertTrue(isinstance(msg.get_payload(), str))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003039
R David Murrayb35c8502011-04-13 16:46:05 -04003040 def test_bytes_header_parser(self):
3041 eq = self.assertEqual
3042 # Parse only the headers of a complex multipart MIME document
3043 with openfile('msg_02.txt', 'rb') as fp:
3044 msg = email.parser.BytesHeaderParser().parse(fp)
3045 eq(msg['from'], 'ppp-request@zzz.org')
3046 eq(msg['to'], 'ppp@zzz.org')
3047 eq(msg.get_content_type(), 'multipart/mixed')
3048 self.assertFalse(msg.is_multipart())
3049 self.assertTrue(isinstance(msg.get_payload(), str))
3050 self.assertTrue(isinstance(msg.get_payload(decode=True), bytes))
3051
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003052 def test_whitespace_continuation(self):
3053 eq = self.assertEqual
3054 # This message contains a line after the Subject: header that has only
3055 # whitespace, but it is not empty!
3056 msg = email.message_from_string("""\
3057From: aperson@dom.ain
3058To: bperson@dom.ain
3059Subject: the next line has a space on it
3060\x20
3061Date: Mon, 8 Apr 2002 15:09:19 -0400
3062Message-ID: spam
3063
3064Here's the message body
3065""")
3066 eq(msg['subject'], 'the next line has a space on it\n ')
3067 eq(msg['message-id'], 'spam')
3068 eq(msg.get_payload(), "Here's the message body\n")
3069
3070 def test_whitespace_continuation_last_header(self):
3071 eq = self.assertEqual
3072 # Like the previous test, but the subject line is the last
3073 # header.
3074 msg = email.message_from_string("""\
3075From: aperson@dom.ain
3076To: bperson@dom.ain
3077Date: Mon, 8 Apr 2002 15:09:19 -0400
3078Message-ID: spam
3079Subject: the next line has a space on it
3080\x20
3081
3082Here's the message body
3083""")
3084 eq(msg['subject'], 'the next line has a space on it\n ')
3085 eq(msg['message-id'], 'spam')
3086 eq(msg.get_payload(), "Here's the message body\n")
3087
3088 def test_crlf_separation(self):
3089 eq = self.assertEqual
Guido van Rossum98297ee2007-11-06 21:34:58 +00003090 with openfile('msg_26.txt', newline='\n') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003091 msg = Parser().parse(fp)
3092 eq(len(msg.get_payload()), 2)
3093 part1 = msg.get_payload(0)
3094 eq(part1.get_content_type(), 'text/plain')
3095 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
3096 part2 = msg.get_payload(1)
3097 eq(part2.get_content_type(), 'application/riscos')
3098
R. David Murray8451c4b2010-10-23 22:19:56 +00003099 def test_crlf_flatten(self):
3100 # Using newline='\n' preserves the crlfs in this input file.
3101 with openfile('msg_26.txt', newline='\n') as fp:
3102 text = fp.read()
3103 msg = email.message_from_string(text)
3104 s = StringIO()
3105 g = Generator(s)
3106 g.flatten(msg, linesep='\r\n')
3107 self.assertEqual(s.getvalue(), text)
3108
    # unittest: None removes the length limit on diffs shown when an
    # assertion fails.
    maxDiff = None
3110
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003111 def test_multipart_digest_with_extra_mime_headers(self):
3112 eq = self.assertEqual
3113 neq = self.ndiffAssertEqual
3114 with openfile('msg_28.txt') as fp:
3115 msg = email.message_from_file(fp)
3116 # Structure is:
3117 # multipart/digest
3118 # message/rfc822
3119 # text/plain
3120 # message/rfc822
3121 # text/plain
3122 eq(msg.is_multipart(), 1)
3123 eq(len(msg.get_payload()), 2)
3124 part1 = msg.get_payload(0)
3125 eq(part1.get_content_type(), 'message/rfc822')
3126 eq(part1.is_multipart(), 1)
3127 eq(len(part1.get_payload()), 1)
3128 part1a = part1.get_payload(0)
3129 eq(part1a.is_multipart(), 0)
3130 eq(part1a.get_content_type(), 'text/plain')
3131 neq(part1a.get_payload(), 'message 1\n')
3132 # next message/rfc822
3133 part2 = msg.get_payload(1)
3134 eq(part2.get_content_type(), 'message/rfc822')
3135 eq(part2.is_multipart(), 1)
3136 eq(len(part2.get_payload()), 1)
3137 part2a = part2.get_payload(0)
3138 eq(part2a.is_multipart(), 0)
3139 eq(part2a.get_content_type(), 'text/plain')
3140 neq(part2a.get_payload(), 'message 2\n')
3141
3142 def test_three_lines(self):
3143 # A bug report by Andrew McNamara
3144 lines = ['From: Andrew Person <aperson@dom.ain',
3145 'Subject: Test',
3146 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
3147 msg = email.message_from_string(NL.join(lines))
3148 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3149
3150 def test_strip_line_feed_and_carriage_return_in_headers(self):
3151 eq = self.assertEqual
3152 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3153 value1 = 'text'
3154 value2 = 'more text'
3155 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3156 value1, value2)
3157 msg = email.message_from_string(m)
3158 eq(msg.get('Header'), value1)
3159 eq(msg.get('Next-Header'), value2)
3160
3161 def test_rfc2822_header_syntax(self):
3162 eq = self.assertEqual
3163 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3164 msg = email.message_from_string(m)
3165 eq(len(msg), 3)
3166 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3167 eq(msg.get_payload(), 'body')
3168
3169 def test_rfc2822_space_not_allowed_in_header(self):
3170 eq = self.assertEqual
3171 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3172 msg = email.message_from_string(m)
3173 eq(len(msg.keys()), 0)
3174
3175 def test_rfc2822_one_character_header(self):
3176 eq = self.assertEqual
3177 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3178 msg = email.message_from_string(m)
3179 headers = msg.keys()
3180 headers.sort()
3181 eq(headers, ['A', 'B', 'CC'])
3182 eq(msg.get_payload(), 'body')
3183
R. David Murray45e0e142010-06-16 02:19:40 +00003184 def test_CRLFLF_at_end_of_part(self):
3185 # issue 5610: feedparser should not eat two chars from body part ending
3186 # with "\r\n\n".
3187 m = (
3188 "From: foo@bar.com\n"
3189 "To: baz\n"
3190 "Mime-Version: 1.0\n"
3191 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3192 "\n"
3193 "--BOUNDARY\n"
3194 "Content-Type: text/plain\n"
3195 "\n"
3196 "body ending with CRLF newline\r\n"
3197 "\n"
3198 "--BOUNDARY--\n"
3199 )
3200 msg = email.message_from_string(m)
3201 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003202
Ezio Melottib3aedd42010-11-20 19:04:17 +00003203
class Test8BitBytesHandling(unittest.TestCase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.
    #
    # NOTE(review): indentation inside the dedent()ed literals below (in
    # particular the leading whitespace of header continuation lines) was
    # reconstructed -- confirm against the upstream file.

    # Template for the body tests; charset, cte and bodyline are filled in
    # with str.format() by each test.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def test_known_8bit_CTE(self):
        # With a known charset the payload decodes to the original text, and
        # decode=True returns the original bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        # With an unknown charset each non-ascii byte shows up as U+FFFD in
        # the str payload, while decode=True still returns the raw bytes.
        m = self.bodytest_msg.format(charset='notavalidcharset',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        m = self.bodytest_msg.format(charset='ascii',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(msg.get_payload(decode=True),
                        'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='base64',
                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='uuencode',
                                     bodyline='<,.V<W1A; á ').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))


    # Pairs of (raw header line as sent, (name, value as printed by str(msg))).
    # Note that the From header occurs twice.
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    # The raw header lines followed directly by a one-line body (there is no
    # blank separator line in the input), encoded as utf-8.
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        # Both access styles return a value whose str() has U+FFFD in place
        # of each non-ascii byte.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        # str(msg) emits the 8bit headers as unknown-8bit encoded words; the
        # expected values are the second elements of headertest_headers.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg),
                         textwrap.dedent("""\
                            From: {}
                            To: {}
                            Subject: {}
                            From: {}

                            Yes, they are flying.
                            """).format(*[expected[1] for (_, expected) in
                                        self.headertest_headers]))

    def test_values_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.values()],
                              ['foo@bar.com',
                               'b\uFFFD\uFFFDz',
                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                   'coll\uFFFD\uFFFDgue, le pouf '
                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                   '\tJean de Baddie',
                               "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
                              [('From', 'foo@bar.com'),
                               ('To', 'b\uFFFD\uFFFDz'),
                               ('Subject', 'Maintenant je vous '
                                  'pr\uFFFD\uFFFDsente '
                                  'mon coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie'),
                               ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.get_all('from')],
                              ['foo@bar.com',
                               'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        # A non-ascii byte inside the subtype is reported as U+FFFD; the
        # maintype is unaffected.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/pl\xA7in; charset=utf-8
            """).encode('latin-1'))
        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(msg.get_content_maintype(), "text")
        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")

    def test_get_params_with_8bit(self):
        msg = email.message_from_bytes(
            'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
        self.assertEqual(msg.get_params(header='x-header'),
           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)

    def test_get_rfc2231_params_with_8bit(self):
        # The RFC 2231 value is returned as a (charset, language, value)
        # tuple with the non-ascii byte replaced by U+FFFD.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        self.assertEqual(msg.get_param('title'),
            ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        # Replacing the parameter works even though the old value has an
        # 8bit byte in it.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.set_param('title', 'test')
        self.assertEqual(msg.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        # Deleting the parameter leaves the rest of the header usable.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.del_param('title')
        self.assertEqual(msg.get_param('title'), None)
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        # The Content-Transfer-Encoding value itself contains an 8bit byte;
        # the payload comes back unchanged, as str or (with decode) as bytes.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Transfer-Encoding: b\xa7se64
            Content-Type: text/plain; charset=latin-1

            payload
            """).encode('latin-1'))
        self.assertEqual(msg.get_payload(), 'payload\n')
        self.assertEqual(msg.get_payload(decode=True), b'payload\n')

    # utf-8 encoded message with 8bit bytes in both headers and body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        # BytesGenerator must reproduce the 8bit input byte for byte.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        #Issue 11019
        msg = email.message.Message()
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), b"\n")

    # What the str-based Generator is expected to emit for non_latin_bin_msg:
    # unknown-8bit encoded-word headers (wrapped) and a base64 body.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = StringIO()
        email.generator.Generator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        lines = out.getvalue().split(b'\n')
        self.assertEqual(lines[0].split()[0], b'From')
        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)

    # Variant of the wrapped text in which the first two Subject lines
    # (list items 2 and 3) are replaced by a single line; this is what
    # str(msg) is compared against in test_message_from_binary_file.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_message_from_binary_file(self):
        # Round-trip through a real file opened in binary mode; the file is
        # removed again by the registered cleanup.
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    # latin-1 encoded message with an 8bit body.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected str() rendering of latin_bin_msg: charset renamed to
    # iso-8859-1 and body re-encoded as quoted-printable.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(m)
        # DecodedGenerator output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(out.getvalue(),
                         self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        # Feed the message in 10-byte chunks to exercise incremental parsing.
        bfp = email.feedparser.BytesFeedParser()
        for i in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[i:i+10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        # Bytes round trip of a data file with linesep='\r\n'.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_8bit_multipart(self):
        # Issue 11605
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
                "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
                    <title>foodwatch - Newsletter</title>
            </head>
            <body>
              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
                 die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        msg = email.message_from_bytes(source)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in BytesGenerator, test Generator for completeness.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = StringIO()
        g = email.generator.Generator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    # unittest: None removes the length limit on failure diffs.
    maxDiff = None
3600
Ezio Melottib3aedd42010-11-20 19:04:17 +00003601
class BaseTestBytesGeneratorIdempotent:
    # Mixin supplying _msgobj() and _idempotent() built on bytes parsing and
    # BytesGenerator.  It reads linesep, blinesep and
    # normalize_linesep_regex from the concrete class.

    maxDiff = None

    def _msgobj(self, filename):
        # Return (message, bytes) for the named data file, after rewriting
        # its line endings to self.blinesep.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg (maxheaderlen=0) and require the output to equal data.
        buffer = BytesIO()
        generator = email.generator.BytesGenerator(buffer, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, buffer.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003618
3619
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF flavour: every CRLF pair in the input data is rewritten to LF
    # before parsing, and LF is used when flattening.
    normalize_linesep_regex = re.compile(br'\r\n')
    linesep = '\n'
    blinesep = linesep.encode('ascii')
3625
3626
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour: every LF not already preceded by CR is rewritten to
    # CRLF before parsing, and CRLF is used when flattening.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    linesep = '\r\n'
    blinesep = linesep.encode('ascii')
3632
Ezio Melottib3aedd42010-11-20 19:04:17 +00003633
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        # header_length() must agree with the length of the real encoding.
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Four output characters for every started group of three input
            # characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ... 13..14 -> 20.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input yields an empty bytes object, not an empty str.
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks are encoded like any other character.
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        # This used to repeat the default-charset check from above; pass the
        # charset so that multi-line input with an explicit charset is
        # actually covered.
        eq(he('hello\nworld', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8Kd29ybGQ=?=')
3685
Ezio Melottib3aedd42010-11-20 19:04:17 +00003686
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003687class TestQuopri(unittest.TestCase):
3688 def setUp(self):
3689 # Set of characters (as byte integers) that don't need to be encoded
3690 # in headers.
3691 self.hlit = list(chain(
3692 range(ord('a'), ord('z') + 1),
3693 range(ord('A'), ord('Z') + 1),
3694 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003695 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003696 # Set of characters (as byte integers) that do need to be encoded in
3697 # headers.
3698 self.hnon = [c for c in range(256) if c not in self.hlit]
3699 assert len(self.hlit) + len(self.hnon) == 256
3700 # Set of characters (as byte integers) that don't need to be encoded
3701 # in bodies.
3702 self.blit = list(range(ord(' '), ord('~') + 1))
3703 self.blit.append(ord('\t'))
3704 self.blit.remove(ord('='))
3705 # Set of characters (as byte integers) that do need to be encoded in
3706 # bodies.
3707 self.bnon = [c for c in range(256) if c not in self.blit]
3708 assert len(self.blit) + len(self.bnon) == 256
3709
Guido van Rossum9604e662007-08-30 03:46:43 +00003710 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003711 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003712 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003713 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003714 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003715 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003716 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003717
Guido van Rossum9604e662007-08-30 03:46:43 +00003718 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003719 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003720 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003721 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003722 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003723 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003724 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003725
3726 def test_header_quopri_len(self):
3727 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003728 eq(quoprimime.header_length(b'hello'), 5)
3729 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003730 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003731 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003732 # =?xxx?q?...?= means 10 extra characters
3733 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003734 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3735 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003736 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003737 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003738 # =?xxx?q?...?= means 10 extra characters
3739 10)
3740 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003741 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003742 'expected length 1 for %r' % chr(c))
3743 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003744 # Space is special; it's encoded to _
3745 if c == ord(' '):
3746 continue
3747 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003748 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003749 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003750
3751 def test_body_quopri_len(self):
3752 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003753 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003754 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003755 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003756 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003757
3758 def test_quote_unquote_idempotent(self):
3759 for x in range(256):
3760 c = chr(x)
3761 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3762
R David Murrayec1b5b82011-03-23 14:19:05 -04003763 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3764 if charset is None:
3765 encoded_header = quoprimime.header_encode(header)
3766 else:
3767 encoded_header = quoprimime.header_encode(header, charset)
3768 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003769
R David Murraycafd79d2011-03-23 15:25:55 -04003770 def test_header_encode_null(self):
3771 self._test_header_encode(b'', '')
3772
R David Murrayec1b5b82011-03-23 14:19:05 -04003773 def test_header_encode_one_word(self):
3774 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3775
3776 def test_header_encode_two_lines(self):
3777 self._test_header_encode(b'hello\nworld',
3778 '=?iso-8859-1?q?hello=0Aworld?=')
3779
3780 def test_header_encode_non_ascii(self):
3781 self._test_header_encode(b'hello\xc7there',
3782 '=?iso-8859-1?q?hello=C7there?=')
3783
3784 def test_header_encode_alt_charset(self):
3785 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3786 charset='iso-8859-2')
3787
3788 def _test_header_decode(self, encoded_header, expected_decoded_header):
3789 decoded_header = quoprimime.header_decode(encoded_header)
3790 self.assertEqual(decoded_header, expected_decoded_header)
3791
3792 def test_header_decode_null(self):
3793 self._test_header_decode('', '')
3794
3795 def test_header_decode_one_word(self):
3796 self._test_header_decode('hello', 'hello')
3797
3798 def test_header_decode_two_lines(self):
3799 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3800
3801 def test_header_decode_non_ascii(self):
3802 self._test_header_decode('hello=C7there', 'hello\xc7there')
3803
def _test_decode(self, encoded, expected_decoded, eol=None):
    # Helper: decode `encoded` with quoprimime.decode() and compare with
    # the expected text.  `eol` is passed on only when given, so that the
    # decoder's default line ending is used otherwise.
    decode_kwargs = {} if eol is None else {'eol': eol}
    self.assertEqual(quoprimime.decode(encoded, **decode_kwargs),
                     expected_decoded)
3810
def test_decode_null_word(self):
    self._test_decode(encoded='', expected_decoded='')

def test_decode_null_line_null_word(self):
    # A lone CRLF becomes the default eol.
    self._test_decode(encoded='\r\n', expected_decoded='\n')

def test_decode_one_word(self):
    self._test_decode(encoded='hello', expected_decoded='hello')

def test_decode_one_word_eol(self):
    # No line ending in the input, so the custom eol never appears.
    self._test_decode(encoded='hello', expected_decoded='hello', eol='X')

def test_decode_one_line(self):
    self._test_decode(encoded='hello\r\n', expected_decoded='hello\n')

def test_decode_one_line_lf(self):
    self._test_decode(encoded='hello\n', expected_decoded='hello\n')

def test_decode_one_line_cr(self):
    # A bare CR also counts as a line ending.
    self._test_decode(encoded='hello\r', expected_decoded='hello\n')

def test_decode_one_line_nl(self):
    self._test_decode(encoded='hello\n', expected_decoded='helloX', eol='X')

def test_decode_one_line_crnl(self):
    self._test_decode(encoded='hello\r\n', expected_decoded='helloX',
                      eol='X')

def test_decode_one_line_one_word(self):
    self._test_decode(encoded='hello\r\nworld',
                      expected_decoded='hello\nworld')

def test_decode_one_line_one_word_eol(self):
    self._test_decode(encoded='hello\r\nworld',
                      expected_decoded='helloXworld', eol='X')

def test_decode_two_lines(self):
    self._test_decode(encoded='hello\r\nworld\r\n',
                      expected_decoded='hello\nworld\n')

def test_decode_two_lines_eol(self):
    self._test_decode(encoded='hello\r\nworld\r\n',
                      expected_decoded='helloXworldX', eol='X')
3849
def test_decode_one_long_line(self):
    # A 1000-character line without line breaks is returned unchanged.
    long_text = 'Spam' * 250
    self._test_decode(encoded=long_text, expected_decoded=long_text)

def test_decode_one_space(self):
    # Whitespace-only input decodes to the empty string.
    self._test_decode(encoded=' ', expected_decoded='')

def test_decode_multiple_spaces(self):
    self._test_decode(encoded=' ' * 5, expected_decoded='')

def test_decode_one_line_trailing_spaces(self):
    # Whitespace in front of the line ending is dropped.
    self._test_decode(encoded='hello \r\n', expected_decoded='hello\n')

def test_decode_two_lines_trailing_spaces(self):
    self._test_decode(encoded='hello \r\nworld \r\n',
                      expected_decoded='hello\nworld\n')

def test_decode_quoted_word(self):
    self._test_decode(encoded='=22quoted=20words=22',
                      expected_decoded='"quoted words"')

def test_decode_uppercase_quoting(self):
    self._test_decode(encoded='ab=CD=EF', expected_decoded='ab\xcd\xef')

def test_decode_lowercase_quoting(self):
    # Lowercase hex digits are accepted as well.
    self._test_decode(encoded='ab=cd=ef', expected_decoded='ab\xcd\xef')

def test_decode_soft_line_break(self):
    # '=' directly before the line ending joins the two lines.
    self._test_decode(encoded='soft line=\r\nbreak',
                      expected_decoded='soft linebreak')

def test_decode_false_quoting(self):
    # '=' not followed by two hex digits is passed through literally.
    text = 'A=1,B=A ==> A+B==2'
    self._test_decode(encoded=text, expected_decoded=text)
3879
def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
    # Helper: encode `body` with quoprimime.body_encode(), compare with the
    # expected text and, where the result can be split back into lines,
    # check that no line is longer than the line-length limit.
    #
    # Only the options the caller actually supplied are forwarded, so that
    # body_encode()'s own defaults are exercised otherwise.
    supplied = {'maxlinelen': maxlinelen, 'eol': eol}
    encode_kwargs = {name: value for name, value in supplied.items()
                     if value is not None}
    # Mirror body_encode's defaults for the checks below.
    line_limit = 76 if maxlinelen is None else maxlinelen
    line_sep = '\n' if eol is None else eol
    result = quoprimime.body_encode(body, **encode_kwargs)
    self.assertEqual(result, expected_encoded_body)
    if line_sep in ('\n', '\r\n'):
        # splitlines() understands these separators, so the limit can be
        # verified line by line.
        for encoded_line in result.splitlines():
            self.assertLessEqual(len(encoded_line), line_limit)
3899
def test_encode_null(self):
    self._test_encode(body='', expected_encoded_body='')

def test_encode_null_lines(self):
    # Empty lines are kept as they are.
    self._test_encode(body='\n\n', expected_encoded_body='\n\n')

def test_encode_one_line(self):
    self._test_encode(body='hello\n', expected_encoded_body='hello\n')

def test_encode_one_line_crlf(self):
    # A CRLF in the input comes out as the default eol.
    self._test_encode(body='hello\r\n', expected_encoded_body='hello\n')

def test_encode_one_line_eol(self):
    self._test_encode(body='hello\n', expected_encoded_body='hello\r\n',
                      eol='\r\n')

def test_encode_one_space(self):
    # A space at the end of the body is quoted.
    self._test_encode(body=' ', expected_encoded_body='=20')

def test_encode_one_line_one_space(self):
    self._test_encode(body=' \n', expected_encoded_body='=20\n')
3920
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3925
def test_encode_two_lines_one_space(self):
    # The space at the end of each line is quoted separately.
    self._test_encode(body=' \n \n', expected_encoded_body='=20\n=20\n')
3928
def test_encode_one_word_trailing_spaces(self):
    # Only the last character of a run of trailing spaces is quoted; the
    # spaces in front of it are copied through.  The input therefore needs
    # two trailing spaces to yield one literal space followed by '=20'
    # (a single trailing space would encode to 'hello=20').
    self._test_encode('hello' + ' ' * 2, 'hello =20')

def test_encode_one_line_trailing_spaces(self):
    # Same rule when the trailing spaces are followed by a line ending.
    self._test_encode('hello' + ' ' * 2 + '\n', 'hello =20\n')
3934
def test_encode_one_word_trailing_tab(self):
    # A trailing tab is quoted as =09; the space before it is kept.
    self._test_encode(body='hello \t', expected_encoded_body='hello =09')

def test_encode_one_line_trailing_tab(self):
    self._test_encode(body='hello \t\n',
                      expected_encoded_body='hello =09\n')

def test_encode_trailing_space_before_maxlinelen(self):
    # With maxlinelen=6 the trailing space stays literal and is followed by
    # a soft line break.
    self._test_encode(body='abcd \n1234',
                      expected_encoded_body='abcd =\n\n1234',
                      maxlinelen=6)

def test_encode_trailing_space_at_maxlinelen(self):
    # With maxlinelen=5 the quoted space moves to a line of its own.
    self._test_encode(body='abcd \n1234',
                      expected_encoded_body='abcd=\n=20\n1234',
                      maxlinelen=5)

def test_encode_trailing_space_beyond_maxlinelen(self):
    # With maxlinelen=4 the word itself is split before the quoted space.
    self._test_encode(body='abcd \n1234',
                      expected_encoded_body='abc=\nd=20\n1234',
                      maxlinelen=4)

def test_encode_whitespace_lines(self):
    self._test_encode(body=' \n' * 5, expected_encoded_body='=20\n' * 5)

def test_encode_quoted_equals(self):
    # '=' is quoted as =3D; the surrounding spaces are untouched.
    self._test_encode(body='a = b', expected_encoded_body='a =3D b')
3955
def test_encode_one_long_string(self):
    # 100 characters without a line ending: a soft line break is inserted
    # after 75 characters.
    self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

def test_encode_one_long_line(self):
    # Same as above, with the final newline preserved.  (A second,
    # byte-identical definition of this method used to follow
    # test_encode_one_very_long_line and silently replaced this one.)
    self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

def test_encode_one_very_long_line(self):
    # 200 characters need two soft line breaks.
    self._test_encode('x' * 200 + '\n',
                      2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3968
def test_encode_shortest_maxlinelen(self):
    # maxlinelen=4 leaves room for exactly one quoted character plus the
    # soft line break on every line.
    self._test_encode(body='=' * 5,
                      expected_encoded_body='=3D=\n' * 4 + '=3D',
                      maxlinelen=4)

def test_encode_maxlinelen_too_small(self):
    # maxlinelen=3 is rejected with ValueError.
    with self.assertRaises(ValueError):
        self._test_encode('', '', maxlinelen=3)
3974
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003975 def test_encode(self):
3976 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003977 eq(quoprimime.body_encode(''), '')
3978 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003979 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003980 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003981 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003982 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003983xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3984 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3985x xxxx xxxx xxxx xxxx=20""")
3986 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003987 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3988 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003989xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3990 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3991x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003992 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003993one line
3994
3995two line"""), """\
3996one line
3997
3998two line""")
3999
4000
Ezio Melottib3aedd42010-11-20 19:04:17 +00004001
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004002# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode() registers a temporary 'fake' charset.  Remove
        # it again after every test; pop() with a default is a no-op when
        # the entry was never added.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eq = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        c = Charset('us-ascii')
        eq(c.header_encode('Hello World!'), 'Hello World!')
        # Non-ASCII text cannot be encoded with the us-ascii charset
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, c.header_encode, s)
        # ...but the same text is accepted by utf-8.
        c = Charset('utf-8')
        eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # Try a charset with QP body encoding
        c = Charset('iso-8859-1')
        eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        c = Charset('utf-8')
        eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
        # Try a charset with None body encoding
        c = Charset('us-ascii')
        eq('hello world', c.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        c = Charset('fake')
        eq('hello world', c.body_encode('hello world'))

    def test_unicode_charset_name(self):
        charset = Charset('us-ascii')
        self.assertEqual(str(charset), 'us-ascii')
        # A charset name that is not pure ASCII is rejected.
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4057
4058
Ezio Melottib3aedd42010-11-20 19:04:17 +00004059
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004060# Test multilingual MIME headers.
4061class TestHeader(TestEmailBase):
def test_simple(self):
    # The encoded form separates appended chunks with a space (see
    # test_simple_surprise).  The chunk appended here already starts with
    # a space, so two spaces end up between the sentences.
    eq = self.ndiffAssertEqual
    h = Header('Hello World!')
    eq(h.encode(), 'Hello World!')
    h.append(' Goodbye World!')
    eq(h.encode(), 'Hello World!' + ' ' * 2 + 'Goodbye World!')

def test_simple_surprise(self):
    # The "surprise": a separating space shows up even though the appended
    # chunk has no leading whitespace of its own.
    eq = self.ndiffAssertEqual
    h = Header('Hello World!')
    eq(h.encode(), 'Hello World!')
    h.append('Goodbye World!')
    eq(h.encode(), 'Hello World! Goodbye World!')
4075
4076 def test_header_needs_no_decoding(self):
4077 h = 'no decoding needed'
4078 self.assertEqual(decode_header(h), [(h, None)])
4079
4080 def test_long(self):
4081 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4082 maxlinelen=76)
4083 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004084 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004085
def test_multilingual(self):
    # Build one header from three chunks in three different charsets
    # (iso-8859-1, iso-8859-2 and utf-8), then check its folded encoded
    # form, what decode_header() returns for it, its str() value and a
    # round trip through make_header().
    eq = self.ndiffAssertEqual
    g = Charset("iso-8859-1")
    cz = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    # The first two chunks are given as bytes in their respective charset,
    # the third one as a str.
    g_head = (b'Die Mieter treten hier ein werden mit einem '
              b'Foerderband komfortabel den Korridor entlang, '
              b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
              b'gegen die rotierenden Klingen bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    h = Header(g_head, g)
    h.append(cz_head, cz)
    h.append(utf8_head, utf8)
    enc = h.encode(maxlinelen=76)
    # The two ISO charsets come out as 'q' encoded words, utf-8 as 'b'.
    eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
    # Decoding merges the encoded words back into one chunk per charset.
    decoded = decode_header(enc)
    eq(len(decoded), 3)
    eq(decoded[0], (g_head, 'iso-8859-1'))
    eq(decoded[1], (cz_head, 'iso-8859-2'))
    eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
    # str() gives the text of all three chunks; the expected value is
    # spelled out as UTF-8 bytes and decoded for the comparison.
    ustr = str(h)
    eq(ustr,
       (b'Die Mieter treten hier ein werden mit einem Foerderband '
        b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
        b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
        b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
        b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
        b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
        b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
        b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
        b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
        b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
        b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
        b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
        b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
        b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
        b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
        ).decode('utf-8'))
    # Test make_header()
    newh = make_header(decode_header(enc))
    eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004148
4149 def test_empty_header_encode(self):
4150 h = Header()
4151 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004152
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004153 def test_header_ctor_default_args(self):
4154 eq = self.ndiffAssertEqual
4155 h = Header()
4156 eq(h, '')
4157 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004158 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004159
def test_explicit_maxlinelen(self):
    check = self.ndiffAssertEqual
    text = ('A very long line that must get split to something other '
            'than at the 76th character boundary to test the non-default '
            'behavior')
    # No header name, default line length.
    anonymous = Header(text)
    check(anonymous.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
    check(str(anonymous), text)
    # With a header name the first line is folded one word earlier.
    named = Header(text, header_name='Subject')
    check(named.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
    check(str(named), text)
    # A large explicit maxlinelen keeps everything on one line.
    unfolded = Header(text, maxlinelen=1024, header_name='Subject')
    check(unfolded.encode(), text)
    check(str(unfolded), text)
4178
def test_quopri_splittable(self):
    # A 'q' encoded header is folded into encoded words that fit within
    # maxlinelen, and the folded form decodes back to the original text.
    # Checked for maxlinelen=20 and maxlinelen=40.
    eq = self.ndiffAssertEqual
    h = Header(charset='iso-8859-1', maxlinelen=20)
    x = 'xxxx ' * 20
    h.append(x)
    s = h.encode()
    # With maxlinelen=20 only two or three characters fit into each
    # encoded word.
    eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
    # Decoding the folded header gives back the original text.
    eq(x, str(make_header(decode_header(s))))
    h = Header(charset='iso-8859-1', maxlinelen=40)
    h.append('xxxx ' * 20)
    s = h.encode()
    eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
    eq(x, str(make_header(decode_header(s))))
4247
def test_base64_splittable(self):
    # Same as test_quopri_splittable, but with koi8-r, whose expected
    # output below uses 'b' (base64) encoded words.  Checked for
    # maxlinelen=20 and maxlinelen=40, each followed by a round trip.
    eq = self.ndiffAssertEqual
    h = Header(charset='koi8-r', maxlinelen=20)
    x = 'xxxx ' * 20
    h.append(x)
    s = h.encode()
    eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
    # Decoding the folded header gives back the original text.
    eq(x, str(make_header(decode_header(s))))
    h = Header(charset='koi8-r', maxlinelen=40)
    h.append(x)
    s = h.encode()
    eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
    eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004301
4302 def test_us_ascii_header(self):
4303 eq = self.assertEqual
4304 s = 'hello'
4305 x = decode_header(s)
4306 eq(x, [('hello', None)])
4307 h = make_header(x)
4308 eq(s, h.encode())
4309
4310 def test_string_charset(self):
4311 eq = self.assertEqual
4312 h = Header()
4313 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004314 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004315
4316## def test_unicode_error(self):
4317## raises = self.assertRaises
4318## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4319## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4320## h = Header()
4321## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4322## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4323## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4324
4325 def test_utf8_shortest(self):
4326 eq = self.assertEqual
4327 h = Header('p\xf6stal', 'utf-8')
4328 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4329 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4330 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4331
4332 def test_bad_8bit_header(self):
4333 raises = self.assertRaises
4334 eq = self.assertEqual
4335 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4336 raises(UnicodeError, Header, x)
4337 h = Header()
4338 raises(UnicodeError, h.append, x)
4339 e = x.decode('utf-8', 'replace')
4340 eq(str(Header(x, errors='replace')), e)
4341 h.append(x, errors='replace')
4342 eq(str(h), e)
4343
R David Murray041015c2011-03-25 15:10:55 -04004344 def test_escaped_8bit_header(self):
4345 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004346 e = x.decode('ascii', 'surrogateescape')
4347 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004348 self.assertEqual(str(h),
4349 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4350 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4351
R David Murraye5e366c2011-06-18 12:57:28 -04004352 def test_header_handles_binary_unknown8bit(self):
4353 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4354 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4355 self.assertEqual(str(h),
4356 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4357 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4358
4359 def test_make_header_handles_binary_unknown8bit(self):
4360 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4361 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4362 h2 = email.header.make_header(email.header.decode_header(h))
4363 self.assertEqual(str(h2),
4364 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4365 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4366
R David Murray041015c2011-03-25 15:10:55 -04004367 def test_modify_returned_list_does_not_change_header(self):
4368 h = Header('test')
4369 chunks = email.header.decode_header(h)
4370 chunks.append(('ascii', 'test2'))
4371 self.assertEqual(str(h), 'test')
4372
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004373 def test_encoded_adjacent_nonencoded(self):
4374 eq = self.assertEqual
4375 h = Header()
4376 h.append('hello', 'iso-8859-1')
4377 h.append('world')
4378 s = h.encode()
4379 eq(s, '=?iso-8859-1?q?hello?= world')
4380 h = make_header(decode_header(s))
4381 eq(h.encode(), s)
4382
4383 def test_whitespace_eater(self):
4384 eq = self.assertEqual
4385 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4386 parts = decode_header(s)
4387 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
4388 hdr = make_header(parts)
4389 eq(hdr.encode(),
4390 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4391
4392 def test_broken_base64_header(self):
4393 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004394 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004395 raises(errors.HeaderParseError, decode_header, s)
4396
R. David Murray477efb32011-01-05 01:39:32 +00004397 def test_shift_jis_charset(self):
4398 h = Header('文', charset='shift_jis')
4399 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4400
R David Murrayde912762011-03-16 18:26:23 -04004401 def test_flatten_header_with_no_value(self):
4402 # Issue 11401 (regression from email 4.x) Note that the space after
4403 # the header doesn't reflect the input, but this is also the way
4404 # email 4.x behaved. At some point it would be nice to fix that.
4405 msg = email.message_from_string("EmptyHeader:")
4406 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4407
R David Murray01581ee2011-04-18 10:04:34 -04004408 def test_encode_preserves_leading_ws_on_value(self):
4409 msg = Message()
4410 msg['SomeHeader'] = ' value with leading ws'
4411 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4412
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004413
Ezio Melottib3aedd42010-11-20 19:04:17 +00004414
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004415# Test RFC 2231 header parameters (en/de)coding
4416class TestRFC2231(TestEmailBase):
def test_get_param(self):
    # The 'title' parameter of msg_29.txt comes back as a
    # (charset, language, value) tuple; unquote=False keeps the double
    # quotes around the value.
    msg = self._msgobj('msg_29.txt')
    title = 'This is even more ***fun*** isn\'t it!'
    self.assertEqual(msg.get_param('title'), ('us-ascii', 'en', title))
    self.assertEqual(msg.get_param('title', unquote=False),
                     ('us-ascii', 'en', '"' + title + '"'))
4424
def test_set_param(self):
    # set_param() with a charset (and optionally a language) stores the
    # parameter so that get_param() returns a (charset, language, value)
    # tuple, and the parameter is written out in title*=... form.
    eq = self.ndiffAssertEqual
    msg = Message()
    # Without a language the middle element of the tuple is ''.
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii')
    eq(msg.get_param('title'),
       ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    eq(msg.get_param('title'),
       ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    # Now on a message loaded from a file: check the complete flattened
    # output, including the added title parameter.
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4461
R David Murraya2860e82011-04-16 09:20:30 -04004462 def test_set_param_requote(self):
4463 msg = Message()
4464 msg.set_param('title', 'foo')
4465 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4466 msg.set_param('title', 'bar', requote=False)
4467 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4468 # tspecial is still quoted.
4469 msg.set_param('title', "(bar)bell", requote=False)
4470 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4471
def test_del_param(self):
    # Add two parameters to the Content-Type header of msg_01.txt, delete
    # one of them again and compare the complete flattened message: 'foo'
    # must be gone while 'title' is still present.
    eq = self.ndiffAssertEqual
    msg = self._msgobj('msg_01.txt')
    msg.set_param('foo', 'bar', charset='us-ascii', language='en')
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    msg.del_param('foo', header='Content-Type')
    eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4501
def test_rfc2231_get_content_charset(self):
    # get_content_charset() is expected to report 'us-ascii' for
    # msg_32.txt.
    message = self._msgobj('msg_32.txt')
    self.assertEqual(message.get_content_charset(), 'us-ascii')
4506
R. David Murraydfd7eb02010-12-24 22:36:49 +00004507 def test_rfc2231_parse_rfc_quoting(self):
4508 m = textwrap.dedent('''\
4509 Content-Disposition: inline;
4510 \tfilename*0*=''This%20is%20even%20more%20;
4511 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4512 \tfilename*2="is it not.pdf"
4513
4514 ''')
4515 msg = email.message_from_string(m)
4516 self.assertEqual(msg.get_filename(),
4517 'This is even more ***fun*** is it not.pdf')
4518 self.assertEqual(m, msg.as_string())
4519
4520 def test_rfc2231_parse_extra_quoting(self):
4521 m = textwrap.dedent('''\
4522 Content-Disposition: inline;
4523 \tfilename*0*="''This%20is%20even%20more%20";
4524 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4525 \tfilename*2="is it not.pdf"
4526
4527 ''')
4528 msg = email.message_from_string(m)
4529 self.assertEqual(msg.get_filename(),
4530 'This is even more ***fun*** is it not.pdf')
4531 self.assertEqual(m, msg.as_string())
4532
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004533 def test_rfc2231_no_language_or_charset(self):
4534 m = '''\
4535Content-Transfer-Encoding: 8bit
4536Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4537Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4538
4539'''
4540 msg = email.message_from_string(m)
4541 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004542 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004543 self.assertEqual(
4544 param,
4545 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4546
4547 def test_rfc2231_no_language_or_charset_in_filename(self):
4548 m = '''\
4549Content-Disposition: inline;
4550\tfilename*0*="''This%20is%20even%20more%20";
4551\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4552\tfilename*2="is it not.pdf"
4553
4554'''
4555 msg = email.message_from_string(m)
4556 self.assertEqual(msg.get_filename(),
4557 'This is even more ***fun*** is it not.pdf')
4558
4559 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4560 m = '''\
4561Content-Disposition: inline;
4562\tfilename*0*="''This%20is%20even%20more%20";
4563\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4564\tfilename*2="is it not.pdf"
4565
4566'''
4567 msg = email.message_from_string(m)
4568 self.assertEqual(msg.get_filename(),
4569 'This is even more ***fun*** is it not.pdf')
4570
4571 def test_rfc2231_partly_encoded(self):
4572 m = '''\
4573Content-Disposition: inline;
4574\tfilename*0="''This%20is%20even%20more%20";
4575\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4576\tfilename*2="is it not.pdf"
4577
4578'''
4579 msg = email.message_from_string(m)
4580 self.assertEqual(
4581 msg.get_filename(),
4582 'This%20is%20even%20more%20***fun*** is it not.pdf')
4583
4584 def test_rfc2231_partly_nonencoded(self):
4585 m = '''\
4586Content-Disposition: inline;
4587\tfilename*0="This%20is%20even%20more%20";
4588\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4589\tfilename*2="is it not.pdf"
4590
4591'''
4592 msg = email.message_from_string(m)
4593 self.assertEqual(
4594 msg.get_filename(),
4595 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4596
4597 def test_rfc2231_no_language_or_charset_in_boundary(self):
4598 m = '''\
4599Content-Type: multipart/alternative;
4600\tboundary*0*="''This%20is%20even%20more%20";
4601\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4602\tboundary*2="is it not.pdf"
4603
4604'''
4605 msg = email.message_from_string(m)
4606 self.assertEqual(msg.get_boundary(),
4607 'This is even more ***fun*** is it not.pdf')
4608
4609 def test_rfc2231_no_language_or_charset_in_charset(self):
4610 # This is a nonsensical charset value, but tests the code anyway
4611 m = '''\
4612Content-Type: text/plain;
4613\tcharset*0*="This%20is%20even%20more%20";
4614\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4615\tcharset*2="is it not.pdf"
4616
4617'''
4618 msg = email.message_from_string(m)
4619 self.assertEqual(msg.get_content_charset(),
4620 'this is even more ***fun*** is it not.pdf')
4621
4622 def test_rfc2231_bad_encoding_in_filename(self):
4623 m = '''\
4624Content-Disposition: inline;
4625\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4626\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4627\tfilename*2="is it not.pdf"
4628
4629'''
4630 msg = email.message_from_string(m)
4631 self.assertEqual(msg.get_filename(),
4632 'This is even more ***fun*** is it not.pdf')
4633
4634 def test_rfc2231_bad_encoding_in_charset(self):
4635 m = """\
4636Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4637
4638"""
4639 msg = email.message_from_string(m)
4640 # This should return None because non-ascii characters in the charset
4641 # are not allowed.
4642 self.assertEqual(msg.get_content_charset(), None)
4643
4644 def test_rfc2231_bad_character_in_charset(self):
4645 m = """\
4646Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4647
4648"""
4649 msg = email.message_from_string(m)
4650 # This should return None because non-ascii characters in the charset
4651 # are not allowed.
4652 self.assertEqual(msg.get_content_charset(), None)
4653
4654 def test_rfc2231_bad_character_in_filename(self):
4655 m = '''\
4656Content-Disposition: inline;
4657\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4658\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4659\tfilename*2*="is it not.pdf%E2"
4660
4661'''
4662 msg = email.message_from_string(m)
4663 self.assertEqual(msg.get_filename(),
4664 'This is even more ***fun*** is it not.pdf\ufffd')
4665
4666 def test_rfc2231_unknown_encoding(self):
4667 m = """\
4668Content-Transfer-Encoding: 8bit
4669Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4670
4671"""
4672 msg = email.message_from_string(m)
4673 self.assertEqual(msg.get_filename(), 'myfile.txt')
4674
4675 def test_rfc2231_single_tick_in_filename_extended(self):
4676 eq = self.assertEqual
4677 m = """\
4678Content-Type: application/x-foo;
4679\tname*0*=\"Frank's\"; name*1*=\" Document\"
4680
4681"""
4682 msg = email.message_from_string(m)
4683 charset, language, s = msg.get_param('name')
4684 eq(charset, None)
4685 eq(language, None)
4686 eq(s, "Frank's Document")
4687
4688 def test_rfc2231_single_tick_in_filename(self):
4689 m = """\
4690Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4691
4692"""
4693 msg = email.message_from_string(m)
4694 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004695 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004696 self.assertEqual(param, "Frank's Document")
4697
4698 def test_rfc2231_tick_attack_extended(self):
4699 eq = self.assertEqual
4700 m = """\
4701Content-Type: application/x-foo;
4702\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4703
4704"""
4705 msg = email.message_from_string(m)
4706 charset, language, s = msg.get_param('name')
4707 eq(charset, 'us-ascii')
4708 eq(language, 'en-us')
4709 eq(s, "Frank's Document")
4710
4711 def test_rfc2231_tick_attack(self):
4712 m = """\
4713Content-Type: application/x-foo;
4714\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4715
4716"""
4717 msg = email.message_from_string(m)
4718 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004719 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004720 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4721
4722 def test_rfc2231_no_extended_values(self):
4723 eq = self.assertEqual
4724 m = """\
4725Content-Type: application/x-foo; name=\"Frank's Document\"
4726
4727"""
4728 msg = email.message_from_string(m)
4729 eq(msg.get_param('name'), "Frank's Document")
4730
4731 def test_rfc2231_encoded_then_unencoded_segments(self):
4732 eq = self.assertEqual
4733 m = """\
4734Content-Type: application/x-foo;
4735\tname*0*=\"us-ascii'en-us'My\";
4736\tname*1=\" Document\";
4737\tname*2*=\" For You\"
4738
4739"""
4740 msg = email.message_from_string(m)
4741 charset, language, s = msg.get_param('name')
4742 eq(charset, 'us-ascii')
4743 eq(language, 'en-us')
4744 eq(s, 'My Document For You')
4745
4746 def test_rfc2231_unencoded_then_encoded_segments(self):
4747 eq = self.assertEqual
4748 m = """\
4749Content-Type: application/x-foo;
4750\tname*0=\"us-ascii'en-us'My\";
4751\tname*1*=\" Document\";
4752\tname*2*=\" For You\"
4753
4754"""
4755 msg = email.message_from_string(m)
4756 charset, language, s = msg.get_param('name')
4757 eq(charset, 'us-ascii')
4758 eq(language, 'en-us')
4759 eq(s, 'My Document For You')
4760
4761
Ezio Melottib3aedd42010-11-20 19:04:17 +00004762
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC 1847 section 2.1. Note that these are incomplete, because the
# email package does not currently always preserve the body. See issue 1670765.
4766class TestSigned(TestEmailBase):
4767
4768 def _msg_and_obj(self, filename):
R David Murray28346b82011-03-31 11:40:20 -04004769 with openfile(filename) as fp:
R. David Murraya8f480f2010-01-16 18:30:03 +00004770 original = fp.read()
4771 msg = email.message_from_string(original)
4772 return original, msg
4773
4774 def _signed_parts_eq(self, original, result):
4775 # Extract the first mime part of each message
4776 import re
4777 repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
4778 inpart = repart.search(original).group(2)
4779 outpart = repart.search(result).group(2)
4780 self.assertEqual(outpart, inpart)
4781
4782 def test_long_headers_as_string(self):
4783 original, msg = self._msg_and_obj('msg_45.txt')
4784 result = msg.as_string()
4785 self._signed_parts_eq(original, result)
4786
4787 def test_long_headers_as_string_maxheaderlen(self):
4788 original, msg = self._msg_and_obj('msg_45.txt')
4789 result = msg.as_string(maxheaderlen=60)
4790 self._signed_parts_eq(original, result)
4791
4792 def test_long_headers_flatten(self):
4793 original, msg = self._msg_and_obj('msg_45.txt')
4794 fp = StringIO()
4795 Generator(fp).flatten(msg)
4796 result = fp.getvalue()
4797 self._signed_parts_eq(original, result)
4798
4799
Ezio Melottib3aedd42010-11-20 19:04:17 +00004800
# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()