blob: 3dda9213104fc9d8de3e2b64fdd77733a9e731e8 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
R David Murrayc27e5222012-05-25 15:01:48 -040019import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21from email.charset import Charset
22from email.header import Header, decode_header, make_header
23from email.parser import Parser, HeaderParser
24from email.generator import Generator, DecodedGenerator
25from email.message import Message
26from email.mime.application import MIMEApplication
27from email.mime.audio import MIMEAudio
28from email.mime.text import MIMEText
29from email.mime.image import MIMEImage
30from email.mime.base import MIMEBase
31from email.mime.message import MIMEMessage
32from email.mime.multipart import MIMEMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
R David Murray28346b82011-03-31 11:40:20 -040040from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040041from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000042
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Exercises the public API of email.message.Message: header access,
    # parameter handling, charsets, payload decoding and flattening.
    # Tests that call self._msgobj() or openfile() read sample messages
    # supplied by the test package.

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # Here the argument to get_charsets() shows up in place of None.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header on a Message appends a new field instead of
            # replacing the old one, and lookups return the first match.
            # Drop any earlier value so each spelling is really exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the first line is the envelope "From " line;
        # the remainder must still equal the source text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No header name given, and this message has no Content-Type.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        # The same header value is written twice, once in a single-quoted
        # and once in a double-quoted Python literal.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Setting the parameter again puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # No assertion: the test passes as long as no exception is raised.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header as is.
        self.assertEqual(msg['Content-Type'], existing_header)
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With two 'First' headers present, only the earlier one is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        # The keyword foo_bar comes out as the parameter name "foo-bar".
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
635
636
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Checks which Content-Transfer-Encoding the MIME classes pick, and
    # that an encoded image body is wrapped to an acceptable line length.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw_image = image_file.read()
        image_part = email.mime.image.MIMEImage(raw_image)
        encoded_body = image_part.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        line_lengths = [len(line) for line in encoded_body.split('\n')]
        self.assertLessEqual(max(line_lengths), 76)

    def test_encode_empty_payload(self):
        # Setting a charset on a message that has no payload.
        empty = Message()
        empty.set_charset('us-ascii')
        self.assertEqual(empty['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit['content-transfer-encoding'], 'base64')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        part = MIMEText('文', _charset='euc-jp')
        self.assertEqual(part['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000674
Ezio Melottib3aedd42010-11-20 19:04:17 +0000675
# Test long header wrapping
677class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400678
679 maxDiff = None
680
def test_split_long_continuation(self):
    # Parse a message whose Subject carries a very long tab-led
    # continuation line, flatten it again, and compare the output with
    # the expected text below.
    parsed = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
    expected = """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
"""
    buffer = StringIO()
    Generator(buffer).flatten(parsed)
    self.ndiffAssertEqual(buffer.getvalue(), expected)
700
def test_another_long_almost_unsplittable_header(self):
    # Encode the same pre-folded text twice: once as written with tab
    # continuation whitespace, once with every tab turned into a space.
    check = self.ndiffAssertEqual
    tabbed_text = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
    tab_header = Header(tabbed_text, continuation_ws='\t')
    check(tab_header.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
    spaced_text = tabbed_text.replace('\t', ' ')
    space_header = Header(spaced_text)
    check(space_header.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")
717
def test_long_nonstring(self):
    # Build one Subject header out of three chunks, each in its own
    # charset, then check both the flattened message and a direct
    # encode() call with maxlinelen=76.
    check = self.ndiffAssertEqual
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    latin1_chunk = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                    b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                    b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                    b'bef\xf6rdert. ')
    latin2_chunk = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                    b'd\xf9vtipu.. ')
    utf8_chunk = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                  '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                  '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                  '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                  '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                  'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                  'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                  '\u3044\u307e\u3059\u3002')
    subject = Header(latin1_chunk, latin1, header_name='Subject')
    subject.append(latin2_chunk, latin2)
    subject.append(utf8_chunk, utf8)
    msg = Message()
    msg['Subject'] = subject
    flattened = StringIO()
    writer = Generator(flattened)
    writer.flatten(msg)
    check(flattened.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000771
772 def test_long_header_encode(self):
773 eq = self.ndiffAssertEqual
774 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
775 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
776 header_name='X-Foobar-Spoink-Defrobnit')
777 eq(h.encode(), '''\
778wasnipoop; giraffes="very-long-necked-animals";
779 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
780
781 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
782 eq = self.ndiffAssertEqual
783 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
784 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
785 header_name='X-Foobar-Spoink-Defrobnit',
786 continuation_ws='\t')
787 eq(h.encode(), '''\
788wasnipoop; giraffes="very-long-necked-animals";
789 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
790
791 def test_long_header_encode_with_tab_continuation(self):
792 eq = self.ndiffAssertEqual
793 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
794 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
795 header_name='X-Foobar-Spoink-Defrobnit',
796 continuation_ws='\t')
797 eq(h.encode(), '''\
798wasnipoop; giraffes="very-long-necked-animals";
799\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
800
R David Murray3a6152f2011-03-14 21:13:03 -0400801 def test_header_encode_with_different_output_charset(self):
802 h = Header('文', 'euc-jp')
803 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
804
805 def test_long_header_encode_with_different_output_charset(self):
806 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
807 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
808 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
809 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
810 res = """\
811=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
812 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
813 self.assertEqual(h.encode(), res)
814
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000815 def test_header_splitter(self):
816 eq = self.ndiffAssertEqual
817 msg = MIMEText('')
818 # It'd be great if we could use add_header() here, but that doesn't
819 # guarantee an order of the parameters.
820 msg['X-Foobar-Spoink-Defrobnit'] = (
821 'wasnipoop; giraffes="very-long-necked-animals"; '
822 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
823 sfp = StringIO()
824 g = Generator(sfp)
825 g.flatten(msg)
826 eq(sfp.getvalue(), '''\
827Content-Type: text/plain; charset="us-ascii"
828MIME-Version: 1.0
829Content-Transfer-Encoding: 7bit
830X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
831 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
832
833''')
834
def test_no_semis_header_splitter(self):
    # With no ';' in the value, the long References header is folded at
    # whitespace between message ids.
    message = Message()
    message['From'] = 'test@dom.ain'
    message['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    expected = (
        'From: test@dom.ain\n'
        'References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>\n'
        ' <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(out.getvalue(), expected)
850
R David Murray7da4db12011-04-07 20:37:17 -0400851 def test_last_split_chunk_does_not_fit(self):
852 eq = self.ndiffAssertEqual
853 h = Header('Subject: the first part of this is short, but_the_second'
854 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
855 '_all_by_itself')
856 eq(h.encode(), """\
857Subject: the first part of this is short,
858 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
859
860 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
861 eq = self.ndiffAssertEqual
862 h = Header(', but_the_second'
863 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
864 '_all_by_itself')
865 eq(h.encode(), """\
866,
867 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
868
869 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
870 eq = self.ndiffAssertEqual
871 h = Header(', , but_the_second'
872 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
873 '_all_by_itself')
874 eq(h.encode(), """\
875, ,
876 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
877
878 def test_trailing_splitable_on_overlong_unsplitable(self):
879 eq = self.ndiffAssertEqual
880 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
881 'be_on_a_line_all_by_itself;')
882 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
883 "be_on_a_line_all_by_itself;")
884
885 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
886 eq = self.ndiffAssertEqual
887 h = Header('; '
888 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400889 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400890 eq(h.encode(), """\
891;
R David Murray01581ee2011-04-18 10:04:34 -0400892 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400893
def test_long_header_with_multiple_sequential_split_chars(self):
    # Regression test: per the value's own text, consecutive split
    # characters (whitespace) in a long value used to cause truncation of
    # the header when it was folded.
    # NOTE(review): the literals below are meant to contain runs of two
    # spaces ("two whitespaces in a row"); confirm the exact spacing against
    # the repository copy, since whitespace runs may have been collapsed.
    eq = self.ndiffAssertEqual
    h = Header('This is a long line that has two whitespaces in a row. '
        'This used to cause truncation of the header when folded')
    eq(h.encode(), """\
This is a long line that has two whitespaces in a row. This used to cause
 truncation of the header when folded""")
901
R David Murray01581ee2011-04-18 10:04:34 -0400902 def test_splitter_split_on_punctuation_only_if_fws(self):
903 eq = self.ndiffAssertEqual
904 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
905 'they;arenotlegal;fold,points')
906 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
907 "arenotlegal;fold,points")
908
909 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
910 eq = self.ndiffAssertEqual
911 h = Header('this is a test where we need to have more than one line '
912 'before; our final line that is just too big to fit;; '
913 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
914 'be_on_a_line_all_by_itself;')
915 eq(h.encode(), """\
916this is a test where we need to have more than one line before;
917 our final line that is just too big to fit;;
918 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
919
920 def test_overlong_last_part_followed_by_split_point(self):
921 eq = self.ndiffAssertEqual
922 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
923 'be_on_a_line_all_by_itself ')
924 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
925 "should_be_on_a_line_all_by_itself ")
926
927 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
928 eq = self.ndiffAssertEqual
929 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
930 'before_our_final_line_; ; '
931 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
932 'be_on_a_line_all_by_itself; ')
933 eq(h.encode(), """\
934this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
935 ;
936 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
937
938 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
939 eq = self.ndiffAssertEqual
940 h = Header('this is a test where we need to have more than one line '
941 'before our final line; ; '
942 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
943 'be_on_a_line_all_by_itself; ')
944 eq(h.encode(), """\
945this is a test where we need to have more than one line before our final line;
946 ;
947 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
948
def test_long_header_with_whitespace_runs(self):
    # Flattens a message whose References value contains runs of
    # whitespace between ids and checks the folded output, including the
    # two trailing spaces (written as \x20\x20 so they stay visible).
    # NOTE(review): the literals below depend on the exact number of
    # consecutive spaces between ids; confirm them against the repository
    # copy, since whitespace runs may have been collapsed here.
    eq = self.ndiffAssertEqual
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
    msg.set_payload('Test')
    sfp = StringIO()
    g = Generator(sfp)
    g.flatten(msg)
    eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain>\x20\x20

Test""")
965
def test_long_run_with_semi_header_splitter(self):
    # A long run of ids followed by '; abc': the expected folds are all at
    # whitespace between ids, with '; abc' left on the last line.
    message = Message()
    message['From'] = 'test@dom.ain'
    message['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    expected = (
        'From: test@dom.ain\n'
        'References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>\n'
        ' <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>\n'
        ' <foo@dom.ain>; abc\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(out.getvalue(), expected)
982
983 def test_splitter_split_on_punctuation_only_if_fws(self):
984 eq = self.ndiffAssertEqual
985 msg = Message()
986 msg['From'] = 'test@dom.ain'
987 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
988 'they;arenotlegal;fold,points')
989 msg.set_payload('Test')
990 sfp = StringIO()
991 g = Generator(sfp)
992 g.flatten(msg)
993 # XXX the space after the header should not be there.
994 eq(sfp.getvalue(), """\
995From: test@dom.ain
996References:\x20
997 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
998
999Test""")
1000
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001001 def test_no_split_long_header(self):
1002 eq = self.ndiffAssertEqual
1003 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001004 h = Header(hstr)
1005 # These come on two lines because Headers are really field value
1006 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001007 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001008References:
1009 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1010 h = Header('x' * 80)
1011 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001012
1013 def test_splitting_multiple_long_lines(self):
1014 eq = self.ndiffAssertEqual
1015 hstr = """\
1016from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1017\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1018\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1019"""
1020 h = Header(hstr, continuation_ws='\t')
1021 eq(h.encode(), """\
1022from babylon.socal-raves.org (localhost [127.0.0.1]);
1023 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1024 for <mailman-admin@babylon.socal-raves.org>;
1025 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1026\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1027 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1028 for <mailman-admin@babylon.socal-raves.org>;
1029 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1030\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1031 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1032 for <mailman-admin@babylon.socal-raves.org>;
1033 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1034
1035 def test_splitting_first_line_only_is_long(self):
1036 eq = self.ndiffAssertEqual
1037 hstr = """\
1038from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1039\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1040\tid 17k4h5-00034i-00
1041\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1042 h = Header(hstr, maxlinelen=78, header_name='Received',
1043 continuation_ws='\t')
1044 eq(h.encode(), """\
1045from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1046 helo=cthulhu.gerg.ca)
1047\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1048\tid 17k4h5-00034i-00
1049\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1050
1051 def test_long_8bit_header(self):
1052 eq = self.ndiffAssertEqual
1053 msg = Message()
1054 h = Header('Britische Regierung gibt', 'iso-8859-1',
1055 header_name='Subject')
1056 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001057 eq(h.encode(maxlinelen=76), """\
1058=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1059 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001060 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001061 eq(msg.as_string(maxheaderlen=76), """\
1062Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1063 =?iso-8859-1?q?hore-Windkraftprojekte?=
1064
1065""")
1066 eq(msg.as_string(maxheaderlen=0), """\
1067Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001068
1069""")
1070
1071 def test_long_8bit_header_no_charset(self):
1072 eq = self.ndiffAssertEqual
1073 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001074 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1075 'f\xfcr Offshore-Windkraftprojekte '
1076 '<a-very-long-address@example.com>')
1077 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001078 eq(msg.as_string(maxheaderlen=78), """\
1079Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1080 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1081
1082""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001083 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001084 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001085 header_name='Reply-To')
1086 eq(msg.as_string(maxheaderlen=78), """\
1087Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1088 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001089
1090""")
1091
1092 def test_long_to_header(self):
1093 eq = self.ndiffAssertEqual
1094 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001095 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001096 '"Someone Test #B" <someone@umich.edu>, '
1097 '"Someone Test #C" <someone@eecs.umich.edu>, '
1098 '"Someone Test #D" <someone@eecs.umich.edu>')
1099 msg = Message()
1100 msg['To'] = to
1101 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001102To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001103 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001104 "Someone Test #C" <someone@eecs.umich.edu>,
1105 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001106
1107''')
1108
1109 def test_long_line_after_append(self):
1110 eq = self.ndiffAssertEqual
1111 s = 'This is an example of string which has almost the limit of header length.'
1112 h = Header(s)
1113 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001114 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001115This is an example of string which has almost the limit of header length.
1116 Add another line.""")
1117
1118 def test_shorter_line_with_append(self):
1119 eq = self.ndiffAssertEqual
1120 s = 'This is a shorter line.'
1121 h = Header(s)
1122 h.append('Add another sentence. (Surprise?)')
1123 eq(h.encode(),
1124 'This is a shorter line. Add another sentence. (Surprise?)')
1125
1126 def test_long_field_name(self):
1127 eq = self.ndiffAssertEqual
1128 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001129 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1130 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1131 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1132 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001133 h = Header(gs, 'iso-8859-1', header_name=fn)
1134 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001135 eq(h.encode(maxlinelen=76), """\
1136=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1137 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1138 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1139 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001140
1141 def test_long_received_header(self):
1142 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1143 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1144 'Wed, 05 Mar 2003 18:10:18 -0700')
1145 msg = Message()
1146 msg['Received-1'] = Header(h, continuation_ws='\t')
1147 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001148 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001149 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001150Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1151 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001152 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001153Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1154 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001155 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001156
1157""")
1158
1159 def test_string_headerinst_eq(self):
1160 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1161 'tu-muenchen.de> (David Bremner\'s message of '
1162 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1163 msg = Message()
1164 msg['Received-1'] = Header(h, header_name='Received-1',
1165 continuation_ws='\t')
1166 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001167 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001168 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001169Received-1:\x20
1170 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1171 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1172Received-2:\x20
1173 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1174 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001175
1176""")
1177
1178 def test_long_unbreakable_lines_with_continuation(self):
1179 eq = self.ndiffAssertEqual
1180 msg = Message()
1181 t = """\
1182iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1183 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1184 msg['Face-1'] = t
1185 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001186 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001187 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001188 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001189 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001190Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001191 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001192 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001193Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001194 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001195 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001196Face-3:\x20
1197 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1198 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001199
1200""")
1201
1202 def test_another_long_multiline_header(self):
1203 eq = self.ndiffAssertEqual
1204 m = ('Received: from siimage.com '
1205 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001206 'Microsoft SMTPSVC(5.0.2195.4905); '
1207 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001208 msg = email.message_from_string(m)
1209 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001210Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1211 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001212
1213''')
1214
1215 def test_long_lines_with_different_header(self):
1216 eq = self.ndiffAssertEqual
1217 h = ('List-Unsubscribe: '
1218 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1219 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1220 '?subject=unsubscribe>')
1221 msg = Message()
1222 msg['List'] = h
1223 msg['List'] = Header(h, header_name='List')
1224 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001225List: List-Unsubscribe:
1226 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001227 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001228List: List-Unsubscribe:
1229 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001230 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001231
1232""")
1233
def test_long_rfc2047_header_with_embedded_fws(self):
    # Encodes an ASCII-only value as utf-8 encoded words to check line
    # wrapping when the value already contains embedded folding whitespace
    # (a newline followed by a tab or space).
    # NOTE(review): textwrap.dedent() strips only the common leading
    # whitespace, so the relative indentation inside both literals is
    # significant (continuation lines are one column deeper than the first
    # line); confirm it against the repository copy.
    h = Header(textwrap.dedent("""\
        We're going to pretend this header is in a non-ascii character set
        \tto see if line wrapping with encoded words and embedded
           folding white space works"""),
               charset='utf-8',
               header_name='Test')
    self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
        =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
         =?utf-8?q?cter_set?=
         =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
         =?utf-8?q?_folding_white_space_works?=""")+'\n')
1246
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001247
Ezio Melottib3aedd42010-11-20 19:04:17 +00001248
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001249# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # The body starts with "From ", the line mbox writers must escape.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\nBlah blah blah\n')
        self.msg = message

    def _flatten(self, mangle):
        # Serialize self.msg with the given mangle_from_ setting.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    '>From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(self._flatten(True), expected)

    def test_dont_mangle_from(self):
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    'From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(self._flatten(False), expected)
1280
1281
Ezio Melottib3aedd42010-11-20 19:04:17 +00001282
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001283# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Read the sample audio file once and wrap it in a MIMEAudio part.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload must base64-decode back to the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        au = self._au
        au.add_header('Content-Disposition', 'attachment',
                      filename='audiotest.au')
        eq(au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A unique object, so identity proves the failobj itself came back.
        missing = []
        eq(au.get_param('attachment', header='content-disposition'), '')
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        self.assertIs(au.get_param('foo', failobj=missing,
                                   header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(au.get_param('foobar', missing), missing)
        self.assertIs(au.get_param('attachment', missing,
                                   header='foobar'), missing)
1320 header='foobar') is missing)
1321
1322
Ezio Melottib3aedd42010-11-20 19:04:17 +00001323
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001324# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the sample GIF once and wrap it in a MIMEImage part.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload must base64-decode back to the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        im = self._im
        im.add_header('Content-Disposition', 'attachment',
                      filename='dingusfish.gif')
        eq(im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A unique object, so identity proves the failobj itself came back.
        missing = []
        eq(im.get_param('attachment', header='content-disposition'), '')
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        self.assertIs(im.get_param('foo', failobj=missing,
                                   header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(im.get_param('foobar', missing), missing)
        self.assertIs(im.get_param('attachment', missing,
                                   header='foobar'), missing)
1361 header='foobar') is missing)
1362
1363
Ezio Melottib3aedd42010-11-20 19:04:17 +00001364
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001365# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Binary data defaults to application/octet-stream, base64 encoded.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        # Decoding must round-trip to the original bytes.
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001380
1381
Ezio Melottib3aedd42010-11-20 19:04:17 +00001382
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001383# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A unique object, so identity proves the failobj itself came back.
        missing = []
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the full serialized message on failure.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        # Decoding the transfer encoding yields the utf-8 bytes.
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1433 self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1434
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001435
Ezio Melottib3aedd42010-11-20 19:04:17 +00001436
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001437# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Builds, flattens and parses multipart/* messages, covering preamble
    # and epilogue handling as well as unusual or malformed boundaries.

    def setUp(self):
        # Fixture used by test_hierarchy: a multipart/mixed container
        # holding a text introduction followed by a GIF attachment.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        # Stamp the message with a fixed moment rendered in local time.
        # formatdate() emits an RFC 2822 date whose zone is a sign plus
        # zero-padded HHMM.  The hand-rolled '%04d' % (tzsecs / 36) that
        # used to live here produced strings such as '+-100' for zones
        # east of UTC and wrong minutes for non-whole-hour offsets.
        now = 987809702.54848599
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        msg = self._msg
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(msg.get_payload()), 2)
        # Only indexes 0 and 1 exist.
        self.assertRaises(IndexError, msg.get_payload, 2)
        first = msg.get_payload(0)
        second = msg.get_payload(1)
        # The payload must hold the very objects that were attached.
        self.assertIs(first, self._txt)
        self.assertIs(second, self._im)
        self.assertEqual(msg.get_payload(), [first, second])
        self.assertFalse(first.is_multipart())
        self.assertFalse(second.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-flattening must reproduce the text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        # The empty preamble shows up as one extra blank line before the
        # first boundary.
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        # The empty epilogue shows up as a newline after the closing
        # boundary.
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        # Each alternative wraps exactly one text/plain message.
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting is written as four spaces per level, which
        # is assumed to match what iterators._structure emits -- confirm.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting is written as four spaces per level, which
        # is assumed to match what iterators._structure emits -- confirm.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not trigger
        # multipart parsing; the body round-trips as plain payload.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the text; the
        # part's payload must still come back without a trailing newline.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001809
1810
Ezio Melottib3aedd42010-11-20 19:04:17 +00001811
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001812# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    # Feeds malformed messages to the parser and checks both the resulting
    # object model and the defects recorded on it.

    def test_parse_missing_minor_type(self):
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # With no usable boundary the body stays a single string.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        first_defect, second_defect = msg.defects[0], msg.defects[1]
        self.assertIsInstance(first_defect,
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(second_defect,
                              errors.MultipartInvariantViolationDefect)

    # Template shared by the Content-Transfer-Encoding tests below; the
    # '{}' slot receives extra header text.  The boundary parameter line
    # is indented further than the rest so that it is still a folded
    # continuation of Content-Type once dedent() has run.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        source = self.multipart_msg.format(
            "\nContent-Transfer-Encoding: base64")
        msg = self._str_msg(source)
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(
            msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        source = self.multipart_msg.format('')
        msg = self._str_msg(source)
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            extra_header = "\nContent-Transfer-Encoding: {}".format(cte)
            msg = self._str_msg(self.multipart_msg.format(extra_header))
            self.assertEqual(len(msg.defects), 0)

    def test_invalid_content_type(self):
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        first_defect, second_defect = msg.defects[0], msg.defects[1]
        self.assertIsInstance(first_defect,
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(second_defect,
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        enclosed = outer.get_payload(1)
        bad = enclosed.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        source = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(source)
        # The stray continuation line is dropped from the headers and
        # reported as a defect instead.
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'body')
        self.assertEqual(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        self.assertEqual(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        source = 'Subject: test\nnot a header\nTo: abc\n\nb\n'
        msg = self._str_msg(source)
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
1987
Ezio Melottib3aedd42010-11-20 19:04:17 +00001988
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001989# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Decoding of RFC 2047 encoded words with decode_header(), and
    # re-assembly of the decoded chunks with make_header().

    def test_rfc2047_multiline(self):
        # The second physical line starts with a space, i.e. it is a
        # folded continuation of the first.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        rendered = str(make_header(chunks))
        self.assertEqual(rendered, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        rendered = str(make_header(chunks))
        self.assertEqual(rendered,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to surrounding text are expected to come
        # back as one undecoded chunk.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, expected in cases:
            chunks = decode_header(template % encoded)
            self.assertEqual(chunks, [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
2053
Ezio Melottib3aedd42010-11-20 19:04:17 +00002054
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002055# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Covers MIMEMessage wrapping, message/* parsing, preamble/epilogue
    # generation and default content types of nested parts.

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # Only message objects may be wrapped, not plain strings.
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        payload = wrapper.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        subpart = payload[0]
        # The wrapper stores the object it was given, not a copy.
        self.assertIs(subpart, inner)
        self.assertEqual(subpart['subject'], subject)

    def test_bad_multipart(self):
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        # A message/rfc822 part already holds its one message.
        self.assertRaises(errors.MultipartConversionError,
                          wrapper.attach, second)

    def test_generate(self):
        # First craft the message to be encapsulated
        enclosed = Message()
        enclosed['Subject'] = 'An enclosed message'
        enclosed.set_payload('Here is the body of the message.\n')
        wrapper = MIMEMessage(enclosed)
        wrapper['Subject'] = 'The enclosing message'
        out = StringIO()
        Generator(out).flatten(wrapper)
        self.assertEqual(out.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        msg = self._msgobj('msg_11.txt')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        self.assertEqual(submsg['subject'], 'An enclosed message')
        self.assertEqual(submsg.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines below must match msg_16.txt
        # byte for byte; two leading spaces are assumed -- confirm.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Rebuild by hand the message stored in msg_21.txt and check that
        # flattening it yields the file's text.
        with openfile('msg_21.txt') as fp:
            expected = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        part_one = MIMEText('One')
        part_two = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(part_one)
        msg.attach(part_two)
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), expected)

    def test_no_nl_preamble(self):
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        part_one = MIMEText('One')
        part_two = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(part_one)
        msg.attach(part_two)
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def _check_default_types(self, filename):
        # Parse the named test message and check that its first two
        # subparts report message/rfc822, and that the first part nested
        # inside each of them reports text/plain, for both the default
        # and the effective content type.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            nested = container.get_payload(0)
            eq(nested.get_default_type(), 'text/plain')
            eq(nested.get_content_type(), 'text/plain')

    def test_default_type(self):
        self._check_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1 = MIMEMessage(MIMEText('message 1\n'))
        subpart2 = MIMEMessage(MIMEText('message 2\n'))
        container.attach(subpart1)
        container.attach(subpart2)
        for subpart in (subpart1, subpart2):
            eq(subpart.get_content_type(), 'message/rfc822')
            eq(subpart.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Strip the explicit headers; the reported types must not change
        # and the generated text simply loses those header lines.
        for subpart in (subpart1, subpart2):
            del subpart['content-type']
            del subpart['mime-version']
        for subpart in (subpart1, subpart2):
            eq(subpart.get_content_type(), 'message/rfc822')
            eq(subpart.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        first = MIMEText('')
        second = MIMEText('')
        msg = MIMEMultipart(_subparts=(first, second))
        self.assertEqual(len(msg.get_payload()), 2)
        self.assertEqual(msg.get_payload(0), first)
        self.assertEqual(msg.get_payload(1), second)

    def test_default_multipart_constructor(self):
        container = MIMEMultipart()
        self.assertTrue(container.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002354
Ezio Melottib3aedd42010-11-20 19:04:17 +00002355
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    # Line separator appended to the expected payload/preamble strings in
    # test_content_type and test_parser.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named sample file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and require
        # the result to match the text it was parsed from.
        out = StringIO()
        gen = Generator(out, maxheaderlen=0)
        gen.flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # The only sample here that is regenerated with the Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        # The message/rfc822 part wraps exactly one nested message.
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002518
2519
Ezio Melottib3aedd42010-11-20 19:04:17 +00002520
# Test various other bits of the package's functionality
class TestMiscellaneous(TestEmailBase):

    def test_message_from_string(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_file(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
            # Rewind so the parser sees the same content that was just read.
            fp.seek(0)
            msg = email.message_from_file(fp)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_string_with_class(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()

        # Create a subclass
        class MyMessage(Message):
            pass

        msg = email.message_from_string(text, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_message_from_file_with_class(self):
        # Create a subclass
        class MyMessage(Message):
            pass

        with openfile('msg_01.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_custom_message_does_not_require_arguments(self):
        # A Message subclass whose __init__ accepts no extra arguments must
        # still be usable as the message class.
        class MyMessage(Message):
            def __init__(self):
                super().__init__()
        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
        self.assertIsInstance(msg, MyMessage)

    def test__all__(self):
        module = __import__('email')
        self.assertEqual(sorted(module.__all__), [
            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
            'generator', 'header', 'iterators', 'message',
            'message_from_binary_file', 'message_from_bytes',
            'message_from_file', 'message_from_string', 'mime', 'parser',
            'quoprimime', 'utils',
            ])

    def test_formatdate(self):
        now = time.time()
        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
                         time.gmtime(now)[:6])

    def test_formatdate_localtime(self):
        now = time.time()
        self.assertEqual(
            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
            time.localtime(now)[:6])

    def test_formatdate_usegmt(self):
        now = time.time()
        self.assertEqual(
            utils.formatdate(now, localtime=False),
            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
        self.assertEqual(
            utils.formatdate(now, localtime=False, usegmt=True),
            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))

    def test_parsedate_none(self):
        self.assertIsNone(utils.parsedate(''))

    def test_parsedate_compact(self):
        # The FWS after the comma is optional
        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))

    def test_parsedate_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_compact_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_no_space_before_positive_offset(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))

    def test_parsedate_no_space_before_negative_offset(self):
        # Issue 1155362: we already handled '+' for this case.
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))

    def test_parsedate_accepts_time_with_dots(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))

    def test_parsedate_acceptable_to_time_functions(self):
        eq = self.assertEqual
        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup)), 2003)
        # parsedate_tz() returns a longer tuple; only its first nine items
        # are passed on to the time functions.
        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup[:9]))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup[:9])), 2003)

    def test_parsedate_y2k(self):
        """Test for parsing a date with a two-digit year.

        Parsing a date with a two-digit year should return the correct
        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
        obsoletes RFC822) requires four-digit years.

        """
        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))

    def test_parseaddr_empty(self):
        self.assertEqual(utils.parseaddr('<>'), ('', ''))
        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')

    def test_noquote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly Person', 'person@dom.ain')),
            'A Silly Person <person@dom.ain>')

    def test_escape_dump(self):
        self.assertEqual(
            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
            r'"A (Very) Silly Person" <person@dom.ain>')
        self.assertEqual(
            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
            ('A (Very) Silly Person', 'person@dom.ain'))
        a = r'A \(Special\) Person'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_escape_backslashes(self):
        # Raw strings throughout: '\B' and '\ ' are not valid escape
        # sequences, so a non-raw literal only works by accident and draws
        # an invalid-escape warning on newer Pythons.
        self.assertEqual(
            utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
        a = r'Arthur \Backslash\ Foobar'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_quotes_unicode_names(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
                         latin1_quopri)

    def test_accepts_any_charset_like_object(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
        foobar = "FOOBAR"

        # Minimal stand-in exposing only header_encode().
        class CharsetMock:
            def header_encode(self, string):
                return foobar

        mock = CharsetMock()
        mock_expected = "%s <%s>" % (foobar, addr)
        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
                         utf8_base64)

    def test_invalid_charset_like_object_raises_error(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        # An object without a header_encode method:
        bad_charset = object()
        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
                          bad_charset)

    def test_unicode_address_raises_error(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        addr = 'pers\u00f6n@dom.in'
        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))

    def test_name_with_dot(self):
        x = 'John X. Doe <jxd@example.com>'
        y = '"John X. Doe" <jxd@example.com>'
        a, b = ('John X. Doe', 'jxd@example.com')
        self.assertEqual(utils.parseaddr(x), (a, b))
        self.assertEqual(utils.parseaddr(y), (a, b))
        # formataddr() quotes the name if there's a dot in it
        self.assertEqual(utils.formataddr((a, b)), y)

    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
        # issue 10005.  Note that in the third test the second pair of
        # backslashes is not actually a quoted pair because it is not inside a
        # comment or quoted string: the address being parsed has a quoted
        # string containing a quoted backslash, followed by 'example' and two
        # backslashes, followed by another quoted string containing a space and
        # the word 'example'.  parseaddr copies those two backslashes
        # literally.  Per rfc5322 this is not technically correct since a \ may
        # not appear in an address outside of a quoted string.  It is probably
        # a sensible Postel interpretation, though.
        eq = self.assertEqual
        eq(utils.parseaddr('""example" example"@example.com'),
           ('', '""example" example"@example.com'))
        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
           ('', '"\\"example\\" example"@example.com'))
        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
           ('', '"\\\\"example\\\\" example"@example.com'))

    def test_parseaddr_preserves_spaces_in_local_part(self):
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a sequence
        # of atoms, not dotatoms).  On the other hand we strip whitespace from
        # before the @ and around dots, on the assumption that the whitespace
        # around the punctuation is a mistake in what would otherwise be
        # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
        # well.
        # NOTE(review): the first two assertions are textually identical as
        # written here; they may originally have differed only in the amount
        # of internal whitespace -- confirm against upstream.
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr(" merwok wok @xample.com"))
        self.assertEqual(('', 'merwok"wok" wok@xample.com'),
                         utils.parseaddr('merwok"wok" wok@xample.com'))
        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
                         utils.parseaddr('merwok. wok . wok@xample.com'))

    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
        addr = ("'foo@example.com' (foo@example.com)",
                'foo@example.com')
        addrstr = ('"\'foo@example.com\' '
                   '(foo@example.com)" <foo@example.com>')
        self.assertEqual(utils.parseaddr(addrstr), addr)
        self.assertEqual(utils.formataddr(addr), addrstr)

    def test_multiline_from_comment(self):
        x = """\
Foo
\tBar <foo@example.com>"""
        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))

    def test_quote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
            r'"A Silly; Person" <person@dom.ain>')

    def test_charset_richcomparisons(self):
        eq = self.assertEqual
        ne = self.assertNotEqual
        cset1 = Charset()
        cset2 = Charset()
        matching = ('us-ascii', 'US-ASCII', 'Us-AsCiI')
        differing = ('usascii', 'USASCII', 'UsAsCiI')
        # Each comparison is made with the Charset on the left and then on
        # the right, so both operand orders are exercised.
        for name in matching:
            eq(cset1, name)
        for name in matching:
            eq(name, cset1)
        for name in differing:
            ne(cset1, name)
        for name in differing:
            ne(name, cset1)
        eq(cset1, cset2)
        eq(cset2, cset1)

    def test_getaddresses(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
                               'Bud Person <bperson@dom.ain>']),
           [('Al Person', 'aperson@dom.ain'),
            ('Bud Person', 'bperson@dom.ain')])

    def test_getaddresses_nasty(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['foo: ;']), [('', '')])
        eq(utils.getaddresses(
           ['[]*-- =~$']),
           [('', ''), ('', ''), ('', '*--')])
        eq(utils.getaddresses(
           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])

    def test_getaddresses_embedded_comment(self):
        """Test proper handling of a nested comment"""
        eq = self.assertEqual
        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
        eq(addrs[0][1], 'foo@bar.com')

    def test_utils_quote_unquote(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('content-disposition', 'attachment',
                       filename='foo\\wacky"name')
        eq(msg.get_filename(), 'foo\\wacky"name')

    def test_get_body_encoding_with_bogus_charset(self):
        charset = Charset('not a charset')
        self.assertEqual(charset.get_body_encoding(), 'base64')

    def test_get_body_encoding_with_uppercase_charset(self):
        eq = self.assertEqual
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset=UTF-8'
        eq(msg['content-type'], 'text/plain; charset=UTF-8')
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'utf-8')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), 'base64')
        msg.set_payload(b'hello world', charset=charset)
        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
        eq(msg.get_payload(decode=True), b'hello world')
        eq(msg['content-transfer-encoding'], 'base64')
        # Try another one
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'us-ascii')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
        msg.set_payload('hello world', charset=charset)
        eq(msg.get_payload(), 'hello world')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_charsets_case_insensitive(self):
        lc = Charset('us-ascii')
        uc = Charset('US-ASCII')
        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())

    def test_partial_falls_inside_message_delivery_status(self):
        eq = self.ndiffAssertEqual
        # The Parser interface provides chunks of data to FeedParser in 8192
        # byte gulps.  SF bug #1076485 found one of those chunks inside
        # message/delivery-status header block, which triggered an
        # unreadline() of NeedMoreData.
        msg = self._msgobj('msg_43.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # Expected tree: one line per part, indented four spaces per nesting
        # level.  NOTE(review): the nesting depth of each line was rebuilt
        # from the content types (the 26 text/plain parts are taken to be the
        # children of message/delivery-status) -- confirm against msg_43.txt.
        expected = ''.join(
            ['multipart/report\n',
             '    text/plain\n',
             '    message/delivery-status\n']
            + ['        text/plain\n'] * 26
            + ['    text/rfc822-headers\n'])
        eq(sfp.getvalue(), expected)

    def test_make_msgid_domain(self):
        # The last 19 characters are '@' + domain + '>'.
        self.assertEqual(
            email.utils.make_msgid(domain='testdomain-string')[-19:],
            '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002939
Ezio Melottib3aedd42010-11-20 19:04:17 +00002940
# Test the iterator/generators
class TestIterators(TestEmailBase):

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        # msg_19.txt holds the expected concatenation of the body lines.
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Only a main type is given here; no subtype is passed.
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [
            subpart.get_payload()
            for subpart in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of complete lines expected to
        # become readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            # Drain every line that is currently available; readline() hands
            # back NeedMoreData once nothing complete is left.
            ready = list(iter(bsf.readline, NeedMoreData))
            om.extend(ready)
            self.assertEqual(n, len(ready))
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: output must rejoin to the input.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3028
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003029
Ezio Melottib3aedd42010-11-20 19:04:17 +00003030
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003031class TestParsers(TestEmailBase):
R David Murrayb35c8502011-04-13 16:46:05 -04003032
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003033 def test_header_parser(self):
3034 eq = self.assertEqual
3035 # Parse only the headers of a complex multipart MIME document
3036 with openfile('msg_02.txt') as fp:
3037 msg = HeaderParser().parse(fp)
3038 eq(msg['from'], 'ppp-request@zzz.org')
3039 eq(msg['to'], 'ppp@zzz.org')
3040 eq(msg.get_content_type(), 'multipart/mixed')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003041 self.assertFalse(msg.is_multipart())
3042 self.assertTrue(isinstance(msg.get_payload(), str))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003043
R David Murrayb35c8502011-04-13 16:46:05 -04003044 def test_bytes_header_parser(self):
3045 eq = self.assertEqual
3046 # Parse only the headers of a complex multipart MIME document
3047 with openfile('msg_02.txt', 'rb') as fp:
3048 msg = email.parser.BytesHeaderParser().parse(fp)
3049 eq(msg['from'], 'ppp-request@zzz.org')
3050 eq(msg['to'], 'ppp@zzz.org')
3051 eq(msg.get_content_type(), 'multipart/mixed')
3052 self.assertFalse(msg.is_multipart())
3053 self.assertTrue(isinstance(msg.get_payload(), str))
3054 self.assertTrue(isinstance(msg.get_payload(decode=True), bytes))
3055
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003056 def test_whitespace_continuation(self):
3057 eq = self.assertEqual
3058 # This message contains a line after the Subject: header that has only
3059 # whitespace, but it is not empty!
3060 msg = email.message_from_string("""\
3061From: aperson@dom.ain
3062To: bperson@dom.ain
3063Subject: the next line has a space on it
3064\x20
3065Date: Mon, 8 Apr 2002 15:09:19 -0400
3066Message-ID: spam
3067
3068Here's the message body
3069""")
3070 eq(msg['subject'], 'the next line has a space on it\n ')
3071 eq(msg['message-id'], 'spam')
3072 eq(msg.get_payload(), "Here's the message body\n")
3073
3074 def test_whitespace_continuation_last_header(self):
3075 eq = self.assertEqual
3076 # Like the previous test, but the subject line is the last
3077 # header.
3078 msg = email.message_from_string("""\
3079From: aperson@dom.ain
3080To: bperson@dom.ain
3081Date: Mon, 8 Apr 2002 15:09:19 -0400
3082Message-ID: spam
3083Subject: the next line has a space on it
3084\x20
3085
3086Here's the message body
3087""")
3088 eq(msg['subject'], 'the next line has a space on it\n ')
3089 eq(msg['message-id'], 'spam')
3090 eq(msg.get_payload(), "Here's the message body\n")
3091
3092 def test_crlf_separation(self):
3093 eq = self.assertEqual
Guido van Rossum98297ee2007-11-06 21:34:58 +00003094 with openfile('msg_26.txt', newline='\n') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003095 msg = Parser().parse(fp)
3096 eq(len(msg.get_payload()), 2)
3097 part1 = msg.get_payload(0)
3098 eq(part1.get_content_type(), 'text/plain')
3099 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
3100 part2 = msg.get_payload(1)
3101 eq(part2.get_content_type(), 'application/riscos')
3102
def test_crlf_flatten(self):
    # Opening with newline='\n' keeps the CRLF line endings of the input
    # file, so flattening with linesep='\r\n' should reproduce the text
    # exactly.
    with openfile('msg_26.txt', newline='\n') as infile:
        original = infile.read()
    parsed = email.message_from_string(original)
    sink = StringIO()
    Generator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
3112
3113 maxDiff = None
3114
def test_multipart_digest_with_extra_mime_headers(self):
    eq = self.assertEqual
    neq = self.ndiffAssertEqual
    with openfile('msg_28.txt') as infile:
        digest = email.message_from_file(infile)
    # Expected structure:
    #   multipart/digest
    #     message/rfc822
    #       text/plain
    #     message/rfc822
    #       text/plain
    eq(digest.is_multipart(), 1)
    eq(len(digest.get_payload()), 2)
    # Both message/rfc822 parts have the same shape; only the body text
    # of the wrapped text/plain message differs.
    for index, body in enumerate(('message 1\n', 'message 2\n')):
        wrapper = digest.get_payload(index)
        eq(wrapper.get_content_type(), 'message/rfc822')
        eq(wrapper.is_multipart(), 1)
        eq(len(wrapper.get_payload()), 1)
        inner = wrapper.get_payload(0)
        eq(inner.is_multipart(), 0)
        eq(inner.get_content_type(), 'text/plain')
        neq(inner.get_payload(), body)
3145
def test_three_lines(self):
    # From a bug report by Andrew McNamara: three header lines, an
    # unterminated angle address in From:, and no trailing newline.
    header_lines = [
        'From: Andrew Person <aperson@dom.ain',
        'Subject: Test',
        'Date: Tue, 20 Aug 2002 16:43:45 +1000',
    ]
    parsed = email.message_from_string(NL.join(header_lines))
    self.assertEqual(parsed['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3153
3154 def test_strip_line_feed_and_carriage_return_in_headers(self):
3155 eq = self.assertEqual
3156 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3157 value1 = 'text'
3158 value2 = 'more text'
3159 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3160 value1, value2)
3161 msg = email.message_from_string(m)
3162 eq(msg.get('Header'), value1)
3163 eq(msg.get('Next-Header'), value2)
3164
3165 def test_rfc2822_header_syntax(self):
3166 eq = self.assertEqual
3167 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3168 msg = email.message_from_string(m)
3169 eq(len(msg), 3)
3170 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3171 eq(msg.get_payload(), 'body')
3172
3173 def test_rfc2822_space_not_allowed_in_header(self):
3174 eq = self.assertEqual
3175 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3176 msg = email.message_from_string(m)
3177 eq(len(msg.keys()), 0)
3178
3179 def test_rfc2822_one_character_header(self):
3180 eq = self.assertEqual
3181 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3182 msg = email.message_from_string(m)
3183 headers = msg.keys()
3184 headers.sort()
3185 eq(headers, ['A', 'B', 'CC'])
3186 eq(msg.get_payload(), 'body')
3187
R. David Murray45e0e142010-06-16 02:19:40 +00003188 def test_CRLFLF_at_end_of_part(self):
3189 # issue 5610: feedparser should not eat two chars from body part ending
3190 # with "\r\n\n".
3191 m = (
3192 "From: foo@bar.com\n"
3193 "To: baz\n"
3194 "Mime-Version: 1.0\n"
3195 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3196 "\n"
3197 "--BOUNDARY\n"
3198 "Content-Type: text/plain\n"
3199 "\n"
3200 "body ending with CRLF newline\r\n"
3201 "\n"
3202 "--BOUNDARY--\n"
3203 )
3204 msg = email.message_from_string(m)
3205 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003206
Ezio Melottib3aedd42010-11-20 19:04:17 +00003207
R. David Murray96fd54e2010-10-08 15:55:28 +00003208class Test8BitBytesHandling(unittest.TestCase):
3209 # In Python3 all input is string, but that doesn't work if the actual input
3210 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3211 # decode byte streams using the surrogateescape error handler, and
3212 # reconvert to binary at appropriate places if we detect surrogates. This
3213 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3214 # but it does allow us to parse and preserve them, and to decode body
3215 # parts that use an 8bit CTE.
3216
3217 bodytest_msg = textwrap.dedent("""\
3218 From: foo@bar.com
3219 To: baz
3220 Mime-Version: 1.0
3221 Content-Type: text/plain; charset={charset}
3222 Content-Transfer-Encoding: {cte}
3223
3224 {bodyline}
3225 """)
3226
3227 def test_known_8bit_CTE(self):
3228 m = self.bodytest_msg.format(charset='utf-8',
3229 cte='8bit',
3230 bodyline='pöstal').encode('utf-8')
3231 msg = email.message_from_bytes(m)
3232 self.assertEqual(msg.get_payload(), "pöstal\n")
3233 self.assertEqual(msg.get_payload(decode=True),
3234 "pöstal\n".encode('utf-8'))
3235
3236 def test_unknown_8bit_CTE(self):
3237 m = self.bodytest_msg.format(charset='notavalidcharset',
3238 cte='8bit',
3239 bodyline='pöstal').encode('utf-8')
3240 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003241 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003242 self.assertEqual(msg.get_payload(decode=True),
3243 "pöstal\n".encode('utf-8'))
3244
3245 def test_8bit_in_quopri_body(self):
3246 # This is non-RFC compliant data...without 'decode' the library code
3247 # decodes the body using the charset from the headers, and because the
3248 # source byte really is utf-8 this works. This is likely to fail
3249 # against real dirty data (ie: produce mojibake), but the data is
3250 # invalid anyway so it is as good a guess as any. But this means that
3251 # this test just confirms the current behavior; that behavior is not
3252 # necessarily the best possible behavior. With 'decode' it is
3253 # returning the raw bytes, so that test should be of correct behavior,
3254 # or at least produce the same result that email4 did.
3255 m = self.bodytest_msg.format(charset='utf-8',
3256 cte='quoted-printable',
3257 bodyline='p=C3=B6stál').encode('utf-8')
3258 msg = email.message_from_bytes(m)
3259 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3260 self.assertEqual(msg.get_payload(decode=True),
3261 'pöstál\n'.encode('utf-8'))
3262
3263 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3264 # This is similar to the previous test, but proves that if the 8bit
3265 # byte is undecodeable in the specified charset, it gets replaced
3266 # by the unicode 'unknown' character. Again, this may or may not
3267 # be the ideal behavior. Note that if decode=False none of the
3268 # decoders will get involved, so this is the only test we need
3269 # for this behavior.
3270 m = self.bodytest_msg.format(charset='ascii',
3271 cte='quoted-printable',
3272 bodyline='p=C3=B6stál').encode('utf-8')
3273 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003274 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003275 self.assertEqual(msg.get_payload(decode=True),
3276 'pöstál\n'.encode('utf-8'))
3277
R David Murray80e0aee2012-05-27 21:23:34 -04003278 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003279 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003280 # If we get 8bit bytes in a base64 body, we can just ignore them
3281 # as being outside the base64 alphabet and decode anyway. But
3282 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003283 m = self.bodytest_msg.format(charset='utf-8',
3284 cte='base64',
3285 bodyline='cMO2c3RhbAá=').encode('utf-8')
3286 msg = email.message_from_bytes(m)
3287 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003288 'pöstal'.encode('utf-8'))
3289 self.assertIsInstance(msg.defects[0],
3290 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003291
3292 def test_8bit_in_uuencode_body(self):
3293 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3294 # normal means, so the block is returned undecoded, but as bytes.
3295 m = self.bodytest_msg.format(charset='utf-8',
3296 cte='uuencode',
3297 bodyline='<,.V<W1A; á ').encode('utf-8')
3298 msg = email.message_from_bytes(m)
3299 self.assertEqual(msg.get_payload(decode=True),
3300 '<,.V<W1A; á \n'.encode('utf-8'))
3301
3302
R. David Murray92532142011-01-07 23:25:30 +00003303 headertest_headers = (
3304 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3305 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3306 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3307 '\tJean de Baddie',
3308 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3309 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3310 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3311 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3312 )
3313 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3314 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003315
3316 def test_get_8bit_header(self):
3317 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003318 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3319 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003320
3321 def test_print_8bit_headers(self):
3322 msg = email.message_from_bytes(self.headertest_msg)
3323 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003324 textwrap.dedent("""\
3325 From: {}
3326 To: {}
3327 Subject: {}
3328 From: {}
3329
3330 Yes, they are flying.
3331 """).format(*[expected[1] for (_, expected) in
3332 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003333
3334 def test_values_with_8bit_headers(self):
3335 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003336 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003337 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003338 'b\uFFFD\uFFFDz',
3339 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3340 'coll\uFFFD\uFFFDgue, le pouf '
3341 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003342 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003343 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003344
3345 def test_items_with_8bit_headers(self):
3346 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003347 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003348 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003349 ('To', 'b\uFFFD\uFFFDz'),
3350 ('Subject', 'Maintenant je vous '
3351 'pr\uFFFD\uFFFDsente '
3352 'mon coll\uFFFD\uFFFDgue, le pouf '
3353 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3354 '\tJean de Baddie'),
3355 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003356
3357 def test_get_all_with_8bit_headers(self):
3358 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003359 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003360 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003361 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003362
R David Murraya2150232011-03-16 21:11:23 -04003363 def test_get_content_type_with_8bit(self):
3364 msg = email.message_from_bytes(textwrap.dedent("""\
3365 Content-Type: text/pl\xA7in; charset=utf-8
3366 """).encode('latin-1'))
3367 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3368 self.assertEqual(msg.get_content_maintype(), "text")
3369 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3370
3371 def test_get_params_with_8bit(self):
3372 msg = email.message_from_bytes(
3373 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3374 self.assertEqual(msg.get_params(header='x-header'),
3375 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3376 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3377 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3378 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3379
3380 def test_get_rfc2231_params_with_8bit(self):
3381 msg = email.message_from_bytes(textwrap.dedent("""\
3382 Content-Type: text/plain; charset=us-ascii;
3383 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3384 ).encode('latin-1'))
3385 self.assertEqual(msg.get_param('title'),
3386 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3387
3388 def test_set_rfc2231_params_with_8bit(self):
3389 msg = email.message_from_bytes(textwrap.dedent("""\
3390 Content-Type: text/plain; charset=us-ascii;
3391 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3392 ).encode('latin-1'))
3393 msg.set_param('title', 'test')
3394 self.assertEqual(msg.get_param('title'), 'test')
3395
3396 def test_del_rfc2231_params_with_8bit(self):
3397 msg = email.message_from_bytes(textwrap.dedent("""\
3398 Content-Type: text/plain; charset=us-ascii;
3399 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3400 ).encode('latin-1'))
3401 msg.del_param('title')
3402 self.assertEqual(msg.get_param('title'), None)
3403 self.assertEqual(msg.get_content_maintype(), 'text')
3404
3405 def test_get_payload_with_8bit_cte_header(self):
3406 msg = email.message_from_bytes(textwrap.dedent("""\
3407 Content-Transfer-Encoding: b\xa7se64
3408 Content-Type: text/plain; charset=latin-1
3409
3410 payload
3411 """).encode('latin-1'))
3412 self.assertEqual(msg.get_payload(), 'payload\n')
3413 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3414
R. David Murray96fd54e2010-10-08 15:55:28 +00003415 non_latin_bin_msg = textwrap.dedent("""\
3416 From: foo@bar.com
3417 To: báz
3418 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3419 \tJean de Baddie
3420 Mime-Version: 1.0
3421 Content-Type: text/plain; charset="utf-8"
3422 Content-Transfer-Encoding: 8bit
3423
3424 Да, они летят.
3425 """).encode('utf-8')
3426
3427 def test_bytes_generator(self):
3428 msg = email.message_from_bytes(self.non_latin_bin_msg)
3429 out = BytesIO()
3430 email.generator.BytesGenerator(out).flatten(msg)
3431 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3432
R. David Murray7372a072011-01-26 21:21:32 +00003433 def test_bytes_generator_handles_None_body(self):
3434 #Issue 11019
3435 msg = email.message.Message()
3436 out = BytesIO()
3437 email.generator.BytesGenerator(out).flatten(msg)
3438 self.assertEqual(out.getvalue(), b"\n")
3439
R. David Murray92532142011-01-07 23:25:30 +00003440 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003441 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003442 To: =?unknown-8bit?q?b=C3=A1z?=
3443 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3444 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3445 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003446 Mime-Version: 1.0
3447 Content-Type: text/plain; charset="utf-8"
3448 Content-Transfer-Encoding: base64
3449
3450 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3451 """)
3452
3453 def test_generator_handles_8bit(self):
3454 msg = email.message_from_bytes(self.non_latin_bin_msg)
3455 out = StringIO()
3456 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003457 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003458
3459 def test_bytes_generator_with_unix_from(self):
3460 # The unixfrom contains a current date, so we can't check it
3461 # literally. Just make sure the first word is 'From' and the
3462 # rest of the message matches the input.
3463 msg = email.message_from_bytes(self.non_latin_bin_msg)
3464 out = BytesIO()
3465 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3466 lines = out.getvalue().split(b'\n')
3467 self.assertEqual(lines[0].split()[0], b'From')
3468 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3469
R. David Murray92532142011-01-07 23:25:30 +00003470 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3471 non_latin_bin_msg_as7bit[2:4] = [
3472 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3473 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3474 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3475
R. David Murray96fd54e2010-10-08 15:55:28 +00003476 def test_message_from_binary_file(self):
3477 fn = 'test.msg'
3478 self.addCleanup(unlink, fn)
3479 with open(fn, 'wb') as testfile:
3480 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003481 with open(fn, 'rb') as testfile:
3482 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003483 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3484
3485 latin_bin_msg = textwrap.dedent("""\
3486 From: foo@bar.com
3487 To: Dinsdale
3488 Subject: Nudge nudge, wink, wink
3489 Mime-Version: 1.0
3490 Content-Type: text/plain; charset="latin-1"
3491 Content-Transfer-Encoding: 8bit
3492
3493 oh là là, know what I mean, know what I mean?
3494 """).encode('latin-1')
3495
3496 latin_bin_msg_as7bit = textwrap.dedent("""\
3497 From: foo@bar.com
3498 To: Dinsdale
3499 Subject: Nudge nudge, wink, wink
3500 Mime-Version: 1.0
3501 Content-Type: text/plain; charset="iso-8859-1"
3502 Content-Transfer-Encoding: quoted-printable
3503
3504 oh l=E0 l=E0, know what I mean, know what I mean?
3505 """)
3506
3507 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3508 m = email.message_from_bytes(self.latin_bin_msg)
3509 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3510
3511 def test_decoded_generator_emits_unicode_body(self):
3512 m = email.message_from_bytes(self.latin_bin_msg)
3513 out = StringIO()
3514 email.generator.DecodedGenerator(out).flatten(m)
3515 #DecodedHeader output contains an extra blank line compared
3516 #to the input message. RDM: not sure if this is a bug or not,
3517 #but it is not specific to the 8bit->7bit conversion.
3518 self.assertEqual(out.getvalue(),
3519 self.latin_bin_msg.decode('latin-1')+'\n')
3520
3521 def test_bytes_feedparser(self):
3522 bfp = email.feedparser.BytesFeedParser()
3523 for i in range(0, len(self.latin_bin_msg), 10):
3524 bfp.feed(self.latin_bin_msg[i:i+10])
3525 m = bfp.close()
3526 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3527
R. David Murray8451c4b2010-10-23 22:19:56 +00003528 def test_crlf_flatten(self):
3529 with openfile('msg_26.txt', 'rb') as fp:
3530 text = fp.read()
3531 msg = email.message_from_bytes(text)
3532 s = BytesIO()
3533 g = email.generator.BytesGenerator(s)
3534 g.flatten(msg, linesep='\r\n')
3535 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003536
3537 def test_8bit_multipart(self):
3538 # Issue 11605
3539 source = textwrap.dedent("""\
3540 Date: Fri, 18 Mar 2011 17:15:43 +0100
3541 To: foo@example.com
3542 From: foodwatch-Newsletter <bar@example.com>
3543 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3544 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3545 MIME-Version: 1.0
3546 Content-Type: multipart/alternative;
3547 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3548
3549 --b1_76a486bee62b0d200f33dc2ca08220ad
3550 Content-Type: text/plain; charset="utf-8"
3551 Content-Transfer-Encoding: 8bit
3552
3553 Guten Tag, ,
3554
3555 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3556 Nachrichten aus Japan.
3557
3558
3559 --b1_76a486bee62b0d200f33dc2ca08220ad
3560 Content-Type: text/html; charset="utf-8"
3561 Content-Transfer-Encoding: 8bit
3562
3563 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3564 "http://www.w3.org/TR/html4/loose.dtd">
3565 <html lang="de">
3566 <head>
3567 <title>foodwatch - Newsletter</title>
3568 </head>
3569 <body>
3570 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3571 die Nachrichten aus Japan.</p>
3572 </body>
3573 </html>
3574 --b1_76a486bee62b0d200f33dc2ca08220ad--
3575
3576 """).encode('utf-8')
3577 msg = email.message_from_bytes(source)
3578 s = BytesIO()
3579 g = email.generator.BytesGenerator(s)
3580 g.flatten(msg)
3581 self.assertEqual(s.getvalue(), source)
3582
R David Murray9fd170e2012-03-14 14:05:03 -04003583 def test_bytes_generator_b_encoding_linesep(self):
3584 # Issue 14062: b encoding was tacking on an extra \n.
3585 m = Message()
3586 # This has enough non-ascii that it should always end up b encoded.
3587 m['Subject'] = Header('žluťoučký kůň')
3588 s = BytesIO()
3589 g = email.generator.BytesGenerator(s)
3590 g.flatten(m, linesep='\r\n')
3591 self.assertEqual(
3592 s.getvalue(),
3593 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3594
3595 def test_generator_b_encoding_linesep(self):
3596 # Since this broke in ByteGenerator, test Generator for completeness.
3597 m = Message()
3598 # This has enough non-ascii that it should always end up b encoded.
3599 m['Subject'] = Header('žluťoučký kůň')
3600 s = StringIO()
3601 g = email.generator.Generator(s)
3602 g.flatten(m, linesep='\r\n')
3603 self.assertEqual(
3604 s.getvalue(),
3605 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3606
R. David Murray8451c4b2010-10-23 22:19:56 +00003607 maxDiff = None
3608
Ezio Melottib3aedd42010-11-20 19:04:17 +00003609
class BaseTestBytesGeneratorIdempotent:
    # Mixin with the bytes flavour of the _msgobj()/_idempotent() helpers.
    # The class it is mixed into supplies `linesep`, `blinesep` and
    # `normalize_linesep_regex`.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the file as bytes, rewrite the line endings matched by
        # normalize_linesep_regex to blinesep, and return the parsed
        # message together with the normalized bytes.
        with openfile(filename, 'rb') as infile:
            raw = infile.read()
            normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
            parsed = email.message_from_bytes(normalized)
        return parsed, normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and check
        # that the output equals data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003626
3627
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Bytes idempotency tests with '\n' line endings: every '\r\n' in the
    # input data is rewritten to '\n' before parsing.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3633
3634
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Bytes idempotency tests with '\r\n' line endings: every '\n' that
    # is not already preceded by '\r' is rewritten to '\r\n' before
    # parsing.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3640
Ezio Melottib3aedd42010-11-20 19:04:17 +00003641
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        # header_length() must agree with the length of the real encoding.
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Every started group of three input characters is expected
            # to take four characters of output.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\n'
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\n'
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\n'
           'eHh4eCB4eHh4IA==\n')
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r\n'
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r\n'
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r\n'
           'eHh4eCB4eHh4IA==\r\n')

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks inside the header text are encoded, not preserved.
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003692
3693
Ezio Melottib3aedd42010-11-20 19:04:17 +00003694
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003695class TestQuopri(unittest.TestCase):
3696 def setUp(self):
3697 # Set of characters (as byte integers) that don't need to be encoded
3698 # in headers.
3699 self.hlit = list(chain(
3700 range(ord('a'), ord('z') + 1),
3701 range(ord('A'), ord('Z') + 1),
3702 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003703 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003704 # Set of characters (as byte integers) that do need to be encoded in
3705 # headers.
3706 self.hnon = [c for c in range(256) if c not in self.hlit]
3707 assert len(self.hlit) + len(self.hnon) == 256
3708 # Set of characters (as byte integers) that don't need to be encoded
3709 # in bodies.
3710 self.blit = list(range(ord(' '), ord('~') + 1))
3711 self.blit.append(ord('\t'))
3712 self.blit.remove(ord('='))
3713 # Set of characters (as byte integers) that do need to be encoded in
3714 # bodies.
3715 self.bnon = [c for c in range(256) if c not in self.blit]
3716 assert len(self.blit) + len(self.bnon) == 256
3717
Guido van Rossum9604e662007-08-30 03:46:43 +00003718 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003719 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003720 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003721 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003722 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003723 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003724 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003725
Guido van Rossum9604e662007-08-30 03:46:43 +00003726 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003727 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003728 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003729 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003730 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003731 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003732 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003733
3734 def test_header_quopri_len(self):
3735 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003736 eq(quoprimime.header_length(b'hello'), 5)
3737 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003738 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003739 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003740 # =?xxx?q?...?= means 10 extra characters
3741 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003742 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3743 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003744 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003745 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003746 # =?xxx?q?...?= means 10 extra characters
3747 10)
3748 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003749 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003750 'expected length 1 for %r' % chr(c))
3751 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003752 # Space is special; it's encoded to _
3753 if c == ord(' '):
3754 continue
3755 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003756 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003757 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003758
3759 def test_body_quopri_len(self):
3760 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003761 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003762 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003763 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003764 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003765
3766 def test_quote_unquote_idempotent(self):
3767 for x in range(256):
3768 c = chr(x)
3769 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3770
R David Murrayec1b5b82011-03-23 14:19:05 -04003771 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3772 if charset is None:
3773 encoded_header = quoprimime.header_encode(header)
3774 else:
3775 encoded_header = quoprimime.header_encode(header, charset)
3776 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003777
R David Murraycafd79d2011-03-23 15:25:55 -04003778 def test_header_encode_null(self):
3779 self._test_header_encode(b'', '')
3780
R David Murrayec1b5b82011-03-23 14:19:05 -04003781 def test_header_encode_one_word(self):
3782 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3783
3784 def test_header_encode_two_lines(self):
3785 self._test_header_encode(b'hello\nworld',
3786 '=?iso-8859-1?q?hello=0Aworld?=')
3787
3788 def test_header_encode_non_ascii(self):
3789 self._test_header_encode(b'hello\xc7there',
3790 '=?iso-8859-1?q?hello=C7there?=')
3791
3792 def test_header_encode_alt_charset(self):
3793 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3794 charset='iso-8859-2')
3795
def _test_header_decode(self, encoded_header, expected_decoded_header):
    # Helper: header_decode(encoded_header) must equal the expected text.
    self.assertEqual(quoprimime.header_decode(encoded_header),
                     expected_decoded_header)
3799
def test_header_decode_null(self):
    # Empty input decodes to empty output.
    self._test_header_decode(encoded_header='', expected_decoded_header='')
3802
def test_header_decode_one_word(self):
    # Text without any quoting passes through unchanged.
    word = 'hello'
    self._test_header_decode(word, word)
3805
def test_header_decode_two_lines(self):
    # =0A decodes to a newline.
    self._test_header_decode(encoded_header='hello=0Aworld',
                             expected_decoded_header='hello\nworld')
3808
def test_header_decode_non_ascii(self):
    # =C7 decodes to the code point U+00C7.
    self._test_header_decode(encoded_header='hello=C7there',
                             expected_decoded_header='hello\xc7there')
3811
def _test_decode(self, encoded, expected_decoded, eol=None):
    # Helper: compare quoprimime.decode(encoded) with the expected text.
    # eol is passed on only when given, so decode()'s default applies
    # otherwise.
    options = {} if eol is None else {'eol': eol}
    self.assertEqual(quoprimime.decode(encoded, **options), expected_decoded)
3818
def test_decode_null_word(self):
    # Empty input decodes to empty output.
    self._test_decode(encoded='', expected_decoded='')
3821
def test_decode_null_line_null_word(self):
    # A bare CRLF decodes to a single newline.
    self._test_decode(encoded='\r\n', expected_decoded='\n')
3824
def test_decode_one_word(self):
    # Unquoted text without a line ending is returned unchanged.
    word = 'hello'
    self._test_decode(word, word)
3827
def test_decode_one_word_eol(self):
    # With no line ending in the input, a custom eol adds nothing.
    word = 'hello'
    self._test_decode(word, word, eol='X')
3830
def test_decode_one_line(self):
    # CRLF at the end of a line becomes a newline.
    self._test_decode(encoded='hello\r\n', expected_decoded='hello\n')
3833
def test_decode_one_line_lf(self):
    # A line already ending in LF keeps that ending.
    line = 'hello\n'
    self._test_decode(line, line)
3836
def test_decode_one_line_cr(self):
    # A lone CR at the end of a line becomes a newline.
    self._test_decode(encoded='hello\r', expected_decoded='hello\n')
3839
def test_decode_one_line_nl(self):
    # The custom eol replaces an LF line ending.
    self._test_decode(encoded='hello\n', expected_decoded='helloX', eol='X')
3842
def test_decode_one_line_crnl(self):
    # The custom eol replaces a CRLF line ending.
    self._test_decode(encoded='hello\r\n', expected_decoded='helloX', eol='X')
3845
def test_decode_one_line_one_word(self):
    # Only the terminated line gets a newline; the trailing word has none.
    self._test_decode(encoded='hello\r\nworld',
                      expected_decoded='hello\nworld')
3848
def test_decode_one_line_one_word_eol(self):
    # As above, with the custom eol standing in for the line ending.
    self._test_decode(encoded='hello\r\nworld',
                      expected_decoded='helloXworld',
                      eol='X')
3851
def test_decode_two_lines(self):
    # Each CRLF-terminated line ends in a newline after decoding.
    self._test_decode(encoded='hello\r\nworld\r\n',
                      expected_decoded='hello\nworld\n')
3854
def test_decode_two_lines_eol(self):
    # Each CRLF is replaced by the custom eol.
    self._test_decode(encoded='hello\r\nworld\r\n',
                      expected_decoded='helloXworldX',
                      eol='X')
3857
def test_decode_one_long_line(self):
    # A 1000-character line without quoting is returned unchanged.
    long_text = 'Spam' * 250
    self._test_decode(long_text, long_text)
3860
def test_decode_one_space(self):
    # A lone space decodes to the empty string.
    self._test_decode(encoded=' ', expected_decoded='')
3863
def test_decode_multiple_spaces(self):
    # A run of spaces decodes to the empty string as well.
    spaces = ' ' * 5
    self._test_decode(spaces, '')
3866
def test_decode_one_line_trailing_spaces(self):
    # Whitespace in front of the line ending is dropped.
    self._test_decode(encoded='hello \r\n', expected_decoded='hello\n')
3869
def test_decode_two_lines_trailing_spaces(self):
    # Trailing whitespace is dropped from every line.
    self._test_decode(encoded='hello \r\nworld \r\n',
                      expected_decoded='hello\nworld\n')
3872
def test_decode_quoted_word(self):
    # =22 and =20 decode to a double quote and a space.
    self._test_decode(encoded='=22quoted=20words=22',
                      expected_decoded='"quoted words"')
3875
def test_decode_uppercase_quoting(self):
    # Upper-case hex digits are accepted in =XX escapes.
    self._test_decode(encoded='ab=CD=EF', expected_decoded='ab\xcd\xef')
3878
def test_decode_lowercase_quoting(self):
    # Lower-case hex digits are accepted in =xx escapes too.
    self._test_decode(encoded='ab=cd=ef', expected_decoded='ab\xcd\xef')
3881
def test_decode_soft_line_break(self):
    # "=" directly before the line ending is a soft break: both vanish.
    self._test_decode(encoded='soft line=\r\nbreak',
                      expected_decoded='soft linebreak')
3884
def test_decode_false_quoting(self):
    # "=" signs that do not start a valid escape are left untouched.
    text = 'A=1,B=A ==> A+B==2'
    self._test_decode(text, text)
3887
def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
    # Helper: compare body_encode(body) with the expected text and, when the
    # output can be split into lines, check that no line exceeds maxlinelen.
    # maxlinelen and eol are forwarded only when given, so body_encode()'s
    # own defaults are exercised otherwise.
    overrides = {}
    if maxlinelen is not None:
        overrides['maxlinelen'] = maxlinelen
    if eol is not None:
        overrides['eol'] = eol
    encoded_body = quoprimime.body_encode(body, **overrides)
    self.assertEqual(encoded_body, expected_encoded_body)
    # Fall back to body_encode's defaults for the length check.
    line_limit = overrides.get('maxlinelen', 76)
    line_end = overrides.get('eol', '\n')
    if line_end in ('\n', '\r\n'):
        # splitlines() understands these endings, so every encoded line can
        # be measured.
        for encoded_line in encoded_body.splitlines():
            self.assertLessEqual(len(encoded_line), line_limit)
3907
def test_encode_null(self):
    # An empty body encodes to an empty body.
    self._test_encode(body='', expected_encoded_body='')
3910
def test_encode_null_lines(self):
    # Empty lines are preserved.
    blank_lines = '\n\n'
    self._test_encode(blank_lines, blank_lines)
3913
def test_encode_one_line(self):
    # A short LF-terminated line needs no encoding.
    line = 'hello\n'
    self._test_encode(line, line)
3916
def test_encode_one_line_crlf(self):
    # A CRLF in the input is written out as the default eol.
    self._test_encode(body='hello\r\n', expected_encoded_body='hello\n')
3919
def test_encode_one_line_eol(self):
    # An explicit eol replaces the input line ending.
    self._test_encode(body='hello\n',
                      expected_encoded_body='hello\r\n',
                      eol='\r\n')
3922
def test_encode_one_space(self):
    # A space that ends the body is quoted as =20.
    self._test_encode(body=' ', expected_encoded_body='=20')
3925
def test_encode_one_line_one_space(self):
    # A space that ends a line is quoted as =20.
    self._test_encode(body=' \n', expected_encoded_body='=20\n')
3928
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3933
def test_encode_two_lines_one_space(self):
    # The line-ending space is quoted on each line.
    self._test_encode(body=' \n \n', expected_encoded_body='=20\n=20\n')
3936
R David Murrayec1b5b82011-03-23 14:19:05 -04003937 def test_encode_one_word_trailing_spaces(self):
3938 self._test_encode('hello ', 'hello =20')
3939
3940 def test_encode_one_line_trailing_spaces(self):
3941 self._test_encode('hello \n', 'hello =20\n')
3942
def test_encode_one_word_trailing_tab(self):
    # A tab that ends the body is quoted as =09; the space before it stays.
    self._test_encode(body='hello \t', expected_encoded_body='hello =09')
3945
def test_encode_one_line_trailing_tab(self):
    # A tab that ends a line is quoted as =09; the space before it stays.
    self._test_encode(body='hello \t\n', expected_encoded_body='hello =09\n')
3948
def test_encode_trailing_space_before_maxlinelen(self):
    # With maxlinelen=6 the trailing space stays literal and is followed
    # by a soft break.
    self._test_encode(body='abcd \n1234',
                      expected_encoded_body='abcd =\n\n1234',
                      maxlinelen=6)
3951
def test_encode_trailing_space_at_maxlinelen(self):
    # With maxlinelen=5 the soft break comes first and the quoted space
    # sits alone on the next line.
    self._test_encode(body='abcd \n1234',
                      expected_encoded_body='abcd=\n=20\n1234',
                      maxlinelen=5)
3954
def test_encode_trailing_space_beyond_maxlinelen(self):
    # With maxlinelen=4 the word itself is split and the quoted space
    # follows its last character.
    self._test_encode(body='abcd \n1234',
                      expected_encoded_body='abc=\nd=20\n1234',
                      maxlinelen=4)
3957
def test_encode_whitespace_lines(self):
    # Five lines holding just a space each become five =20 lines.
    repeats = 5
    self._test_encode(' \n' * repeats, '=20\n' * repeats)
R David Murrayec1b5b82011-03-23 14:19:05 -04003960
def test_encode_quoted_equals(self):
    # A literal "=" is quoted as =3D.
    self._test_encode(body='a = b', expected_encoded_body='a =3D b')
3963
def test_encode_one_long_string(self):
    # 100 characters without a line ending: soft break after 75.
    wanted = 'x' * 75 + '=\n' + 'x' * 25
    self._test_encode('x' * 100, wanted)
3966
3967 def test_encode_one_long_line(self):
3968 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3969
3970 def test_encode_one_very_long_line(self):
3971 self._test_encode('x' * 200 + '\n',
3972 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3973
3974 def test_encode_one_long_line(self):
3975 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3976
def test_encode_shortest_maxlinelen(self):
    # With maxlinelen=4 each line holds one quoted "=" plus the soft break.
    wanted = '=3D=\n' * 4 + '=3D'
    self._test_encode('=' * 5, wanted, maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003979
def test_encode_maxlinelen_too_small(self):
    # maxlinelen=3 must be rejected with ValueError.
    with self.assertRaises(ValueError):
        self._test_encode('', '', maxlinelen=3)
3982
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003983 def test_encode(self):
3984 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003985 eq(quoprimime.body_encode(''), '')
3986 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003987 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003988 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003989 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003990 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003991xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3992 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3993x xxxx xxxx xxxx xxxx=20""")
3994 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003995 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3996 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003997xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3998 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3999x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00004000 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004001one line
4002
4003two line"""), """\
4004one line
4005
4006two line""")
4007
4008
Ezio Melottib3aedd42010-11-20 19:04:17 +00004009
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004010# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a 'fake' charset; remove it again if it
        # is present so the registration does not outlive the test.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        check = self.assertEqual
        # With us-ascii, pure ASCII text is returned as-is.
        ascii_charset = Charset('us-ascii')
        check(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Non-ASCII text cannot be encoded with us-ascii ...
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(s)
        # ... but is encoded as a base64 encoded-word with utf-8.
        utf8_charset = Charset('utf-8')
        check(utf8_charset.header_encode(s),
              '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # A charset with QP body encoding.
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # A charset with Base64 body encoding.
        utf8 = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # A charset with no body encoding.
        plain = Charset('us-ascii')
        check('hello world', plain.body_encode('hello world'))
        # The convert argument, where input codec != output codec.
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # SF bug #625509 has to be faked, since there are no built-in
        # encodings where the header encoding is QP but the body encoding
        # is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        check('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # An ASCII name round-trips through str(); a non-ASCII name is
        # rejected with CharsetError.
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4065
4066
Ezio Melottib3aedd42010-11-20 19:04:17 +00004067
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004068# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # Chunks are joined with a space (see test_simple_surprise), so the
        # leading space of the appended chunk shows up as a second space.
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # The "surprise": a space is inserted between appended chunks even
        # though neither chunk contains one at the join.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must fit in maxlinelen; assertLessEqual reports
        # the offending length on failure.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Reuse x so the round-trip check below compares against exactly
        # what was appended.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        # Mutating the list returned by decode_header() must leave the
        # Header it was built from untouched.
        h = Header('test')
        chunks = email.header.decode_header(h)
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        msg['SomeHeader'] = ' value with leading ws'
        # The generator writes "name: " and then the value, so a preserved
        # leading space appears as a second space after the colon.
        self.assertEqual(str(msg), "SomeHeader:  value with leading ws\n\n")
4420
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004421
Ezio Melottib3aedd42010-11-20 19:04:17 +00004422
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004423# Test RFC 2231 header parameters (en/de)coding
4424class TestRFC2231(TestEmailBase):
def test_get_param(self):
    # get_param('title') on msg_29.txt gives a (charset, language, value)
    # tuple; unquote=False keeps the double quotes around the value.
    msg = self._msgobj('msg_29.txt')
    unquoted = msg.get_param('title')
    self.assertEqual(
        unquoted,
        ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    still_quoted = msg.get_param('title', unquote=False)
    self.assertEqual(
        still_quoted,
        ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4432
def test_set_param(self):
    # set_param() with a charset, then with a charset and a language, on a
    # fresh Message; afterwards on msg_01.txt, checking the flattened text.
    check = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    msg = Message()
    msg.set_param('title', title, charset='us-ascii')
    check(msg.get_param('title'), ('us-ascii', '', title))
    msg.set_param('title', title, charset='us-ascii', language='en')
    check(msg.get_param('title'), ('us-ascii', 'en', title))
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', title, charset='us-ascii', language='en')
    flattened = msg.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4469
R David Murraya2860e82011-04-16 09:20:30 -04004470 def test_set_param_requote(self):
4471 msg = Message()
4472 msg.set_param('title', 'foo')
4473 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4474 msg.set_param('title', 'bar', requote=False)
4475 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4476 # tspecial is still quoted.
4477 msg.set_param('title', "(bar)bell", requote=False)
4478 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4479
def test_del_param(self):
    # Set two RFC 2231 parameters on msg_01.txt, delete one of them again,
    # and check the flattened message.
    msg = self._msgobj('msg_01.txt')
    rfc2231_options = {'charset': 'us-ascii', 'language': 'en'}
    msg.set_param('foo', 'bar', **rfc2231_options)
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  **rfc2231_options)
    msg.del_param('foo', header='Content-Type')
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4509
def test_rfc2231_get_content_charset(self):
    # msg_32.txt must report 'us-ascii' as its content charset.
    charset = self._msgobj('msg_32.txt').get_content_charset()
    self.assertEqual(charset, 'us-ascii')
4514
R. David Murraydfd7eb02010-12-24 22:36:49 +00004515 def test_rfc2231_parse_rfc_quoting(self):
4516 m = textwrap.dedent('''\
4517 Content-Disposition: inline;
4518 \tfilename*0*=''This%20is%20even%20more%20;
4519 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4520 \tfilename*2="is it not.pdf"
4521
4522 ''')
4523 msg = email.message_from_string(m)
4524 self.assertEqual(msg.get_filename(),
4525 'This is even more ***fun*** is it not.pdf')
4526 self.assertEqual(m, msg.as_string())
4527
4528 def test_rfc2231_parse_extra_quoting(self):
4529 m = textwrap.dedent('''\
4530 Content-Disposition: inline;
4531 \tfilename*0*="''This%20is%20even%20more%20";
4532 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4533 \tfilename*2="is it not.pdf"
4534
4535 ''')
4536 msg = email.message_from_string(m)
4537 self.assertEqual(msg.get_filename(),
4538 'This is even more ***fun*** is it not.pdf')
4539 self.assertEqual(m, msg.as_string())
4540
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004541 def test_rfc2231_no_language_or_charset(self):
4542 m = '''\
4543Content-Transfer-Encoding: 8bit
4544Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4545Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4546
4547'''
4548 msg = email.message_from_string(m)
4549 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004550 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004551 self.assertEqual(
4552 param,
4553 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4554
4555 def test_rfc2231_no_language_or_charset_in_filename(self):
4556 m = '''\
4557Content-Disposition: inline;
4558\tfilename*0*="''This%20is%20even%20more%20";
4559\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4560\tfilename*2="is it not.pdf"
4561
4562'''
4563 msg = email.message_from_string(m)
4564 self.assertEqual(msg.get_filename(),
4565 'This is even more ***fun*** is it not.pdf')
4566
4567 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4568 m = '''\
4569Content-Disposition: inline;
4570\tfilename*0*="''This%20is%20even%20more%20";
4571\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4572\tfilename*2="is it not.pdf"
4573
4574'''
4575 msg = email.message_from_string(m)
4576 self.assertEqual(msg.get_filename(),
4577 'This is even more ***fun*** is it not.pdf')
4578
4579 def test_rfc2231_partly_encoded(self):
4580 m = '''\
4581Content-Disposition: inline;
4582\tfilename*0="''This%20is%20even%20more%20";
4583\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4584\tfilename*2="is it not.pdf"
4585
4586'''
4587 msg = email.message_from_string(m)
4588 self.assertEqual(
4589 msg.get_filename(),
4590 'This%20is%20even%20more%20***fun*** is it not.pdf')
4591
4592 def test_rfc2231_partly_nonencoded(self):
4593 m = '''\
4594Content-Disposition: inline;
4595\tfilename*0="This%20is%20even%20more%20";
4596\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4597\tfilename*2="is it not.pdf"
4598
4599'''
4600 msg = email.message_from_string(m)
4601 self.assertEqual(
4602 msg.get_filename(),
4603 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4604
4605 def test_rfc2231_no_language_or_charset_in_boundary(self):
4606 m = '''\
4607Content-Type: multipart/alternative;
4608\tboundary*0*="''This%20is%20even%20more%20";
4609\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4610\tboundary*2="is it not.pdf"
4611
4612'''
4613 msg = email.message_from_string(m)
4614 self.assertEqual(msg.get_boundary(),
4615 'This is even more ***fun*** is it not.pdf')
4616
4617 def test_rfc2231_no_language_or_charset_in_charset(self):
4618 # This is a nonsensical charset value, but tests the code anyway
4619 m = '''\
4620Content-Type: text/plain;
4621\tcharset*0*="This%20is%20even%20more%20";
4622\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4623\tcharset*2="is it not.pdf"
4624
4625'''
4626 msg = email.message_from_string(m)
4627 self.assertEqual(msg.get_content_charset(),
4628 'this is even more ***fun*** is it not.pdf')
4629
4630 def test_rfc2231_bad_encoding_in_filename(self):
4631 m = '''\
4632Content-Disposition: inline;
4633\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4634\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4635\tfilename*2="is it not.pdf"
4636
4637'''
4638 msg = email.message_from_string(m)
4639 self.assertEqual(msg.get_filename(),
4640 'This is even more ***fun*** is it not.pdf')
4641
4642 def test_rfc2231_bad_encoding_in_charset(self):
4643 m = """\
4644Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4645
4646"""
4647 msg = email.message_from_string(m)
4648 # This should return None because non-ascii characters in the charset
4649 # are not allowed.
4650 self.assertEqual(msg.get_content_charset(), None)
4651
4652 def test_rfc2231_bad_character_in_charset(self):
4653 m = """\
4654Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4655
4656"""
4657 msg = email.message_from_string(m)
4658 # This should return None because non-ascii characters in the charset
4659 # are not allowed.
4660 self.assertEqual(msg.get_content_charset(), None)
4661
4662 def test_rfc2231_bad_character_in_filename(self):
4663 m = '''\
4664Content-Disposition: inline;
4665\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4666\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4667\tfilename*2*="is it not.pdf%E2"
4668
4669'''
4670 msg = email.message_from_string(m)
4671 self.assertEqual(msg.get_filename(),
4672 'This is even more ***fun*** is it not.pdf\ufffd')
4673
4674 def test_rfc2231_unknown_encoding(self):
4675 m = """\
4676Content-Transfer-Encoding: 8bit
4677Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4678
4679"""
4680 msg = email.message_from_string(m)
4681 self.assertEqual(msg.get_filename(), 'myfile.txt')
4682
4683 def test_rfc2231_single_tick_in_filename_extended(self):
4684 eq = self.assertEqual
4685 m = """\
4686Content-Type: application/x-foo;
4687\tname*0*=\"Frank's\"; name*1*=\" Document\"
4688
4689"""
4690 msg = email.message_from_string(m)
4691 charset, language, s = msg.get_param('name')
4692 eq(charset, None)
4693 eq(language, None)
4694 eq(s, "Frank's Document")
4695
4696 def test_rfc2231_single_tick_in_filename(self):
4697 m = """\
4698Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4699
4700"""
4701 msg = email.message_from_string(m)
4702 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004703 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004704 self.assertEqual(param, "Frank's Document")
4705
4706 def test_rfc2231_tick_attack_extended(self):
4707 eq = self.assertEqual
4708 m = """\
4709Content-Type: application/x-foo;
4710\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4711
4712"""
4713 msg = email.message_from_string(m)
4714 charset, language, s = msg.get_param('name')
4715 eq(charset, 'us-ascii')
4716 eq(language, 'en-us')
4717 eq(s, "Frank's Document")
4718
4719 def test_rfc2231_tick_attack(self):
4720 m = """\
4721Content-Type: application/x-foo;
4722\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4723
4724"""
4725 msg = email.message_from_string(m)
4726 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004727 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004728 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4729
4730 def test_rfc2231_no_extended_values(self):
4731 eq = self.assertEqual
4732 m = """\
4733Content-Type: application/x-foo; name=\"Frank's Document\"
4734
4735"""
4736 msg = email.message_from_string(m)
4737 eq(msg.get_param('name'), "Frank's Document")
4738
4739 def test_rfc2231_encoded_then_unencoded_segments(self):
4740 eq = self.assertEqual
4741 m = """\
4742Content-Type: application/x-foo;
4743\tname*0*=\"us-ascii'en-us'My\";
4744\tname*1=\" Document\";
4745\tname*2*=\" For You\"
4746
4747"""
4748 msg = email.message_from_string(m)
4749 charset, language, s = msg.get_param('name')
4750 eq(charset, 'us-ascii')
4751 eq(language, 'en-us')
4752 eq(s, 'My Document For You')
4753
4754 def test_rfc2231_unencoded_then_encoded_segments(self):
4755 eq = self.assertEqual
4756 m = """\
4757Content-Type: application/x-foo;
4758\tname*0=\"us-ascii'en-us'My\";
4759\tname*1*=\" Document\";
4760\tname*2*=\" For You\"
4761
4762"""
4763 msg = email.message_from_string(m)
4764 charset, language, s = msg.get_param('name')
4765 eq(charset, 'us-ascii')
4766 eq(language, 'en-us')
4767 eq(s, 'My Document For You')
4768
4769
Ezio Melottib3aedd42010-11-20 19:04:17 +00004770
R. David Murraya8f480f2010-01-16 18:30:03 +00004771# Tests to ensure that signed parts of an email are completely preserved, as
4772# required by RFC1847 section 2.1. Note that these are incomplete, because the
4773# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    # Round-trip checks for a signed message fixture: after parsing and
    # regenerating the message, its first MIME part must be textually
    # identical to the first MIME part of the input.

    # Matches a line starting with '--' (the marker is captured as group 1),
    # then lazily everything up to the next line that consists of '--'
    # followed by the same marker.  Group 2 is the text in between.
    _FIRST_PART_RE = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw_text, message): the text read from the named fixture
        # and the Message object parsed from that same text.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the first MIME part found in text.  A missing part is
        # reported as a test failure rather than surfacing as an
        # AttributeError on a None match object.
        match = self._FIRST_PART_RE.search(text)
        self.assertIsNotNone(
            match, 'no boundary-delimited MIME part found in message text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require them to
        # be equal.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        # Flatten with the default as_string() settings.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Flatten with an explicit header length limit of 60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Flatten through an explicitly constructed Generator writing to an
        # in-memory text buffer.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4806
4807
Ezio Melottib3aedd42010-11-20 19:04:17 +00004808
# Hand control to unittest's command-line runner when this file is executed
# directly as a script.
if __name__ == '__main__':
    unittest.main()